diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/dlm/config.c | 7 | ||||
| -rw-r--r-- | fs/dlm/config.h | 1 | ||||
| -rw-r--r-- | fs/dlm/debug_fs.c | 103 | ||||
| -rw-r--r-- | fs/dlm/dir.c | 287 | ||||
| -rw-r--r-- | fs/dlm/dir.h | 7 | ||||
| -rw-r--r-- | fs/dlm/dlm_internal.h | 46 | ||||
| -rw-r--r-- | fs/dlm/lock.c | 1022 | ||||
| -rw-r--r-- | fs/dlm/lock.h | 5 | ||||
| -rw-r--r-- | fs/dlm/lockspace.c | 23 | ||||
| -rw-r--r-- | fs/dlm/rcom.c | 145 | ||||
| -rw-r--r-- | fs/dlm/rcom.h | 1 | ||||
| -rw-r--r-- | fs/dlm/recover.c | 140 | ||||
| -rw-r--r-- | fs/dlm/recover.h | 2 | ||||
| -rw-r--r-- | fs/dlm/recoverd.c | 14 |
14 files changed, 1215 insertions, 588 deletions
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index e7e327d43fa5..9ccf7346834a 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -96,7 +96,6 @@ struct dlm_cluster { unsigned int cl_tcp_port; unsigned int cl_buffer_size; unsigned int cl_rsbtbl_size; - unsigned int cl_dirtbl_size; unsigned int cl_recover_timer; unsigned int cl_toss_secs; unsigned int cl_scan_secs; @@ -113,7 +112,6 @@ enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, CLUSTER_ATTR_RSBTBL_SIZE, - CLUSTER_ATTR_DIRTBL_SIZE, CLUSTER_ATTR_RECOVER_TIMER, CLUSTER_ATTR_TOSS_SECS, CLUSTER_ATTR_SCAN_SECS, @@ -189,7 +187,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) CLUSTER_ATTR(tcp_port, 1); CLUSTER_ATTR(buffer_size, 1); CLUSTER_ATTR(rsbtbl_size, 1); -CLUSTER_ATTR(dirtbl_size, 1); CLUSTER_ATTR(recover_timer, 1); CLUSTER_ATTR(toss_secs, 1); CLUSTER_ATTR(scan_secs, 1); @@ -204,7 +201,6 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, - [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, @@ -478,7 +474,6 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_tcp_port = dlm_config.ci_tcp_port; cl->cl_buffer_size = dlm_config.ci_buffer_size; cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; - cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; cl->cl_recover_timer = dlm_config.ci_recover_timer; cl->cl_toss_secs = dlm_config.ci_toss_secs; cl->cl_scan_secs = dlm_config.ci_scan_secs; @@ -1050,7 +1045,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_TCP_PORT 21064 #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_RSBTBL_SIZE 1024 -#define DEFAULT_DIRTBL_SIZE 1024 #define 
DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 #define DEFAULT_SCAN_SECS 5 @@ -1066,7 +1060,6 @@ struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, .ci_buffer_size = DEFAULT_BUFFER_SIZE, .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, - .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, .ci_recover_timer = DEFAULT_RECOVER_TIMER, .ci_toss_secs = DEFAULT_TOSS_SECS, .ci_scan_secs = DEFAULT_SCAN_SECS, diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 9f5e3663bb0c..dbd35a08f3a5 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -27,7 +27,6 @@ struct dlm_config_info { int ci_tcp_port; int ci_buffer_size; int ci_rsbtbl_size; - int ci_dirtbl_size; int ci_recover_timer; int ci_toss_secs; int ci_scan_secs; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 1c9b08095f98..b969deef9ebb 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -344,6 +344,45 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) return rv; } +static int print_format4(struct dlm_rsb *r, struct seq_file *s) +{ + int our_nodeid = dlm_our_nodeid(); + int print_name = 1; + int i, rv; + + lock_rsb(r); + + rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ", + r, + r->res_nodeid, + r->res_master_nodeid, + r->res_dir_nodeid, + our_nodeid, + r->res_toss_time, + r->res_flags, + r->res_length); + if (rv) + goto out; + + for (i = 0; i < r->res_length; i++) { + if (!isascii(r->res_name[i]) || !isprint(r->res_name[i])) + print_name = 0; + } + + seq_printf(s, "%s", print_name ? 
"str " : "hex"); + + for (i = 0; i < r->res_length; i++) { + if (print_name) + seq_printf(s, "%c", r->res_name[i]); + else + seq_printf(s, " %02x", (unsigned char)r->res_name[i]); + } + rv = seq_printf(s, "\n"); + out: + unlock_rsb(r); + return rv; +} + struct rsbtbl_iter { struct dlm_rsb *rsb; unsigned bucket; @@ -382,6 +421,13 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) } rv = print_format3(ri->rsb, seq); break; + case 4: + if (ri->header) { + seq_printf(seq, "version 4 rsb 2\n"); + ri->header = 0; + } + rv = print_format4(ri->rsb, seq); + break; } return rv; @@ -390,15 +436,18 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) static const struct seq_operations format1_seq_ops; static const struct seq_operations format2_seq_ops; static const struct seq_operations format3_seq_ops; +static const struct seq_operations format4_seq_ops; static void *table_seq_start(struct seq_file *seq, loff_t *pos) { + struct rb_root *tree; struct rb_node *node; struct dlm_ls *ls = seq->private; struct rsbtbl_iter *ri; struct dlm_rsb *r; loff_t n = *pos; unsigned bucket, entry; + int toss = (seq->op == &format4_seq_ops); bucket = n >> 32; entry = n & ((1LL << 32) - 1); @@ -417,11 +466,14 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) ri->format = 2; if (seq->op == &format3_seq_ops) ri->format = 3; + if (seq->op == &format4_seq_ops) + ri->format = 4; + + tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { - for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node; - node = rb_next(node)) { + if (!RB_EMPTY_ROOT(tree)) { + for (node = rb_first(tree); node; node = rb_next(node)) { r = rb_entry(node, struct dlm_rsb, res_hashnode); if (!entry--) { dlm_hold_rsb(r); @@ -449,10 +501,11 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) kfree(ri); return NULL; } + tree = toss ? 
&ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { - node = rb_first(&ls->ls_rsbtbl[bucket].keep); + if (!RB_EMPTY_ROOT(tree)) { + node = rb_first(tree); r = rb_entry(node, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; @@ -469,10 +522,12 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { struct dlm_ls *ls = seq->private; struct rsbtbl_iter *ri = iter_ptr; + struct rb_root *tree; struct rb_node *next; struct dlm_rsb *r, *rp; loff_t n = *pos; unsigned bucket; + int toss = (seq->op == &format4_seq_ops); bucket = n >> 32; @@ -511,10 +566,11 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) kfree(ri); return NULL; } + tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { - next = rb_first(&ls->ls_rsbtbl[bucket].keep); + if (!RB_EMPTY_ROOT(tree)) { + next = rb_first(tree); r = rb_entry(next, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; @@ -558,9 +614,17 @@ static const struct seq_operations format3_seq_ops = { .show = table_seq_show, }; +static const struct seq_operations format4_seq_ops = { + .start = table_seq_start, + .next = table_seq_next, + .stop = table_seq_stop, + .show = table_seq_show, +}; + static const struct file_operations format1_fops; static const struct file_operations format2_fops; static const struct file_operations format3_fops; +static const struct file_operations format4_fops; static int table_open(struct inode *inode, struct file *file) { @@ -573,6 +637,8 @@ static int table_open(struct inode *inode, struct file *file) ret = seq_open(file, &format2_seq_ops); else if (file->f_op == &format3_fops) ret = seq_open(file, &format3_seq_ops); + else if (file->f_op == &format4_fops) + ret = seq_open(file, &format4_seq_ops); if (ret) return ret; @@ -606,6 
+672,14 @@ static const struct file_operations format3_fops = { .release = seq_release }; +static const struct file_operations format4_fops = { + .owner = THIS_MODULE, + .open = table_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + /* * dump lkb's on the ls_waiters list */ @@ -652,6 +726,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_locks_dentry); if (ls->ls_debug_all_dentry) debugfs_remove(ls->ls_debug_all_dentry); + if (ls->ls_debug_toss_dentry) + debugfs_remove(ls->ls_debug_toss_dentry); } int dlm_create_debug_file(struct dlm_ls *ls) @@ -694,6 +770,19 @@ int dlm_create_debug_file(struct dlm_ls *ls) if (!ls->ls_debug_all_dentry) goto fail; + /* format 4 */ + + memset(name, 0, sizeof(name)); + snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_toss", ls->ls_name); + + ls->ls_debug_toss_dentry = debugfs_create_file(name, + S_IFREG | S_IRUGO, + dlm_root, + ls, + &format4_fops); + if (!ls->ls_debug_toss_dentry) + goto fail; + memset(name, 0, sizeof(name)); snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name); diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index dc5eb598b81f..278a75cda446 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -23,50 +23,6 @@ #include "lock.h" #include "dir.h" - -static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) -{ - spin_lock(&ls->ls_recover_list_lock); - list_add(&de->list, &ls->ls_recover_list); - spin_unlock(&ls->ls_recover_list_lock); -} - -static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) -{ - int found = 0; - struct dlm_direntry *de; - - spin_lock(&ls->ls_recover_list_lock); - list_for_each_entry(de, &ls->ls_recover_list, list) { - if (de->length == len) { - list_del(&de->list); - de->master_nodeid = 0; - memset(de->name, 0, len); - found = 1; - break; - } - } - spin_unlock(&ls->ls_recover_list_lock); - - if (!found) - de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS); - return de; -} - -void dlm_clear_free_entries(struct dlm_ls *ls) 
-{ - struct dlm_direntry *de; - - spin_lock(&ls->ls_recover_list_lock); - while (!list_empty(&ls->ls_recover_list)) { - de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, - list); - list_del(&de->list); - kfree(de); - } - spin_unlock(&ls->ls_recover_list_lock); -} - /* * We use the upper 16 bits of the hash value to select the directory node. * Low bits are used for distribution of rsb's among hash buckets on each node. @@ -78,144 +34,53 @@ void dlm_clear_free_entries(struct dlm_ls *ls) int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) { - struct list_head *tmp; - struct dlm_member *memb = NULL; - uint32_t node, n = 0; - int nodeid; - - if (ls->ls_num_nodes == 1) { - nodeid = dlm_our_nodeid(); - goto out; - } + uint32_t node; - if (ls->ls_node_array) { + if (ls->ls_num_nodes == 1) + return dlm_our_nodeid(); + else { node = (hash >> 16) % ls->ls_total_weight; - nodeid = ls->ls_node_array[node]; - goto out; - } - - /* make_member_array() failed to kmalloc ls_node_array... */ - - node = (hash >> 16) % ls->ls_num_nodes; - - list_for_each(tmp, &ls->ls_nodes) { - if (n++ != node) - continue; - memb = list_entry(tmp, struct dlm_member, list); - break; + return ls->ls_node_array[node]; } - - DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n", - ls->ls_num_nodes, n, node);); - nodeid = memb->nodeid; - out: - return nodeid; } int dlm_dir_nodeid(struct dlm_rsb *r) { - return dlm_hash2nodeid(r->res_ls, r->res_hash); -} - -static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) -{ - uint32_t val; - - val = jhash(name, len, 0); - val &= (ls->ls_dirtbl_size - 1); - - return val; -} - -static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) -{ - uint32_t bucket; - - bucket = dir_hash(ls, de->name, de->length); - list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); + return r->res_dir_nodeid; } -static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, - int namelen, uint32_t bucket) +void 
dlm_recover_dir_nodeid(struct dlm_ls *ls) { - struct dlm_direntry *de; - - list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { - if (de->length == namelen && !memcmp(name, de->name, namelen)) - goto out; - } - de = NULL; - out: - return de; -} - -void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen) -{ - struct dlm_direntry *de; - uint32_t bucket; - - bucket = dir_hash(ls, name, namelen); - - spin_lock(&ls->ls_dirtbl[bucket].lock); - - de = search_bucket(ls, name, namelen, bucket); - - if (!de) { - log_error(ls, "remove fr %u none", nodeid); - goto out; - } - - if (de->master_nodeid != nodeid) { - log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); - goto out; - } - - list_del(&de->list); - kfree(de); - out: - spin_unlock(&ls->ls_dirtbl[bucket].lock); -} + struct dlm_rsb *r; -void dlm_dir_clear(struct dlm_ls *ls) -{ - struct list_head *head; - struct dlm_direntry *de; - int i; - - DLM_ASSERT(list_empty(&ls->ls_recover_list), ); - - for (i = 0; i < ls->ls_dirtbl_size; i++) { - spin_lock(&ls->ls_dirtbl[i].lock); - head = &ls->ls_dirtbl[i].list; - while (!list_empty(head)) { - de = list_entry(head->next, struct dlm_direntry, list); - list_del(&de->list); - put_free_de(ls, de); - } - spin_unlock(&ls->ls_dirtbl[i].lock); + down_read(&ls->ls_root_sem); + list_for_each_entry(r, &ls->ls_root_list, res_root_list) { + r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); } + up_read(&ls->ls_root_sem); } int dlm_recover_directory(struct dlm_ls *ls) { struct dlm_member *memb; - struct dlm_direntry *de; char *b, *last_name = NULL; - int error = -ENOMEM, last_len, count = 0; + int error = -ENOMEM, last_len, nodeid, result; uint16_t namelen; + unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; log_debug(ls, "dlm_recover_directory"); if (dlm_no_directory(ls)) goto out_status; - dlm_dir_clear(ls); - last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); if (!last_name) goto out; list_for_each_entry(memb, 
&ls->ls_nodes, list) { + if (memb->nodeid == dlm_our_nodeid()) + continue; + memset(last_name, 0, DLM_RESNAME_MAXLEN); last_len = 0; @@ -230,7 +95,7 @@ int dlm_recover_directory(struct dlm_ls *ls) if (error) goto out_free; - schedule(); + cond_resched(); /* * pick namelen/name pairs out of received buffer @@ -267,87 +132,71 @@ int dlm_recover_directory(struct dlm_ls *ls) if (namelen > DLM_RESNAME_MAXLEN) goto out_free; - error = -ENOMEM; - de = get_free_de(ls, namelen); - if (!de) + error = dlm_master_lookup(ls, memb->nodeid, + b, namelen, + DLM_LU_RECOVER_DIR, + &nodeid, &result); + if (error) { + log_error(ls, "recover_dir lookup %d", + error); goto out_free; + } + + /* The name was found in rsbtbl, but the + * master nodeid is different from + * memb->nodeid which says it is the master. + * This should not happen. */ + + if (result == DLM_LU_MATCH && + nodeid != memb->nodeid) { + count_bad++; + log_error(ls, "recover_dir lookup %d " + "nodeid %d memb %d bad %u", + result, nodeid, memb->nodeid, + count_bad); + print_hex_dump_bytes("dlm_recover_dir ", + DUMP_PREFIX_NONE, + b, namelen); + } + + /* The name was found in rsbtbl, and the + * master nodeid matches memb->nodeid. */ + + if (result == DLM_LU_MATCH && + nodeid == memb->nodeid) { + count_match++; + } + + /* The name was not found in rsbtbl and was + * added with memb->nodeid as the master. 
*/ + + if (result == DLM_LU_ADD) { + count_add++; + } - de->master_nodeid = memb->nodeid; - de->length = namelen; last_len = namelen; - memcpy(de->name, b, namelen); memcpy(last_name, b, namelen); b += namelen; left -= namelen; - - add_entry_to_hash(ls, de); count++; } } - done: + done: ; } out_status: error = 0; - log_debug(ls, "dlm_recover_directory %d entries", count); + dlm_set_recover_status(ls, DLM_RS_DIR); + + log_debug(ls, "dlm_recover_directory %u in %u new", + count, count_add); out_free: kfree(last_name); out: - dlm_clear_free_entries(ls); return error; } -static int get_entry(struct dlm_ls *ls, int nodeid, char *name, - int namelen, int *r_nodeid) -{ - struct dlm_direntry *de, *tmp; - uint32_t bucket; - - bucket = dir_hash(ls, name, namelen); - - spin_lock(&ls->ls_dirtbl[bucket].lock); - de = search_bucket(ls, name, namelen, bucket); - if (de) { - *r_nodeid = de->master_nodeid; - spin_unlock(&ls->ls_dirtbl[bucket].lock); - if (*r_nodeid == nodeid) - return -EEXIST; - return 0; - } - - spin_unlock(&ls->ls_dirtbl[bucket].lock); - - if (namelen > DLM_RESNAME_MAXLEN) - return -EINVAL; - - de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS); - if (!de) - return -ENOMEM; - - de->master_nodeid = nodeid; - de->length = namelen; - memcpy(de->name, name, namelen); - - spin_lock(&ls->ls_dirtbl[bucket].lock); - tmp = search_bucket(ls, name, namelen, bucket); - if (tmp) { - kfree(de); - de = tmp; - } else { - list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); - } - *r_nodeid = de->master_nodeid; - spin_unlock(&ls->ls_dirtbl[bucket].lock); - return 0; -} - -int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, - int *r_nodeid) -{ - return get_entry(ls, nodeid, name, namelen, r_nodeid); -} - static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) { struct dlm_rsb *r; @@ -358,10 +207,10 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) bucket = hash & (ls->ls_rsbtbl_size - 1); 
spin_lock(&ls->ls_rsbtbl[bucket].lock); - rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r); + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); if (rv) rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, - name, len, 0, &r); + name, len, &r); spin_unlock(&ls->ls_rsbtbl[bucket].lock); if (!rv) @@ -371,7 +220,7 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) list_for_each_entry(r, &ls->ls_root_list, res_root_list) { if (len == r->res_length && !memcmp(name, r->res_name, len)) { up_read(&ls->ls_root_sem); - log_error(ls, "find_rsb_root revert to root_list %s", + log_debug(ls, "find_rsb_root revert to root_list %s", r->res_name); return r; } @@ -429,6 +278,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, be_namelen = cpu_to_be16(0); memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); offset += sizeof(__be16); + ls->ls_recover_dir_sent_msg++; goto out; } @@ -437,6 +287,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, offset += sizeof(__be16); memcpy(outbuf + offset, r->res_name, r->res_length); offset += r->res_length; + ls->ls_recover_dir_sent_res++; } /* @@ -449,8 +300,8 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, be_namelen = cpu_to_be16(0xFFFF); memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); offset += sizeof(__be16); + ls->ls_recover_dir_sent_msg++; } - out: up_read(&ls->ls_root_sem); } diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h index 0b0eb1267b6e..417506344456 100644 --- a/fs/dlm/dir.h +++ b/fs/dlm/dir.h @@ -14,15 +14,10 @@ #ifndef __DIR_DOT_H__ #define __DIR_DOT_H__ - int dlm_dir_nodeid(struct dlm_rsb *rsb); int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash); -void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len); -void dlm_dir_clear(struct dlm_ls *ls); -void dlm_clear_free_entries(struct dlm_ls *ls); +void dlm_recover_dir_nodeid(struct dlm_ls *ls); int 
dlm_recover_directory(struct dlm_ls *ls); -int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, - int *r_nodeid); void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, char *outbuf, int outlen, int nodeid); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index bc342f7ac3af..3093207a7684 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -55,8 +55,6 @@ struct dlm_lkb; struct dlm_rsb; struct dlm_member; struct dlm_rsbtable; -struct dlm_dirtable; -struct dlm_direntry; struct dlm_recover; struct dlm_header; struct dlm_message; @@ -98,18 +96,6 @@ do { \ } -struct dlm_direntry { - struct list_head list; - uint32_t master_nodeid; - uint16_t length; - char name[1]; -}; - -struct dlm_dirtable { - struct list_head list; - spinlock_t lock; -}; - struct dlm_rsbtable { struct rb_root keep; struct rb_root toss; @@ -283,6 +269,15 @@ struct dlm_lkb { }; }; +/* + * res_master_nodeid is "normal": 0 is unset/invalid, non-zero is the real + * nodeid, even when nodeid is our_nodeid. + * + * res_nodeid is "odd": -1 is unset/invalid, zero means our_nodeid, + * greater than zero when another nodeid. 
+ * + * (TODO: remove res_nodeid and only use res_master_nodeid) + */ struct dlm_rsb { struct dlm_ls *res_ls; /* the lockspace */ @@ -291,6 +286,8 @@ struct dlm_rsb { unsigned long res_flags; int res_length; /* length of rsb name */ int res_nodeid; + int res_master_nodeid; + int res_dir_nodeid; uint32_t res_lvbseq; uint32_t res_hash; uint32_t res_bucket; /* rsbtbl */ @@ -313,10 +310,21 @@ struct dlm_rsb { char res_name[DLM_RESNAME_MAXLEN+1]; }; +/* dlm_master_lookup() flags */ + +#define DLM_LU_RECOVER_DIR 1 +#define DLM_LU_RECOVER_MASTER 2 + +/* dlm_master_lookup() results */ + +#define DLM_LU_MATCH 1 +#define DLM_LU_ADD 2 + /* find_rsb() flags */ -#define R_MASTER 1 /* only return rsb if it's a master */ -#define R_CREATE 2 /* create/add rsb if not found */ +#define R_REQUEST 0x00000001 +#define R_RECEIVE_REQUEST 0x00000002 +#define R_RECEIVE_RECOVER 0x00000004 /* rsb_flags */ @@ -509,9 +517,6 @@ struct dlm_ls { struct dlm_rsbtable *ls_rsbtbl; uint32_t ls_rsbtbl_size; - struct dlm_dirtable *ls_dirtbl; - uint32_t ls_dirtbl_size; - struct mutex ls_waiters_mutex; struct list_head ls_waiters; /* lkbs needing a reply */ @@ -545,6 +550,7 @@ struct dlm_ls { struct dentry *ls_debug_waiters_dentry; /* debugfs */ struct dentry *ls_debug_locks_dentry; /* debugfs */ struct dentry *ls_debug_all_dentry; /* debugfs */ + struct dentry *ls_debug_toss_dentry; /* debugfs */ wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; @@ -573,6 +579,8 @@ struct dlm_ls { struct mutex ls_requestqueue_mutex; struct dlm_rcom *ls_recover_buf; int ls_recover_nodeid; /* for debugging */ + unsigned int ls_recover_dir_sent_res; /* for log info */ + unsigned int ls_recover_dir_sent_msg; /* for log info */ unsigned int ls_recover_locks_in; /* for log info */ uint64_t ls_rcom_seq; spinlock_t ls_rcom_spin; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index bdafb65a5234..d9ee1b96549a 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -90,6 +90,7 @@ static void 
__receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); static void del_timeout(struct dlm_lkb *lkb); +static void toss_rsb(struct kref *kref); /* * Lock compatibilty matrix - thanks Steve @@ -170,9 +171,11 @@ void dlm_print_lkb(struct dlm_lkb *lkb) static void dlm_print_rsb(struct dlm_rsb *r) { - printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", - r->res_nodeid, r->res_flags, r->res_first_lkid, - r->res_recover_locks_count, r->res_name); + printk(KERN_ERR "rsb: nodeid %d master %d dir %d flags %lx first %x " + "rlc %d name %s\n", + r->res_nodeid, r->res_master_nodeid, r->res_dir_nodeid, + r->res_flags, r->res_first_lkid, r->res_recover_locks_count, + r->res_name); } void dlm_dump_rsb(struct dlm_rsb *r) @@ -327,6 +330,37 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) * Basic operations on rsb's and lkb's */ +/* This is only called to add a reference when the code already holds + a valid reference to the rsb, so there's no need for locking. */ + +static inline void hold_rsb(struct dlm_rsb *r) +{ + kref_get(&r->res_ref); +} + +void dlm_hold_rsb(struct dlm_rsb *r) +{ + hold_rsb(r); +} + +/* When all references to the rsb are gone it's transferred to + the tossed list for later disposal. 
*/ + +static void put_rsb(struct dlm_rsb *r) +{ + struct dlm_ls *ls = r->res_ls; + uint32_t bucket = r->res_bucket; + + spin_lock(&ls->ls_rsbtbl[bucket].lock); + kref_put(&r->res_ref, toss_rsb); + spin_unlock(&ls->ls_rsbtbl[bucket].lock); +} + +void dlm_put_rsb(struct dlm_rsb *r) +{ + put_rsb(r); +} + static int pre_rsb_struct(struct dlm_ls *ls) { struct dlm_rsb *r1, *r2; @@ -411,11 +445,10 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) } int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, - unsigned int flags, struct dlm_rsb **r_ret) + struct dlm_rsb **r_ret) { struct rb_node *node = tree->rb_node; struct dlm_rsb *r; - int error = 0; int rc; while (node) { @@ -432,10 +465,8 @@ int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, return -EBADR; found: - if (r->res_nodeid && (flags & R_MASTER)) - error = -ENOTBLK; *r_ret = r; - return error; + return 0; } static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) @@ -467,124 +498,587 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) return 0; } -static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, - unsigned int flags, struct dlm_rsb **r_ret) +/* + * Find rsb in rsbtbl and potentially create/add one + * + * Delaying the release of rsb's has a similar benefit to applications keeping + * NL locks on an rsb, but without the guarantee that the cached master value + * will still be valid when the rsb is reused. Apps aren't always smart enough + * to keep NL locks on an rsb that they may lock again shortly; this can lead + * to excessive master lookups and removals if we don't delay the release. + * + * Searching for an rsb means looking through both the normal list and toss + * list. When found on the toss list the rsb is moved to the normal list with + * ref count of 1; when found on normal list the ref count is incremented. + * + * rsb's on the keep list are being used locally and refcounted. 
+ * rsb's on the toss list are not being used locally, and are not refcounted. + * + * The toss list rsb's were either + * - previously used locally but not any more (were on keep list, then + * moved to toss list when last refcount dropped) + * - created and put on toss list as a directory record for a lookup + * (we are the dir node for the res, but are not using the res right now, + * but some other node is) + * + * The purpose of find_rsb() is to return a refcounted rsb for local use. + * So, if the given rsb is on the toss list, it is moved to the keep list + * before being returned. + * + * toss_rsb() happens when all local usage of the rsb is done, i.e. no + * more refcounts exist, so the rsb is moved from the keep list to the + * toss list. + * + * rsb's on both keep and toss lists are used for doing name to master + * lookups. rsb's that are in use locally (and being refcounted) are on + * the keep list, rsb's that are not in use locally (not refcounted) and + * only exist for name/master lookups are on the toss list. + * + * rsb's on the toss list whose dir_nodeid is not local can have stale + * name/master mappings. So, remote requests on such rsb's can potentially + * return with an error, which means the mapping is stale and needs to + * be updated with a new lookup. (The idea behind MASTER UNCERTAIN and + * first_lkid is to keep only a single outstanding request on an rsb + * while that rsb has a potentially stale master.) 
+ */ + +static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, + uint32_t hash, uint32_t b, + int dir_nodeid, int from_nodeid, + unsigned int flags, struct dlm_rsb **r_ret) { - struct dlm_rsb *r; + struct dlm_rsb *r = NULL; + int our_nodeid = dlm_our_nodeid(); + int from_local = 0; + int from_other = 0; + int from_dir = 0; + int create = 0; int error; - error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); - if (!error) { - kref_get(&r->res_ref); - goto out; + if (flags & R_RECEIVE_REQUEST) { + if (from_nodeid == dir_nodeid) + from_dir = 1; + else + from_other = 1; + } else if (flags & R_REQUEST) { + from_local = 1; + } + + /* + * flags & R_RECEIVE_RECOVER is from dlm_recover_master_copy, so + * from_nodeid has sent us a lock in dlm_recover_locks, believing + * we're the new master. Our local recovery may not have set + * res_master_nodeid to our_nodeid yet, so allow either. Don't + * create the rsb; dlm_recover_process_copy() will handle EBADR + * by resending. + * + * If someone sends us a request, we are the dir node, and we do + * not find the rsb anywhere, then recreate it. This happens if + * someone sends us a request after we have removed/freed an rsb + * from our toss list. (They sent a request instead of lookup + * because they are using an rsb from their toss list.) + */ + + if (from_local || from_dir || + (from_other && (dir_nodeid == our_nodeid))) { + create = 1; } - if (error == -ENOTBLK) - goto out; - error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); + retry: + if (create) { + error = pre_rsb_struct(ls); + i |
