summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/dlm/config.c 7
-rw-r--r-- fs/dlm/config.h 1
-rw-r--r-- fs/dlm/debug_fs.c 103
-rw-r--r-- fs/dlm/dir.c 287
-rw-r--r-- fs/dlm/dir.h 7
-rw-r--r-- fs/dlm/dlm_internal.h 46
-rw-r--r-- fs/dlm/lock.c 1022
-rw-r--r-- fs/dlm/lock.h 5
-rw-r--r-- fs/dlm/lockspace.c 23
-rw-r--r-- fs/dlm/rcom.c 145
-rw-r--r-- fs/dlm/rcom.h 1
-rw-r--r-- fs/dlm/recover.c 140
-rw-r--r-- fs/dlm/recover.h 2
-rw-r--r-- fs/dlm/recoverd.c 14
14 files changed, 1215 insertions, 588 deletions
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index e7e327d43fa5..9ccf7346834a 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -96,7 +96,6 @@ struct dlm_cluster {
unsigned int cl_tcp_port;
unsigned int cl_buffer_size;
unsigned int cl_rsbtbl_size;
- unsigned int cl_dirtbl_size;
unsigned int cl_recover_timer;
unsigned int cl_toss_secs;
unsigned int cl_scan_secs;
@@ -113,7 +112,6 @@ enum {
CLUSTER_ATTR_TCP_PORT = 0,
CLUSTER_ATTR_BUFFER_SIZE,
CLUSTER_ATTR_RSBTBL_SIZE,
- CLUSTER_ATTR_DIRTBL_SIZE,
CLUSTER_ATTR_RECOVER_TIMER,
CLUSTER_ATTR_TOSS_SECS,
CLUSTER_ATTR_SCAN_SECS,
@@ -189,7 +187,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
CLUSTER_ATTR(tcp_port, 1);
CLUSTER_ATTR(buffer_size, 1);
CLUSTER_ATTR(rsbtbl_size, 1);
-CLUSTER_ATTR(dirtbl_size, 1);
CLUSTER_ATTR(recover_timer, 1);
CLUSTER_ATTR(toss_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
@@ -204,7 +201,6 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
- [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
@@ -478,7 +474,6 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
- cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
cl->cl_recover_timer = dlm_config.ci_recover_timer;
cl->cl_toss_secs = dlm_config.ci_toss_secs;
cl->cl_scan_secs = dlm_config.ci_scan_secs;
@@ -1050,7 +1045,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_TCP_PORT 21064
#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 1024
-#define DEFAULT_DIRTBL_SIZE 1024
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5
@@ -1066,7 +1060,6 @@ struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
.ci_buffer_size = DEFAULT_BUFFER_SIZE,
.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
- .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
.ci_recover_timer = DEFAULT_RECOVER_TIMER,
.ci_toss_secs = DEFAULT_TOSS_SECS,
.ci_scan_secs = DEFAULT_SCAN_SECS,
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9f5e3663bb0c..dbd35a08f3a5 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,7 +27,6 @@ struct dlm_config_info {
int ci_tcp_port;
int ci_buffer_size;
int ci_rsbtbl_size;
- int ci_dirtbl_size;
int ci_recover_timer;
int ci_toss_secs;
int ci_scan_secs;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1c9b08095f98..b969deef9ebb 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -344,6 +344,45 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
return rv;
}
+static int print_format4(struct dlm_rsb *r, struct seq_file *s)
+{
+ int our_nodeid = dlm_our_nodeid();
+ int print_name = 1;
+ int i, rv;
+
+ lock_rsb(r);
+
+ rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ",
+ r,
+ r->res_nodeid,
+ r->res_master_nodeid,
+ r->res_dir_nodeid,
+ our_nodeid,
+ r->res_toss_time,
+ r->res_flags,
+ r->res_length);
+ if (rv)
+ goto out;
+
+ for (i = 0; i < r->res_length; i++) {
+ if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
+ print_name = 0;
+ }
+
+ seq_printf(s, "%s", print_name ? "str " : "hex");
+
+ for (i = 0; i < r->res_length; i++) {
+ if (print_name)
+ seq_printf(s, "%c", r->res_name[i]);
+ else
+ seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
+ }
+ rv = seq_printf(s, "\n");
+ out:
+ unlock_rsb(r);
+ return rv;
+}
+
struct rsbtbl_iter {
struct dlm_rsb *rsb;
unsigned bucket;
@@ -382,6 +421,13 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr)
}
rv = print_format3(ri->rsb, seq);
break;
+ case 4:
+ if (ri->header) {
+ seq_printf(seq, "version 4 rsb 2\n");
+ ri->header = 0;
+ }
+ rv = print_format4(ri->rsb, seq);
+ break;
}
return rv;
@@ -390,15 +436,18 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr)
static const struct seq_operations format1_seq_ops;
static const struct seq_operations format2_seq_ops;
static const struct seq_operations format3_seq_ops;
+static const struct seq_operations format4_seq_ops;
static void *table_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct rb_root *tree;
struct rb_node *node;
struct dlm_ls *ls = seq->private;
struct rsbtbl_iter *ri;
struct dlm_rsb *r;
loff_t n = *pos;
unsigned bucket, entry;
+ int toss = (seq->op == &format4_seq_ops);
bucket = n >> 32;
entry = n & ((1LL << 32) - 1);
@@ -417,11 +466,14 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
ri->format = 2;
if (seq->op == &format3_seq_ops)
ri->format = 3;
+ if (seq->op == &format4_seq_ops)
+ ri->format = 4;
+
+ tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
- if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) {
- for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node;
- node = rb_next(node)) {
+ if (!RB_EMPTY_ROOT(tree)) {
+ for (node = rb_first(tree); node; node = rb_next(node)) {
r = rb_entry(node, struct dlm_rsb, res_hashnode);
if (!entry--) {
dlm_hold_rsb(r);
@@ -449,10 +501,11 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
kfree(ri);
return NULL;
}
+ tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
- if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) {
- node = rb_first(&ls->ls_rsbtbl[bucket].keep);
+ if (!RB_EMPTY_ROOT(tree)) {
+ node = rb_first(tree);
r = rb_entry(node, struct dlm_rsb, res_hashnode);
dlm_hold_rsb(r);
ri->rsb = r;
@@ -469,10 +522,12 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
{
struct dlm_ls *ls = seq->private;
struct rsbtbl_iter *ri = iter_ptr;
+ struct rb_root *tree;
struct rb_node *next;
struct dlm_rsb *r, *rp;
loff_t n = *pos;
unsigned bucket;
+ int toss = (seq->op == &format4_seq_ops);
bucket = n >> 32;
@@ -511,10 +566,11 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
kfree(ri);
return NULL;
}
+ tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
spin_lock(&ls->ls_rsbtbl[bucket].lock);
- if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) {
- next = rb_first(&ls->ls_rsbtbl[bucket].keep);
+ if (!RB_EMPTY_ROOT(tree)) {
+ next = rb_first(tree);
r = rb_entry(next, struct dlm_rsb, res_hashnode);
dlm_hold_rsb(r);
ri->rsb = r;
@@ -558,9 +614,17 @@ static const struct seq_operations format3_seq_ops = {
.show = table_seq_show,
};
+static const struct seq_operations format4_seq_ops = {
+ .start = table_seq_start,
+ .next = table_seq_next,
+ .stop = table_seq_stop,
+ .show = table_seq_show,
+};
+
static const struct file_operations format1_fops;
static const struct file_operations format2_fops;
static const struct file_operations format3_fops;
+static const struct file_operations format4_fops;
static int table_open(struct inode *inode, struct file *file)
{
@@ -573,6 +637,8 @@ static int table_open(struct inode *inode, struct file *file)
ret = seq_open(file, &format2_seq_ops);
else if (file->f_op == &format3_fops)
ret = seq_open(file, &format3_seq_ops);
+ else if (file->f_op == &format4_fops)
+ ret = seq_open(file, &format4_seq_ops);
if (ret)
return ret;
@@ -606,6 +672,14 @@ static const struct file_operations format3_fops = {
.release = seq_release
};
+static const struct file_operations format4_fops = {
+ .owner = THIS_MODULE,
+ .open = table_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
/*
* dump lkb's on the ls_waiters list
*/
@@ -652,6 +726,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
debugfs_remove(ls->ls_debug_locks_dentry);
if (ls->ls_debug_all_dentry)
debugfs_remove(ls->ls_debug_all_dentry);
+ if (ls->ls_debug_toss_dentry)
+ debugfs_remove(ls->ls_debug_toss_dentry);
}
int dlm_create_debug_file(struct dlm_ls *ls)
@@ -694,6 +770,19 @@ int dlm_create_debug_file(struct dlm_ls *ls)
if (!ls->ls_debug_all_dentry)
goto fail;
+ /* format 4 */
+
+ memset(name, 0, sizeof(name));
+ snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_toss", ls->ls_name);
+
+ ls->ls_debug_toss_dentry = debugfs_create_file(name,
+ S_IFREG | S_IRUGO,
+ dlm_root,
+ ls,
+ &format4_fops);
+ if (!ls->ls_debug_toss_dentry)
+ goto fail;
+
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name);
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index dc5eb598b81f..278a75cda446 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -23,50 +23,6 @@
#include "lock.h"
#include "dir.h"
-
-static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
-{
- spin_lock(&ls->ls_recover_list_lock);
- list_add(&de->list, &ls->ls_recover_list);
- spin_unlock(&ls->ls_recover_list_lock);
-}
-
-static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
-{
- int found = 0;
- struct dlm_direntry *de;
-
- spin_lock(&ls->ls_recover_list_lock);
- list_for_each_entry(de, &ls->ls_recover_list, list) {
- if (de->length == len) {
- list_del(&de->list);
- de->master_nodeid = 0;
- memset(de->name, 0, len);
- found = 1;
- break;
- }
- }
- spin_unlock(&ls->ls_recover_list_lock);
-
- if (!found)
- de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS);
- return de;
-}
-
-void dlm_clear_free_entries(struct dlm_ls *ls)
-{
- struct dlm_direntry *de;
-
- spin_lock(&ls->ls_recover_list_lock);
- while (!list_empty(&ls->ls_recover_list)) {
- de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
- list);
- list_del(&de->list);
- kfree(de);
- }
- spin_unlock(&ls->ls_recover_list_lock);
-}
-
/*
* We use the upper 16 bits of the hash value to select the directory node.
* Low bits are used for distribution of rsb's among hash buckets on each node.
@@ -78,144 +34,53 @@ void dlm_clear_free_entries(struct dlm_ls *ls)
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
{
- struct list_head *tmp;
- struct dlm_member *memb = NULL;
- uint32_t node, n = 0;
- int nodeid;
-
- if (ls->ls_num_nodes == 1) {
- nodeid = dlm_our_nodeid();
- goto out;
- }
+ uint32_t node;
- if (ls->ls_node_array) {
+ if (ls->ls_num_nodes == 1)
+ return dlm_our_nodeid();
+ else {
node = (hash >> 16) % ls->ls_total_weight;
- nodeid = ls->ls_node_array[node];
- goto out;
- }
-
- /* make_member_array() failed to kmalloc ls_node_array... */
-
- node = (hash >> 16) % ls->ls_num_nodes;
-
- list_for_each(tmp, &ls->ls_nodes) {
- if (n++ != node)
- continue;
- memb = list_entry(tmp, struct dlm_member, list);
- break;
+ return ls->ls_node_array[node];
}
-
- DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
- ls->ls_num_nodes, n, node););
- nodeid = memb->nodeid;
- out:
- return nodeid;
}
int dlm_dir_nodeid(struct dlm_rsb *r)
{
- return dlm_hash2nodeid(r->res_ls, r->res_hash);
-}
-
-static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
-{
- uint32_t val;
-
- val = jhash(name, len, 0);
- val &= (ls->ls_dirtbl_size - 1);
-
- return val;
-}
-
-static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
-{
- uint32_t bucket;
-
- bucket = dir_hash(ls, de->name, de->length);
- list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
+ return r->res_dir_nodeid;
}
-static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
- int namelen, uint32_t bucket)
+void dlm_recover_dir_nodeid(struct dlm_ls *ls)
{
- struct dlm_direntry *de;
-
- list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
- if (de->length == namelen && !memcmp(name, de->name, namelen))
- goto out;
- }
- de = NULL;
- out:
- return de;
-}
-
-void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
-{
- struct dlm_direntry *de;
- uint32_t bucket;
-
- bucket = dir_hash(ls, name, namelen);
-
- spin_lock(&ls->ls_dirtbl[bucket].lock);
-
- de = search_bucket(ls, name, namelen, bucket);
-
- if (!de) {
- log_error(ls, "remove fr %u none", nodeid);
- goto out;
- }
-
- if (de->master_nodeid != nodeid) {
- log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
- goto out;
- }
-
- list_del(&de->list);
- kfree(de);
- out:
- spin_unlock(&ls->ls_dirtbl[bucket].lock);
-}
+ struct dlm_rsb *r;
-void dlm_dir_clear(struct dlm_ls *ls)
-{
- struct list_head *head;
- struct dlm_direntry *de;
- int i;
-
- DLM_ASSERT(list_empty(&ls->ls_recover_list), );
-
- for (i = 0; i < ls->ls_dirtbl_size; i++) {
- spin_lock(&ls->ls_dirtbl[i].lock);
- head = &ls->ls_dirtbl[i].list;
- while (!list_empty(head)) {
- de = list_entry(head->next, struct dlm_direntry, list);
- list_del(&de->list);
- put_free_de(ls, de);
- }
- spin_unlock(&ls->ls_dirtbl[i].lock);
+ down_read(&ls->ls_root_sem);
+ list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+ r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
}
+ up_read(&ls->ls_root_sem);
}
int dlm_recover_directory(struct dlm_ls *ls)
{
struct dlm_member *memb;
- struct dlm_direntry *de;
char *b, *last_name = NULL;
- int error = -ENOMEM, last_len, count = 0;
+ int error = -ENOMEM, last_len, nodeid, result;
uint16_t namelen;
+ unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
log_debug(ls, "dlm_recover_directory");
if (dlm_no_directory(ls))
goto out_status;
- dlm_dir_clear(ls);
-
last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
if (!last_name)
goto out;
list_for_each_entry(memb, &ls->ls_nodes, list) {
+ if (memb->nodeid == dlm_our_nodeid())
+ continue;
+
memset(last_name, 0, DLM_RESNAME_MAXLEN);
last_len = 0;
@@ -230,7 +95,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
if (error)
goto out_free;
- schedule();
+ cond_resched();
/*
* pick namelen/name pairs out of received buffer
@@ -267,87 +132,71 @@ int dlm_recover_directory(struct dlm_ls *ls)
if (namelen > DLM_RESNAME_MAXLEN)
goto out_free;
- error = -ENOMEM;
- de = get_free_de(ls, namelen);
- if (!de)
+ error = dlm_master_lookup(ls, memb->nodeid,
+ b, namelen,
+ DLM_LU_RECOVER_DIR,
+ &nodeid, &result);
+ if (error) {
+ log_error(ls, "recover_dir lookup %d",
+ error);
goto out_free;
+ }
+
+ /* The name was found in rsbtbl, but the
+ * master nodeid is different from
+ * memb->nodeid which says it is the master.
+ * This should not happen. */
+
+ if (result == DLM_LU_MATCH &&
+ nodeid != memb->nodeid) {
+ count_bad++;
+ log_error(ls, "recover_dir lookup %d "
+ "nodeid %d memb %d bad %u",
+ result, nodeid, memb->nodeid,
+ count_bad);
+ print_hex_dump_bytes("dlm_recover_dir ",
+ DUMP_PREFIX_NONE,
+ b, namelen);
+ }
+
+ /* The name was found in rsbtbl, and the
+ * master nodeid matches memb->nodeid. */
+
+ if (result == DLM_LU_MATCH &&
+ nodeid == memb->nodeid) {
+ count_match++;
+ }
+
+ /* The name was not found in rsbtbl and was
+ * added with memb->nodeid as the master. */
+
+ if (result == DLM_LU_ADD) {
+ count_add++;
+ }
- de->master_nodeid = memb->nodeid;
- de->length = namelen;
last_len = namelen;
- memcpy(de->name, b, namelen);
memcpy(last_name, b, namelen);
b += namelen;
left -= namelen;
-
- add_entry_to_hash(ls, de);
count++;
}
}
- done:
+ done:
;
}
out_status:
error = 0;
- log_debug(ls, "dlm_recover_directory %d entries", count);
+ dlm_set_recover_status(ls, DLM_RS_DIR);
+
+ log_debug(ls, "dlm_recover_directory %u in %u new",
+ count, count_add);
out_free:
kfree(last_name);
out:
- dlm_clear_free_entries(ls);
return error;
}
-static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
- int namelen, int *r_nodeid)
-{
- struct dlm_direntry *de, *tmp;
- uint32_t bucket;
-
- bucket = dir_hash(ls, name, namelen);
-
- spin_lock(&ls->ls_dirtbl[bucket].lock);
- de = search_bucket(ls, name, namelen, bucket);
- if (de) {
- *r_nodeid = de->master_nodeid;
- spin_unlock(&ls->ls_dirtbl[bucket].lock);
- if (*r_nodeid == nodeid)
- return -EEXIST;
- return 0;
- }
-
- spin_unlock(&ls->ls_dirtbl[bucket].lock);
-
- if (namelen > DLM_RESNAME_MAXLEN)
- return -EINVAL;
-
- de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS);
- if (!de)
- return -ENOMEM;
-
- de->master_nodeid = nodeid;
- de->length = namelen;
- memcpy(de->name, name, namelen);
-
- spin_lock(&ls->ls_dirtbl[bucket].lock);
- tmp = search_bucket(ls, name, namelen, bucket);
- if (tmp) {
- kfree(de);
- de = tmp;
- } else {
- list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
- }
- *r_nodeid = de->master_nodeid;
- spin_unlock(&ls->ls_dirtbl[bucket].lock);
- return 0;
-}
-
-int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
- int *r_nodeid)
-{
- return get_entry(ls, nodeid, name, namelen, r_nodeid);
-}
-
static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
{
struct dlm_rsb *r;
@@ -358,10 +207,10 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
bucket = hash & (ls->ls_rsbtbl_size - 1);
spin_lock(&ls->ls_rsbtbl[bucket].lock);
- rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r);
+ rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
if (rv)
rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
- name, len, 0, &r);
+ name, len, &r);
spin_unlock(&ls->ls_rsbtbl[bucket].lock);
if (!rv)
@@ -371,7 +220,7 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (len == r->res_length && !memcmp(name, r->res_name, len)) {
up_read(&ls->ls_root_sem);
- log_error(ls, "find_rsb_root revert to root_list %s",
+ log_debug(ls, "find_rsb_root revert to root_list %s",
r->res_name);
return r;
}
@@ -429,6 +278,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
be_namelen = cpu_to_be16(0);
memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
offset += sizeof(__be16);
+ ls->ls_recover_dir_sent_msg++;
goto out;
}
@@ -437,6 +287,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
offset += sizeof(__be16);
memcpy(outbuf + offset, r->res_name, r->res_length);
offset += r->res_length;
+ ls->ls_recover_dir_sent_res++;
}
/*
@@ -449,8 +300,8 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
be_namelen = cpu_to_be16(0xFFFF);
memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
offset += sizeof(__be16);
+ ls->ls_recover_dir_sent_msg++;
}
-
out:
up_read(&ls->ls_root_sem);
}
diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h
index 0b0eb1267b6e..417506344456 100644
--- a/fs/dlm/dir.h
+++ b/fs/dlm/dir.h
@@ -14,15 +14,10 @@
#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__
-
int dlm_dir_nodeid(struct dlm_rsb *rsb);
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash);
-void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len);
-void dlm_dir_clear(struct dlm_ls *ls);
-void dlm_clear_free_entries(struct dlm_ls *ls);
+void dlm_recover_dir_nodeid(struct dlm_ls *ls);
int dlm_recover_directory(struct dlm_ls *ls);
-int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
- int *r_nodeid);
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
char *outbuf, int outlen, int nodeid);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index bc342f7ac3af..3093207a7684 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -55,8 +55,6 @@ struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
struct dlm_rsbtable;
-struct dlm_dirtable;
-struct dlm_direntry;
struct dlm_recover;
struct dlm_header;
struct dlm_message;
@@ -98,18 +96,6 @@ do { \
}
-struct dlm_direntry {
- struct list_head list;
- uint32_t master_nodeid;
- uint16_t length;
- char name[1];
-};
-
-struct dlm_dirtable {
- struct list_head list;
- spinlock_t lock;
-};
-
struct dlm_rsbtable {
struct rb_root keep;
struct rb_root toss;
@@ -283,6 +269,15 @@ struct dlm_lkb {
};
};
+/*
+ * res_master_nodeid is "normal": 0 is unset/invalid, non-zero is the real
+ * nodeid, even when nodeid is our_nodeid.
+ *
+ * res_nodeid is "odd": -1 is unset/invalid, zero means our_nodeid,
+ * greater than zero when another nodeid.
+ *
+ * (TODO: remove res_nodeid and only use res_master_nodeid)
+ */
struct dlm_rsb {
struct dlm_ls *res_ls; /* the lockspace */
@@ -291,6 +286,8 @@ struct dlm_rsb {
unsigned long res_flags;
int res_length; /* length of rsb name */
int res_nodeid;
+ int res_master_nodeid;
+ int res_dir_nodeid;
uint32_t res_lvbseq;
uint32_t res_hash;
uint32_t res_bucket; /* rsbtbl */
@@ -313,10 +310,21 @@ struct dlm_rsb {
char res_name[DLM_RESNAME_MAXLEN+1];
};
+/* dlm_master_lookup() flags */
+
+#define DLM_LU_RECOVER_DIR 1
+#define DLM_LU_RECOVER_MASTER 2
+
+/* dlm_master_lookup() results */
+
+#define DLM_LU_MATCH 1
+#define DLM_LU_ADD 2
+
/* find_rsb() flags */
-#define R_MASTER 1 /* only return rsb if it's a master */
-#define R_CREATE 2 /* create/add rsb if not found */
+#define R_REQUEST 0x00000001
+#define R_RECEIVE_REQUEST 0x00000002
+#define R_RECEIVE_RECOVER 0x00000004
/* rsb_flags */
@@ -509,9 +517,6 @@ struct dlm_ls {
struct dlm_rsbtable *ls_rsbtbl;
uint32_t ls_rsbtbl_size;
- struct dlm_dirtable *ls_dirtbl;
- uint32_t ls_dirtbl_size;
-
struct mutex ls_waiters_mutex;
struct list_head ls_waiters; /* lkbs needing a reply */
@@ -545,6 +550,7 @@ struct dlm_ls {
struct dentry *ls_debug_waiters_dentry; /* debugfs */
struct dentry *ls_debug_locks_dentry; /* debugfs */
struct dentry *ls_debug_all_dentry; /* debugfs */
+ struct dentry *ls_debug_toss_dentry; /* debugfs */
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
@@ -573,6 +579,8 @@ struct dlm_ls {
struct mutex ls_requestqueue_mutex;
struct dlm_rcom *ls_recover_buf;
int ls_recover_nodeid; /* for debugging */
+ unsigned int ls_recover_dir_sent_res; /* for log info */
+ unsigned int ls_recover_dir_sent_msg; /* for log info */
unsigned int ls_recover_locks_in; /* for log info */
uint64_t ls_rcom_seq;
spinlock_t ls_rcom_spin;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index bdafb65a5234..d9ee1b96549a 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -90,6 +90,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
static void del_timeout(struct dlm_lkb *lkb);
+static void toss_rsb(struct kref *kref);
/*
* Lock compatibilty matrix - thanks Steve
@@ -170,9 +171,11 @@ void dlm_print_lkb(struct dlm_lkb *lkb)
static void dlm_print_rsb(struct dlm_rsb *r)
{
- printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n",
- r->res_nodeid, r->res_flags, r->res_first_lkid,
- r->res_recover_locks_count, r->res_name);
+ printk(KERN_ERR "rsb: nodeid %d master %d dir %d flags %lx first %x "
+ "rlc %d name %s\n",
+ r->res_nodeid, r->res_master_nodeid, r->res_dir_nodeid,
+ r->res_flags, r->res_first_lkid, r->res_recover_locks_count,
+ r->res_name);
}
void dlm_dump_rsb(struct dlm_rsb *r)
@@ -327,6 +330,37 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
* Basic operations on rsb's and lkb's
*/
+/* This is only called to add a reference when the code already holds
+ a valid reference to the rsb, so there's no need for locking. */
+
+static inline void hold_rsb(struct dlm_rsb *r)
+{
+ kref_get(&r->res_ref);
+}
+
+void dlm_hold_rsb(struct dlm_rsb *r)
+{
+ hold_rsb(r);
+}
+
+/* When all references to the rsb are gone it's transferred to
+ the tossed list for later disposal. */
+
+static void put_rsb(struct dlm_rsb *r)
+{
+ struct dlm_ls *ls = r->res_ls;
+ uint32_t bucket = r->res_bucket;
+
+ spin_lock(&ls->ls_rsbtbl[bucket].lock);
+ kref_put(&r->res_ref, toss_rsb);
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+}
+
+void dlm_put_rsb(struct dlm_rsb *r)
+{
+ put_rsb(r);
+}
+
static int pre_rsb_struct(struct dlm_ls *ls)
{
struct dlm_rsb *r1, *r2;
@@ -411,11 +445,10 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
}
int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
- unsigned int flags, struct dlm_rsb **r_ret)
+ struct dlm_rsb **r_ret)
{
struct rb_node *node = tree->rb_node;
struct dlm_rsb *r;
- int error = 0;
int rc;
while (node) {
@@ -432,10 +465,8 @@ int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
return -EBADR;
found:
- if (r->res_nodeid && (flags & R_MASTER))
- error = -ENOTBLK;
*r_ret = r;
- return error;
+ return 0;
}
static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
@@ -467,124 +498,587 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
return 0;
}
-static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
- unsigned int flags, struct dlm_rsb **r_ret)
+/*
+ * Find rsb in rsbtbl and potentially create/add one
+ *
+ * Delaying the release of rsb's has a similar benefit to applications keeping
+ * NL locks on an rsb, but without the guarantee that the cached master value
+ * will still be valid when the rsb is reused. Apps aren't always smart enough
+ * to keep NL locks on an rsb that they may lock again shortly; this can lead
+ * to excessive master lookups and removals if we don't delay the release.
+ *
+ * Searching for an rsb means looking through both the normal list and toss
+ * list. When found on the toss list the rsb is moved to the normal list with
+ * ref count of 1; when found on normal list the ref count is incremented.
+ *
+ * rsb's on the keep list are being used locally and refcounted.
+ * rsb's on the toss list are not being used locally, and are not refcounted.
+ *
+ * The toss list rsb's were either
+ * - previously used locally but not any more (were on keep list, then
+ * moved to toss list when last refcount dropped)
+ * - created and put on toss list as a directory record for a lookup
+ * (we are the dir node for the res, but are not using the res right now,
+ * but some other node is)
+ *
+ * The purpose of find_rsb() is to return a refcounted rsb for local use.
+ * So, if the given rsb is on the toss list, it is moved to the keep list
+ * before being returned.
+ *
+ * toss_rsb() happens when all local usage of the rsb is done, i.e. no
+ * more refcounts exist, so the rsb is moved from the keep list to the
+ * toss list.
+ *
+ * rsb's on both keep and toss lists are used for doing name to master
+ * lookups. rsb's that are in use locally (and being refcounted) are on
+ * the keep list, rsb's that are not in use locally (not refcounted) and
+ * only exist for name/master lookups are on the toss list.
+ *
+ * rsb's on the toss list whose dir_nodeid is not local can have stale
+ * name/master mappings. So, remote requests on such rsb's can potentially
+ * return with an error, which means the mapping is stale and needs to
+ * be updated with a new lookup. (The idea behind MASTER UNCERTAIN and
+ * first_lkid is to keep only a single outstanding request on an rsb
+ * while that rsb has a potentially stale master.)
+ */
+
+static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
+ uint32_t hash, uint32_t b,
+ int dir_nodeid, int from_nodeid,
+ unsigned int flags, struct dlm_rsb **r_ret)
{
- struct dlm_rsb *r;
+ struct dlm_rsb *r = NULL;
+ int our_nodeid = dlm_our_nodeid();
+ int from_local = 0;
+ int from_other = 0;
+ int from_dir = 0;
+ int create = 0;
int error;
- error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
- if (!error) {
- kref_get(&r->res_ref);
- goto out;
+ if (flags & R_RECEIVE_REQUEST) {
+ if (from_nodeid == dir_nodeid)
+ from_dir = 1;
+ else
+ from_other = 1;
+ } else if (flags & R_REQUEST) {
+ from_local = 1;
+ }
+
+ /*
+ * flags & R_RECEIVE_RECOVER is from dlm_recover_master_copy, so
+ * from_nodeid has sent us a lock in dlm_recover_locks, believing
+ * we're the new master. Our local recovery may not have set
+ * res_master_nodeid to our_nodeid yet, so allow either. Don't
+ * create the rsb; dlm_recover_process_copy() will handle EBADR
+ * by resending.
+ *
+ * If someone sends us a request, we are the dir node, and we do
+ * not find the rsb anywhere, then recreate it. This happens if
+ * someone sends us a request after we have removed/freed an rsb
+ * from our toss list. (They sent a request instead of lookup
+ * because they are using an rsb from their toss list.)
+ */
+
+ if (from_local || from_dir ||
+ (from_other && (dir_nodeid == our_nodeid))) {
+ create = 1;
}
- if (error == -ENOTBLK)
- goto out;
- error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
+ retry:
+ if (create) {
+ error = pre_rsb_struct(ls);
+ i