summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2022-12-12 20:41:50 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-12-12 20:41:50 -0800
commit 97971df811b8854882c0f6c6631e23ab8cdcc44f (patch)
tree 2eaf96eda6b88342d88047521255c110600e1410 /fs
parent 56c003e4db4adb1d57e599fb4c7d39f9130f9db1 (diff)
parent 7a5e9f1f83e3271a9f05933a80b870fe55ebbb3d (diff)
download linux-97971df811b8854882c0f6c6631e23ab8cdcc44f.tar.gz
linux-97971df811b8854882c0f6c6631e23ab8cdcc44f.tar.bz2
linux-97971df811b8854882c0f6c6631e23ab8cdcc44f.zip
Merge tag 'dlm-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm
Pull dlm updates from David Teigland:
 "These patches include the usual cleanups and minor fixes, the removal
  of code that is no longer needed due to recent improvements, and
  improvements to processing large volumes of messages during heavy
  locking activity.

  Summary:

   - Misc code cleanup

   - Fix a couple of socket handling bugs: a double release on an error
     path and a data-ready race in an accept loop

   - Remove code for resending dir-remove messages. This code is no
     longer needed since the midcomms layer now ensures the messages
     are resent if needed

   - Add tracepoints for dlm messages

   - Improve callback queueing by replacing the fixed array with a list

   - Simplify the handling of a remove message followed by a lookup
     message by sending both without releasing a spinlock in between

   - Improve the concurrency of sending and receiving messages by
     holding locks for a shorter time, and changing how workqueues are
     used

   - Remove old code for shutting down sockets, which is no longer
     needed with the reliable connection handling that was recently
     added"

* tag 'dlm-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm: (37 commits)
  fs: dlm: fix building without lockdep
  fs: dlm: parallelize lowcomms socket handling
  fs: dlm: don't init error value
  fs: dlm: use saved sk_error_report()
  fs: dlm: use sock2con without checking null
  fs: dlm: remove dlm_node_addrs lookup list
  fs: dlm: don't put dlm_local_addrs on heap
  fs: dlm: cleanup listen sock handling
  fs: dlm: remove socket shutdown handling
  fs: dlm: use listen sock as dlm running indicator
  fs: dlm: use list_first_entry_or_null
  fs: dlm: remove twice INIT_WORK
  fs: dlm: add midcomms init/start functions
  fs: dlm: add dst nodeid for msg tracing
  fs: dlm: rename seq to h_seq for msg tracing
  fs: dlm: rename DLM_IFL_NEED_SCHED to DLM_IFL_CB_PENDING
  fs: dlm: ast do WARN_ON_ONCE() on hotpath
  fs: dlm: drop lkb ref in bug case
  fs: dlm: avoid false-positive checker warning
  fs: dlm: use WARN_ON_ONCE() instead of WARN_ON()
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/dlm/ast.c 322
-rw-r--r-- fs/dlm/ast.h 17
-rw-r--r-- fs/dlm/config.c 4
-rw-r--r-- fs/dlm/debug_fs.c 2
-rw-r--r-- fs/dlm/dlm_internal.h 25
-rw-r--r-- fs/dlm/lock.c 190
-rw-r--r-- fs/dlm/lockspace.c 14
-rw-r--r-- fs/dlm/lowcomms.c 1538
-rw-r--r-- fs/dlm/lowcomms.h 6
-rw-r--r-- fs/dlm/main.c 7
-rw-r--r-- fs/dlm/member.c 5
-rw-r--r-- fs/dlm/memory.c 30
-rw-r--r-- fs/dlm/memory.h 4
-rw-r--r-- fs/dlm/midcomms.c 141
-rw-r--r-- fs/dlm/midcomms.h 7
-rw-r--r-- fs/dlm/rcom.c 4
-rw-r--r-- fs/dlm/requestqueue.c 3
-rw-r--r-- fs/dlm/user.c 74
-rw-r--r-- fs/dlm/user.h 2
19 files changed, 1152 insertions, 1243 deletions
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index d60a8d8f109d..26fef9945cc9 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -12,55 +12,67 @@
#include <trace/events/dlm.h>
#include "dlm_internal.h"
+#include "memory.h"
#include "lock.h"
#include "user.h"
#include "ast.h"
-static uint64_t dlm_cb_seq;
-static DEFINE_SPINLOCK(dlm_cb_seq_spin);
+void dlm_release_callback(struct kref *ref)
+{
+ struct dlm_callback *cb = container_of(ref, struct dlm_callback, ref);
+
+ dlm_free_cb(cb);
+}
+
+void dlm_callback_set_last_ptr(struct dlm_callback **from,
+ struct dlm_callback *to)
+{
+ if (*from)
+ kref_put(&(*from)->ref, dlm_release_callback);
+
+ if (to)
+ kref_get(&to->ref);
+
+ *from = to;
+}
-static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
+void dlm_purge_lkb_callbacks(struct dlm_lkb *lkb)
{
- int i;
-
- log_print("last_bast %x %llu flags %x mode %d sb %d %x",
- lkb->lkb_id,
- (unsigned long long)lkb->lkb_last_bast.seq,
- lkb->lkb_last_bast.flags,
- lkb->lkb_last_bast.mode,
- lkb->lkb_last_bast.sb_status,
- lkb->lkb_last_bast.sb_flags);
-
- log_print("last_cast %x %llu flags %x mode %d sb %d %x",
- lkb->lkb_id,
- (unsigned long long)lkb->lkb_last_cast.seq,
- lkb->lkb_last_cast.flags,
- lkb->lkb_last_cast.mode,
- lkb->lkb_last_cast.sb_status,
- lkb->lkb_last_cast.sb_flags);
-
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- log_print("cb %x %llu flags %x mode %d sb %d %x",
- lkb->lkb_id,
- (unsigned long long)lkb->lkb_callbacks[i].seq,
- lkb->lkb_callbacks[i].flags,
- lkb->lkb_callbacks[i].mode,
- lkb->lkb_callbacks[i].sb_status,
- lkb->lkb_callbacks[i].sb_flags);
+ struct dlm_callback *cb, *safe;
+
+ list_for_each_entry_safe(cb, safe, &lkb->lkb_callbacks, list) {
+ list_del(&cb->list);
+ kref_put(&cb->ref, dlm_release_callback);
}
+
+ lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+
+ /* invalidate */
+ dlm_callback_set_last_ptr(&lkb->lkb_last_cast, NULL);
+ dlm_callback_set_last_ptr(&lkb->lkb_last_cb, NULL);
+ lkb->lkb_last_bast_mode = -1;
}
-int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
- int status, uint32_t sbflags, uint64_t seq)
+int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
+ int status, uint32_t sbflags)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
- uint64_t prev_seq;
+ int rv = DLM_ENQUEUE_CALLBACK_SUCCESS;
+ struct dlm_callback *cb;
int prev_mode;
- int i, rv;
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- if (lkb->lkb_callbacks[i].seq)
- continue;
+ if (flags & DLM_CB_BAST) {
+ /* if cb is a bast, it should be skipped if the blocking mode is
+ * compatible with the last granted mode
+ */
+ if (lkb->lkb_last_cast) {
+ if (dlm_modes_compat(mode, lkb->lkb_last_cast->mode)) {
+ log_debug(ls, "skip %x bast mode %d for cast mode %d",
+ lkb->lkb_id, mode,
+ lkb->lkb_last_cast->mode);
+ goto out;
+ }
+ }
/*
* Suppress some redundant basts here, do more on removal.
@@ -68,148 +80,95 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
* is a bast for the same mode or a more restrictive mode.
* (the addional > PR check is needed for PR/CW inversion)
*/
-
- if ((i > 0) && (flags & DLM_CB_BAST) &&
- (lkb->lkb_callbacks[i-1].flags & DLM_CB_BAST)) {
-
- prev_seq = lkb->lkb_callbacks[i-1].seq;
- prev_mode = lkb->lkb_callbacks[i-1].mode;
+ if (lkb->lkb_last_cb && lkb->lkb_last_cb->flags & DLM_CB_BAST) {
+ prev_mode = lkb->lkb_last_cb->mode;
if ((prev_mode == mode) ||
(prev_mode > mode && prev_mode > DLM_LOCK_PR)) {
-
- log_debug(ls, "skip %x add bast %llu mode %d "
- "for bast %llu mode %d",
- lkb->lkb_id,
- (unsigned long long)seq,
- mode,
- (unsigned long long)prev_seq,
- prev_mode);
- rv = 0;
+ log_debug(ls, "skip %x add bast mode %d for bast mode %d",
+ lkb->lkb_id, mode, prev_mode);
goto out;
}
}
-
- lkb->lkb_callbacks[i].seq = seq;
- lkb->lkb_callbacks[i].flags = flags;
- lkb->lkb_callbacks[i].mode = mode;
- lkb->lkb_callbacks[i].sb_status = status;
- lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
- rv = 0;
- break;
}
- if (i == DLM_CALLBACKS_SIZE) {
- log_error(ls, "no callbacks %x %llu flags %x mode %d sb %d %x",
- lkb->lkb_id, (unsigned long long)seq,
- flags, mode, status, sbflags);
- dlm_dump_lkb_callbacks(lkb);
- rv = -1;
+ cb = dlm_allocate_cb();
+ if (!cb) {
+ rv = DLM_ENQUEUE_CALLBACK_FAILURE;
goto out;
}
- out:
- return rv;
-}
-
-int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
- struct dlm_callback *cb, int *resid)
-{
- int i, rv;
-
- *resid = 0;
-
- if (!lkb->lkb_callbacks[0].seq) {
- rv = -ENOENT;
- goto out;
- }
-
- /* oldest undelivered cb is callbacks[0] */
-
- memcpy(cb, &lkb->lkb_callbacks[0], sizeof(struct dlm_callback));
- memset(&lkb->lkb_callbacks[0], 0, sizeof(struct dlm_callback));
- /* shift others down */
-
- for (i = 1; i < DLM_CALLBACKS_SIZE; i++) {
- if (!lkb->lkb_callbacks[i].seq)
- break;
- memcpy(&lkb->lkb_callbacks[i-1], &lkb->lkb_callbacks[i],
- sizeof(struct dlm_callback));
- memset(&lkb->lkb_callbacks[i], 0, sizeof(struct dlm_callback));
- (*resid)++;
+ cb->flags = flags;
+ cb->mode = mode;
+ cb->sb_status = status;
+ cb->sb_flags = (sbflags & 0x000000FF);
+ kref_init(&cb->ref);
+ if (!(lkb->lkb_flags & DLM_IFL_CB_PENDING)) {
+ lkb->lkb_flags |= DLM_IFL_CB_PENDING;
+ rv = DLM_ENQUEUE_CALLBACK_NEED_SCHED;
}
+ list_add_tail(&cb->list, &lkb->lkb_callbacks);
- /* if cb is a bast, it should be skipped if the blocking mode is
- compatible with the last granted mode */
-
- if ((cb->flags & DLM_CB_BAST) && lkb->lkb_last_cast.seq) {
- if (dlm_modes_compat(cb->mode, lkb->lkb_last_cast.mode)) {
- cb->flags |= DLM_CB_SKIP;
-
- log_debug(ls, "skip %x bast %llu mode %d "
- "for cast %llu mode %d",
- lkb->lkb_id,
- (unsigned long long)cb->seq,
- cb->mode,
- (unsigned long long)lkb->lkb_last_cast.seq,
- lkb->lkb_last_cast.mode);
- rv = 0;
- goto out;
- }
- }
+ if (flags & DLM_CB_CAST)
+ dlm_callback_set_last_ptr(&lkb->lkb_last_cast, cb);
- if (cb->flags & DLM_CB_CAST) {
- memcpy(&lkb->lkb_last_cast, cb, sizeof(struct dlm_callback));
- lkb->lkb_last_cast_time = ktime_get();
- }
+ dlm_callback_set_last_ptr(&lkb->lkb_last_cb, cb);
- if (cb->flags & DLM_CB_BAST) {
- memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
- lkb->lkb_last_bast_time = ktime_get();
- }
- rv = 0;
out:
return rv;
}
+int dlm_dequeue_lkb_callback(struct dlm_lkb *lkb, struct dlm_callback **cb)
+{
+ /* oldest undelivered cb is callbacks first entry */
+ *cb = list_first_entry_or_null(&lkb->lkb_callbacks,
+ struct dlm_callback, list);
+ if (!*cb)
+ return DLM_DEQUEUE_CALLBACK_EMPTY;
+
+ /* remove it from callbacks so shift others down */
+ list_del(&(*cb)->list);
+ if (list_empty(&lkb->lkb_callbacks))
+ return DLM_DEQUEUE_CALLBACK_LAST;
+
+ return DLM_DEQUEUE_CALLBACK_SUCCESS;
+}
+
void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
uint32_t sbflags)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
- uint64_t new_seq, prev_seq;
int rv;
- spin_lock(&dlm_cb_seq_spin);
- new_seq = ++dlm_cb_seq;
- if (!dlm_cb_seq)
- new_seq = ++dlm_cb_seq;
- spin_unlock(&dlm_cb_seq_spin);
-
if (lkb->lkb_flags & DLM_IFL_USER) {
- dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
+ dlm_user_add_ast(lkb, flags, mode, status, sbflags);
return;
}
- mutex_lock(&lkb->lkb_cb_mutex);
- prev_seq = lkb->lkb_callbacks[0].seq;
-
- rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
- if (rv < 0)
- goto out;
-
- if (!prev_seq) {
+ spin_lock(&lkb->lkb_cb_lock);
+ rv = dlm_enqueue_lkb_callback(lkb, flags, mode, status, sbflags);
+ switch (rv) {
+ case DLM_ENQUEUE_CALLBACK_NEED_SCHED:
kref_get(&lkb->lkb_ref);
- mutex_lock(&ls->ls_cb_mutex);
+ spin_lock(&ls->ls_cb_lock);
if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
} else {
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
}
- mutex_unlock(&ls->ls_cb_mutex);
+ spin_unlock(&ls->ls_cb_lock);
+ break;
+ case DLM_ENQUEUE_CALLBACK_FAILURE:
+ WARN_ON_ONCE(1);
+ break;
+ case DLM_ENQUEUE_CALLBACK_SUCCESS:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
- out:
- mutex_unlock(&lkb->lkb_cb_mutex);
+ spin_unlock(&lkb->lkb_cb_lock);
}
void dlm_callback_work(struct work_struct *work)
@@ -218,53 +177,46 @@ void dlm_callback_work(struct work_struct *work)
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
void (*castfn) (void *astparam);
void (*bastfn) (void *astparam, int mode);
- struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
- int i, rv, resid;
-
- memset(&callbacks, 0, sizeof(callbacks));
+ struct dlm_callback *cb;
+ int rv;
- mutex_lock(&lkb->lkb_cb_mutex);
- if (!lkb->lkb_callbacks[0].seq) {
- /* no callback work exists, shouldn't happen */
- log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
- dlm_print_lkb(lkb);
- dlm_dump_lkb_callbacks(lkb);
- }
+ spin_lock(&lkb->lkb_cb_lock);
+ rv = dlm_dequeue_lkb_callback(lkb, &cb);
+ spin_unlock(&lkb->lkb_cb_lock);
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
- if (rv < 0)
- break;
- }
+ if (WARN_ON_ONCE(rv == DLM_DEQUEUE_CALLBACK_EMPTY))
+ goto out;
- if (resid) {
- /* cbs remain, loop should have removed all, shouldn't happen */
- log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
- resid);
- dlm_print_lkb(lkb);
- dlm_dump_lkb_callbacks(lkb);
- }
- mutex_unlock(&lkb->lkb_cb_mutex);
+ for (;;) {
+ castfn = lkb->lkb_astfn;
+ bastfn = lkb->lkb_bastfn;
+
+ if (cb->flags & DLM_CB_BAST) {
+ trace_dlm_bast(ls, lkb, cb->mode);
+ lkb->lkb_last_bast_time = ktime_get();
+ lkb->lkb_last_bast_mode = cb->mode;
+ bastfn(lkb->lkb_astparam, cb->mode);
+ } else if (cb->flags & DLM_CB_CAST) {
+ lkb->lkb_lksb->sb_status = cb->sb_status;
+ lkb->lkb_lksb->sb_flags = cb->sb_flags;
+ trace_dlm_ast(ls, lkb);
+ lkb->lkb_last_cast_time = ktime_get();
+ castfn(lkb->lkb_astparam);
+ }
- castfn = lkb->lkb_astfn;
- bastfn = lkb->lkb_bastfn;
+ kref_put(&cb->ref, dlm_release_callback);
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- if (!callbacks[i].seq)
+ spin_lock(&lkb->lkb_cb_lock);
+ rv = dlm_dequeue_lkb_callback(lkb, &cb);
+ if (rv == DLM_DEQUEUE_CALLBACK_EMPTY) {
+ lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ spin_unlock(&lkb->lkb_cb_lock);
break;
- if (callbacks[i].flags & DLM_CB_SKIP) {
- continue;
- } else if (callbacks[i].flags & DLM_CB_BAST) {
- trace_dlm_bast(ls, lkb, callbacks[i].mode);
- bastfn(lkb->lkb_astparam, callbacks[i].mode);
- } else if (callbacks[i].flags & DLM_CB_CAST) {
- lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
- lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
- trace_dlm_ast(ls, lkb);
- castfn(lkb->lkb_astparam);
}
+ spin_unlock(&lkb->lkb_cb_lock);
}
+out:
/* undo kref_get from dlm_add_callback, may cause lkb to be freed */
dlm_put_lkb(lkb);
}
@@ -289,9 +241,9 @@ void dlm_callback_stop(struct dlm_ls *ls)
void dlm_callback_suspend(struct dlm_ls *ls)
{
if (ls->ls_callback_wq) {
- mutex_lock(&ls->ls_cb_mutex);
+ spin_lock(&ls->ls_cb_lock);
set_bit(LSFL_CB_DELAY, &ls->ls_flags);
- mutex_unlock(&ls->ls_cb_mutex);
+ spin_unlock(&ls->ls_cb_lock);
flush_workqueue(ls->ls_callback_wq);
}
@@ -308,10 +260,8 @@ void dlm_callback_resume(struct dlm_ls *ls)
if (!ls->ls_callback_wq)
return;
- clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
-
more:
- mutex_lock(&ls->ls_cb_mutex);
+ spin_lock(&ls->ls_cb_lock);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
@@ -320,7 +270,9 @@ more:
break;
}
empty = list_empty(&ls->ls_cb_delay);
- mutex_unlock(&ls->ls_cb_mutex);
+ if (empty)
+ clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
+ spin_unlock(&ls->ls_cb_lock);
sum += count;
if (!empty) {
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index e5e05fcc5813..880b11882495 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -11,13 +11,22 @@
#ifndef __ASTD_DOT_H__
#define __ASTD_DOT_H__
-int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
- int status, uint32_t sbflags, uint64_t seq);
-int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
- struct dlm_callback *cb, int *resid);
+#define DLM_ENQUEUE_CALLBACK_NEED_SCHED 1
+#define DLM_ENQUEUE_CALLBACK_SUCCESS 0
+#define DLM_ENQUEUE_CALLBACK_FAILURE -1
+int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
+ int status, uint32_t sbflags);
+#define DLM_DEQUEUE_CALLBACK_EMPTY 2
+#define DLM_DEQUEUE_CALLBACK_LAST 1
+#define DLM_DEQUEUE_CALLBACK_SUCCESS 0
+int dlm_dequeue_lkb_callback(struct dlm_lkb *lkb, struct dlm_callback **cb);
void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
uint32_t sbflags);
+void dlm_callback_set_last_ptr(struct dlm_callback **from,
+ struct dlm_callback *to);
+void dlm_release_callback(struct kref *ref);
+void dlm_purge_lkb_callbacks(struct dlm_lkb *lkb);
void dlm_callback_work(struct work_struct *work);
int dlm_callback_start(struct dlm_ls *ls);
void dlm_callback_stop(struct dlm_ls *ls);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index ac8b62106ce0..20b60709eccf 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -183,7 +183,7 @@ static int dlm_check_protocol_and_dlm_running(unsigned int x)
return -EINVAL;
}
- if (dlm_allow_conn)
+ if (dlm_lowcomms_is_running())
return -EBUSY;
return 0;
@@ -194,7 +194,7 @@ static int dlm_check_zero_and_dlm_running(unsigned int x)
if (!x)
return -EINVAL;
- if (dlm_allow_conn)
+ if (dlm_lowcomms_is_running())
return -EBUSY;
return 0;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8fb04ebbafb5..8a0e1b1f74ad 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -246,7 +246,7 @@ static void print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
lkb->lkb_status,
lkb->lkb_grmode,
lkb->lkb_rqmode,
- lkb->lkb_last_bast.mode,
+ lkb->lkb_last_bast_mode,
rsb_lookup,
lkb->lkb_wait_type,
lkb->lkb_lvbseq,
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index e34c3d2639a5..ab1a55337a6e 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -211,6 +211,7 @@ struct dlm_args {
#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
+#define DLM_IFL_CB_PENDING 0x04000000
/* least significant 2 bytes are message changed, they are full transmitted
* but at receive side only the 2 bytes LSB will be set.
*
@@ -222,18 +223,17 @@ struct dlm_args {
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
-#define DLM_CALLBACKS_SIZE 6
-
#define DLM_CB_CAST 0x00000001
#define DLM_CB_BAST 0x00000002
-#define DLM_CB_SKIP 0x00000004
struct dlm_callback {
- uint64_t seq;
uint32_t flags; /* DLM_CBF_ */
int sb_status; /* copy to lksb status */
uint8_t sb_flags; /* copy to lksb flags */
int8_t mode; /* rq mode of bast, gr mode of cast */
+
+ struct list_head list;
+ struct kref ref;
};
struct dlm_lkb {
@@ -268,12 +268,13 @@ struct dlm_lkb {
unsigned long lkb_timeout_cs;
#endif
- struct mutex lkb_cb_mutex;
+ spinlock_t lkb_cb_lock;
struct work_struct lkb_cb_work;
struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */
- struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
- struct dlm_callback lkb_last_cast;
- struct dlm_callback lkb_last_bast;
+ struct list_head lkb_callbacks;
+ struct dlm_callback *lkb_last_cast;
+ struct dlm_callback *lkb_last_cb;
+ int lkb_last_bast_mode;
ktime_t lkb_last_cast_time; /* for debugging */
ktime_t lkb_last_bast_time; /* for debugging */
@@ -591,11 +592,7 @@ struct dlm_ls {
int ls_new_rsb_count;
struct list_head ls_new_rsb; /* new rsb structs */
- spinlock_t ls_remove_spin;
- wait_queue_head_t ls_remove_wait;
- char ls_remove_name[DLM_RESNAME_MAXLEN+1];
char *ls_remove_names[DLM_REMOVE_NAMES_MAX];
- int ls_remove_len;
int ls_remove_lens[DLM_REMOVE_NAMES_MAX];
struct list_head ls_nodes; /* current nodes in ls */
@@ -631,7 +628,7 @@ struct dlm_ls {
/* recovery related */
- struct mutex ls_cb_mutex;
+ spinlock_t ls_cb_lock;
struct list_head ls_cb_delay; /* save for queue_work later */
struct timer_list ls_timer;
struct task_struct *ls_recoverd_task;
@@ -670,7 +667,7 @@ struct dlm_ls {
void *ls_ops_arg;
int ls_namelen;
- char ls_name[1];
+ char ls_name[DLM_LOCKSPACE_LEN + 1];
};
/*
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 94a72ede5764..e1adfa5aed05 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1209,6 +1209,7 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
if (!lkb)
return -ENOMEM;
+ lkb->lkb_last_bast_mode = -1;
lkb->lkb_nodeid = -1;
lkb->lkb_grmode = DLM_LOCK_IV;
kref_init(&lkb->lkb_ref);
@@ -1218,7 +1219,8 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
INIT_LIST_HEAD(&lkb->lkb_time_list);
#endif
INIT_LIST_HEAD(&lkb->lkb_cb_list);
- mutex_init(&lkb->lkb_cb_mutex);
+ INIT_LIST_HEAD(&lkb->lkb_callbacks);
+ spin_lock_init(&lkb->lkb_cb_lock);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
idr_preload(GFP_NOFS);
@@ -1587,37 +1589,6 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
return error;
}
-/* If there's an rsb for the same resource being removed, ensure
- * that the remove message is sent before the new lookup message.
- */
-
-#define DLM_WAIT_PENDING_COND(ls, r) \
- (ls->ls_remove_len && \
- !rsb_cmp(r, ls->ls_remove_name, \
- ls->ls_remove_len))
-
-static void wait_pending_remove(struct dlm_rsb *r)
-{
- struct dlm_ls *ls = r->res_ls;
- restart:
- spin_lock(&ls->ls_remove_spin);
- if (DLM_WAIT_PENDING_COND(ls, r)) {
- log_debug(ls, "delay lookup for remove dir %d %s",
- r->res_dir_nodeid, r->res_name);
- spin_unlock(&ls->ls_remove_spin);
- wait_event(ls->ls_remove_wait, !DLM_WAIT_PENDING_COND(ls, r));
- goto restart;
- }
- spin_unlock(&ls->ls_remove_spin);
-}
-
-/*
- * ls_remove_spin protects ls_remove_name and ls_remove_len which are
- * read by other threads in wait_pending_remove. ls_remove_names
- * and ls_remove_lens are only used by the scan thread, so they do
- * not need protection.
- */
-
static void shrink_bucket(struct dlm_ls *ls, int b)
{
struct rb_node *n, *next;
@@ -1699,11 +1670,6 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
* list and sending the removal. Keeping this gap small is
* important to keep us (the master node) from being out of sync
* with the remote dir node for very long.
- *
- * From the time the rsb is removed from toss until just after
- * send_remove, the rsb name is saved in ls_remove_name. A new
- * lookup checks this to ensure that a new lookup message for the
- * same resource name is not sent just before the remove message.
*/
for (i = 0; i < remote_count; i++) {
@@ -1750,22 +1716,8 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
}
rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
-
- /* block lookup of same name until we've sent remove */
- spin_lock(&ls->ls_remove_spin);
- ls->ls_remove_len = len;
- memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
- spin_unlock(&ls->ls_remove_spin);
- spin_unlock(&ls->ls_rsbtbl[b].lock);
-
send_remove(r);
-
- /* allow lookup of name again */
- spin_lock(&ls->ls_remove_spin);
- ls->ls_remove_len = 0;
- memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
- spin_unlock(&ls->ls_remove_spin);
- wake_up(&ls->ls_remove_wait);
+ spin_unlock(&ls->ls_rsbtbl[b].lock);
dlm_free_rsb(r);
}
@@ -2716,8 +2668,6 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
return 0;
}
- wait_pending_remove(r);
-
r->res_first_lkid = lkb->lkb_id;
send_lookup(r, lkb);
return 1;
@@ -3552,7 +3502,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
static int _create_message(struct dlm_ls *ls, int mb_len,
int to_nodeid, int mstype,
struct dlm_message **ms_ret,
- struct dlm_mhandle **mh_ret)
+ struct dlm_mhandle **mh_ret,
+ gfp_t allocation)
{
struct dlm_message *ms;
struct dlm_mhandle *mh;
@@ -3562,7 +3513,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
pass into midcomms_commit and a message buffer (mb) that we
write our data into */
- mh = dlm_midcomms_get_mhandle(to_nodeid, mb_len, GFP_NOFS, &mb);
+ mh = dlm_midcomms_get_mhandle(to_nodeid, mb_len, allocation, &mb);
if (!mh)
return -ENOBUFS;
@@ -3584,7 +3535,8 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
int to_nodeid, int mstype,
struct dlm_message **ms_ret,
- struct dlm_mhandle **mh_ret)
+ struct dlm_mhandle **mh_ret,
+ gfp_t allocation)
{
int mb_len = sizeof(struct dlm_message);
@@ -3605,15 +3557,16 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
}
return _create_message(r->res_ls, mb_len, to_nodeid, mstype,
- ms_ret, mh_ret);
+ ms_ret, mh_ret, allocation);
}
/* further lowcomms enhancements or alternate implementations may make
the return value from this function useful at some point */
-static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
+static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms,
+ const void *name, int namelen)
{
- dlm_midcomms_commit_mhandle(mh);
+ dlm_midcomms_commit_mhandle(mh, name, namelen);
return 0;
}
@@ -3673,13 +3626,13 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
if (error)
return error;
- error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
+ error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh, GFP_NOFS);
if (error)
goto fail;
send_args(r, lkb, ms);
- error = send_message(mh, ms);
+ error = send_message(mh, ms, r->res_name, r->res_length);
if (error)
goto fail;
return 0;
@@ -3734,7 +3687,8 @@ static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
to_nodeid = lkb->lkb_nodeid;
- error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh);
+ error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh,
+ GFP_NOFS);
if (error)
goto out;
@@ -3742,7 +3696,7 @@ static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
ms->m_result = 0;
- error = send_message(mh, ms);
+ error = send_message(mh, ms, r->res_name, r->res_length);
out:
return error;
}
@@ -3755,7 +3709,8 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
to_nodeid = lkb->lkb_nodeid;
- error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh);
+ error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh,
+ GFP_NOFS);
if (error)
goto out;
@@ -3763,7 +3718,7 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
ms->m_bastmode = cpu_to_le32(mode);
- error = send_message(mh, ms);
+ error = send_message(mh, ms, r->res_name, r->res_length);
out:
return error;
}
@@ -3780,13 +3735,14 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
if (error)
return error;
- error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
+ error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh,
+ GFP_NOFS);
if (error)
goto fail;
send_args(r, lkb, ms);
- error = send_message(mh, ms);
+ error = send_message(mh, ms, r->res_name, r->res_length);
if (error)
goto fail;
return 0;
@@ -3804,14 +3760,15 @@ static int send_remove(struct dlm_rsb *r)
to_nodeid = dlm_dir_nodeid(r);
- error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh);
+ error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh,
+ GFP_ATOMIC);
if (error)
goto out;
memcpy(ms->m_extra, r->res_name, r->res_length);
ms->m_hash = cpu_to_le32(r->res_hash);
- error = send_message(mh, ms);
+ error = send_message(mh, ms, r->res_name, r->res_length);
out:
return error;
}
@@ -3825,7 +3782,7 @@ static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
to_nodeid = lkb->lkb_nodeid;
- error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
+ error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh, GFP_NOFS);
if (error)
goto out;
@@ -3833,7 +3790,7 @@ static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
ms->m_result = cpu_to_le32(to_dlm_errno(rv));
- error = send_message(mh, ms);
+ error = send_message(mh, ms, r->res_name, r->res_length);
out:
return error;
}
@@ -3866,7 +3823,8 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
struct dlm_mhandle *mh;
int error, nodeid = le32_to_cpu(ms_in->m_header.h_nodeid);
- error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
+ error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh,
+ GFP_NOFS);
if (error)
goto out;
@@ -3874,7 +3832,7 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
ms->m_result = cpu_to_le32(to_dlm_errno(rv));
ms->m_nodeid = cpu_to_le32(ret_nodeid);
- error = send_message(mh, ms);
+ error = send_message(mh, ms, ms_in->m_extra, receive_extralen(ms_in));
out:
return error;
}
@@ -4044,66 +4002,6 @@ out:
return error;
}
-static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len)
-{
- char name[DLM_RESNAME_MAXLEN + 1];
- struct dlm_message *ms;
- struct dlm_mhandle *mh;
- struct dlm_rsb *r;
- uint32_t hash, b;
- int rv, dir_nodeid;
-
- memset(name, 0, sizeof(name));
- memcpy(name, ms_name, len);
-
- hash = jhash(name, len, 0);
- b = hash & (ls->ls_rsbtbl_size - 1);
-
- dir_nodeid = dlm_hash2nodeid(ls, hash);
-
- log_error(ls, "send_repeat_remove dir %d %s", dir_nodeid, name);
-
- spin_lock(&ls->ls_rsbtbl[b].lock);
- rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
- if (!rv) {
- spin_unlock(&ls->ls_rsbtbl[b].lock);
- log_error(ls, "repeat_remove on keep %s", name);
- return;