diff options
| author | Ilya Dryomov <idryomov@gmail.com> | 2023-08-01 19:14:24 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2023-08-11 11:33:56 +0200 |
| commit | 0265f853d9b459e02966797bec8b3a5ebe29af1f (patch) | |
| tree | 8e3ba5a5fc9cb1dae6f2f68a6c6330d96267deb4 | |
| parent | 1969c2d11a64a1b492ef79c9436b578085b1f45b (diff) | |
| download | linux-0265f853d9b459e02966797bec8b3a5ebe29af1f.tar.gz linux-0265f853d9b459e02966797bec8b3a5ebe29af1f.tar.bz2 linux-0265f853d9b459e02966797bec8b3a5ebe29af1f.zip | |
libceph: fix potential hang in ceph_osdc_notify()
commit e6e2843230799230fc5deb8279728a7218b0d63c upstream.
If the cluster becomes unavailable, ceph_osdc_notify() may hang even
with osd_request_timeout option set because linger_notify_finish_wait()
waits for MWatchNotify NOTIFY_COMPLETE message with no associated OSD
request in flight -- it's completely asynchronous.
Introduce an additional timeout, derived from the specified notify
timeout. While at it, switch both waits to killable, which is more
correct.
Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
| -rw-r--r-- | net/ceph/osd_client.c | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b026128a89d7..7d7b128c01bd 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3041,17 +3041,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
 	int ret;
 
 	dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
-	ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+	ret = wait_for_completion_killable(&lreq->reg_commit_wait);
 	return ret ?: lreq->reg_commit_error;
 }
 
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+				     unsigned long timeout)
 {
-	int ret;
+	long left;
 
 	dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
-	ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
-	return ret ?: lreq->notify_finish_error;
+	left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+						ceph_timeout_jiffies(timeout));
+	if (left <= 0)
+		left = left ?: -ETIMEDOUT;
+	else
+		left = lreq->notify_finish_error; /* completed */
+
+	return left;
 }
 
 /*
@@ -4666,7 +4673,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
 
 	ret = linger_reg_commit_wait(lreq);
 	if (!ret)
-		ret = linger_notify_finish_wait(lreq);
+		ret = linger_notify_finish_wait(lreq,
+				 msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
 	else
 		dout("lreq %p failed to initiate notify %d\n", lreq, ret);
 
