From fe943d50425b6646606f8ef1ef8b8d4975fdbee2 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 12 Apr 2018 12:04:55 +0800 Subject: libceph, rbd: add error handling for osd_req_op_cls_init() Add proper error handling for osd_req_op_cls_init() to replace BUG_ON statement when failing from memory allocation. Signed-off-by: Chengguang Xu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 96bb32285989..b73dd7ebe585 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -440,7 +440,7 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, +extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method); extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, -- cgit v1.2.3 From 49a9f4f6714ec0ca2c6ada2ce764fbdd694962ee Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 25 Apr 2018 17:30:23 +0800 Subject: ceph: always get rstat from auth mds rstat is not tracked by capability. client can't know if rstat from non-auth mds is uptodate or not. Link: http://tracker.ceph.com/issues/23538 Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 7ecfc88314d8..4903deb0777a 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -628,6 +628,7 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_XATTR_SHARED) #define CEPH_STAT_CAP_INLINE_DATA (CEPH_CAP_FILE_SHARED | \ CEPH_CAP_FILE_RD) +#define CEPH_STAT_RSTAT CEPH_CAP_FILE_WREXTEND #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ CEPH_CAP_LINK_SHARED | \ -- cgit v1.2.3 From 66850df58529eefc61cb96b895991508547503bf Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 May 2018 15:47:58 +0200 Subject: libceph: introduce ceph_osdc_abort_requests() This will be used by the filesystem for "umount -f". Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index b73dd7ebe585..874c31c01f80 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -347,6 +347,7 @@ struct ceph_osd_client { struct rb_root linger_map_checks; atomic_t num_requests; atomic_t num_homeless; + int abort_err; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; #ifdef CONFIG_DEBUG_FS @@ -378,6 +379,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); +void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); extern void osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); -- cgit v1.2.3 From 88bc1922c273c95e84a8955e657401f9bc63a80b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 21 May 2018 16:00:29 +0200 Subject: libceph: defer __complete_request() to a workqueue In the common case, req->r_callback is called by handle_reply() on the ceph-msgr worker thread without any locks. If handle_reply() fails, it is called with both osd->lock and osdc->lock. In the map check case, it is called with just osdc->lock but held for write. Finally, if the request is aborted because of -ENOSPC or by ceph_osdc_abort_requests(), it is called directly on the submitter's thread, again with both locks. req->r_callback on the submitter's thread is relatively new (introduced in 4.12) and ripe for deadlocks -- e.g. writeback worker thread waiting on itself: inode_wait_for_writeback+0x26/0x40 evict+0xb5/0x1a0 iput+0x1d2/0x220 ceph_put_wrbuffer_cap_refs+0xe0/0x2c0 [ceph] writepages_finish+0x2d3/0x410 [ceph] __complete_request+0x26/0x60 [libceph] complete_request+0x2e/0x70 [libceph] __submit_request+0x256/0x330 [libceph] submit_request+0x2b/0x30 [libceph] ceph_osdc_start_request+0x25/0x40 [libceph] ceph_writepages_start+0xdfe/0x1320 [ceph] do_writepages+0x1f/0x70 __writeback_single_inode+0x45/0x330 writeback_sb_inodes+0x26a/0x600 __writeback_inodes_wb+0x92/0xc0 wb_writeback+0x274/0x330 wb_workfn+0x2d5/0x3b0 Defer __complete_request() to a workqueue in all failure cases so it's never on the same thread as ceph_osdc_start_request() and always called with no locks held. Link: http://tracker.ceph.com/issues/23978 Signed-off-by: Ilya Dryomov Acked-by: Jeff Layton Reviewed-by: "Yan, Zheng" --- include/linux/ceph/osd_client.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 874c31c01f80..d4191bde95a4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -170,6 +170,7 @@ struct ceph_osd_request { u64 r_tid; /* unique for this client */ struct rb_node r_node; struct rb_node r_mc_node; /* map check */ + struct work_struct r_complete_work; struct ceph_osd *r_osd; struct ceph_osd_request_target r_t; @@ -360,6 +361,7 @@ struct ceph_osd_client { struct ceph_msgpool msgpool_op_reply; struct workqueue_struct *notify_wq; + struct workqueue_struct *completion_wq; }; static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) -- cgit v1.2.3 From c843d13caefad9f2f182f38d6bfe492c9f00e086 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 30 May 2018 16:29:14 +0200 Subject: libceph: make abort_on_full a per-osdc setting The intent behind making it a per-request setting was that it would be set for writes, but not for reads. As it is, the flag is set for all fs/ceph requests except for pool perm check stat request (technically a read). ceph_osdc_abort_on_full() skips reads since the previous commit and I don't see a use case for marking individual requests. Signed-off-by: Ilya Dryomov Acked-by: Jeff Layton Reviewed-by: "Yan, Zheng" --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d4191bde95a4..0d6ee04b4c41 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -202,7 +202,6 @@ struct ceph_osd_request { struct timespec r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ bool r_linger; /* don't resend on failure */ - bool r_abort_on_full; /* return ENOSPC when full */ /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ @@ -348,6 +347,7 @@ struct ceph_osd_client { struct rb_root linger_map_checks; atomic_t num_requests; atomic_t num_homeless; + bool abort_on_full; /* abort w/ ENOSPC when full */ int abort_err; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; -- cgit v1.2.3 From a86f009f106cba322c608785e09c8b5be8ffe8bb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 23 May 2018 14:46:53 +0200 Subject: libceph: allocate the locator string with GFP_NOFAIL calc_target() isn't supposed to fail with anything but POOL_DNE, in which case we report that the pool doesn't exist and fail the request with -ENOENT. Doing this for -ENOMEM is at the very least confusing and also harmful -- as the preceding requests complete, a short-lived locator string allocation is likely to succeed after a wait. (We used to call ceph_object_locator_to_pg() for a pi lookup. In theory that could fail with -ENOENT, hence the "ret != -ENOENT" warning being removed.) Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e71fb222c7c3..5675b1f09bc5 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -279,10 +279,10 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, bool any_change); -int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, - const struct ceph_object_id *oid, - const struct ceph_object_locator *oloc, - struct ceph_pg *raw_pgid); +void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, + const struct ceph_object_id *oid, + const struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid); int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, const struct ceph_object_id *oid, const struct ceph_object_locator *oloc, -- cgit v1.2.3