From 663ae2cc04773608e1e741f693e41200fd4faf14 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 May 2016 13:18:57 +0200 Subject: rbd: get/put img_request in rbd_img_request_submit() By the time we get to checking for_each_obj_request_safe(img_request) terminating condition, all obj_requests may be complete and img_request ref, that rbd_img_request_submit() takes away from its caller, may be put. Moving the next_obj_request cursor is then a use-after-free on img_request. It's totally benign, as the value that's read is never used, but I think it's still worth fixing. Cc: Alex Elder Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 0ede6d7e2568..c3089f32a392 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2973,17 +2973,20 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request) { struct rbd_obj_request *obj_request; struct rbd_obj_request *next_obj_request; + int ret = 0; dout("%s: img %p\n", __func__, img_request); - for_each_obj_request_safe(img_request, obj_request, next_obj_request) { - int ret; + rbd_img_request_get(img_request); + for_each_obj_request_safe(img_request, obj_request, next_obj_request) { ret = rbd_img_obj_request_submit(obj_request); if (ret) - return ret; + goto out_put_ireq; } - return 0; +out_put_ireq: + rbd_img_request_put(img_request); + return ret; } static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) -- cgit v1.2.3 From 3ed97d6345a36a0a61e6af62ad8a66ca40f1aa2e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 26 Apr 2016 15:05:29 +0200 Subject: libceph: make ceph_osdc_put_request() accept NULL Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 9 +++------ net/ceph/osd_client.c | 8 +++++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4801571f51cb..3e61fc8bb371 100644 --- a/fs/ceph/addr.c +++ 
b/fs/ceph/addr.c @@ -1099,8 +1099,7 @@ release_pvec_pages: mapping->writeback_index = index; out: - if (req) - ceph_osdc_put_request(req); + ceph_osdc_put_request(req); ceph_put_snap_context(snapc); dout("writepages done, rc = %d\n", rc); return rc; @@ -1824,10 +1823,8 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) out_unlock: up_write(&mdsc->pool_perm_rwsem); - if (rd_req) - ceph_osdc_put_request(rd_req); - if (wr_req) - ceph_osdc_put_request(wr_req); + ceph_osdc_put_request(rd_req); + ceph_osdc_put_request(wr_req); out: if (!err) err = have; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 40a53a70efdf..cacce9e35f08 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -354,9 +354,11 @@ EXPORT_SYMBOL(ceph_osdc_get_request); void ceph_osdc_put_request(struct ceph_osd_request *req) { - dout("%s %p (was %d)\n", __func__, req, - atomic_read(&req->r_kref.refcount)); - kref_put(&req->r_kref, ceph_osdc_release_request); + if (req) { + dout("%s %p (was %d)\n", __func__, req, + atomic_read(&req->r_kref.refcount)); + kref_put(&req->r_kref, ceph_osdc_release_request); + } } EXPORT_SYMBOL(ceph_osdc_put_request); -- cgit v1.2.3 From 841272825b2263174120ab02b4abac9005ee1420 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 26 Apr 2016 15:39:47 +0200 Subject: libceph: grab snapc in ceph_osdc_alloc_request() ceph_osdc_build_request() is going away. Grab snapc and initialize ->r_snapid in ceph_osdc_alloc_request(). 
Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index cacce9e35f08..ccb9539dc780 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -391,6 +391,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, req->r_osdc = osdc; req->r_mempool = use_mempool; req->r_num_ops = num_ops; + req->r_snapid = CEPH_NOSNAP; + req->r_snapc = ceph_get_snap_context(snapc); kref_init(&req->r_kref); init_completion(&req->r_completion); @@ -2457,7 +2459,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, unsigned int i; req->r_snapid = snap_id; - req->r_snapc = ceph_get_snap_context(snapc); + WARN_ON(snapc != req->r_snapc); /* encode request */ msg->hdr.version = cpu_to_le16(4); @@ -2508,7 +2510,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); if (req->r_snapc) { - for (i = 0; i < snapc->num_snaps; i++) { + for (i = 0; i < req->r_snapc->num_snaps; i++) { ceph_encode_64(&p, req->r_snapc->snaps[i]); } } -- cgit v1.2.3 From 13d1ad16d05eebb4db977eb955716b9da2c19fbd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 27 Apr 2016 14:15:51 +0200 Subject: libceph: move message allocation out of ceph_osdc_alloc_request() The size of ->r_request and ->r_reply messages depends on the size of the object name (ceph_object_id), while the size of ceph_osd_request is fixed. 
Move message allocation into a separate function that would have to be called after ceph_object_id and ceph_object_locator (which is also going to become variable in size with RADOS namespaces) have been filled in: req = ceph_osdc_alloc_request(...); <fill in req->r_base_oid> <fill in req->r_base_oloc> ceph_osdc_alloc_messages(req); Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 18 ++++++++- fs/ceph/addr.c | 8 ++++ fs/ceph/file.c | 7 ++++ include/linux/ceph/osd_client.h | 1 + net/ceph/osd_client.c | 88 +++++++++++++++++++++++------------------ 5 files changed, 82 insertions(+), 40 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c3089f32a392..bda4deade82e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1954,7 +1954,7 @@ static struct ceph_osd_request *rbd_osd_req_create( osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO); if (!osd_req) - return NULL; /* ENOMEM */ + goto fail; if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; @@ -1967,7 +1967,14 @@ static struct ceph_osd_request *rbd_osd_req_create( osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name); + if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) + goto fail; + return osd_req; + +fail: + ceph_osdc_put_request(osd_req); + return NULL; } /* @@ -2003,7 +2010,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops, false, GFP_NOIO); if (!osd_req) - return NULL; /* ENOMEM */ + goto fail; osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; osd_req->r_callback = rbd_osd_req_callback; @@ -2012,7 +2019,14 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name); + if (ceph_osdc_alloc_messages(osd_req, 
GFP_NOIO)) + goto fail; + return osd_req; + +fail: + ceph_osdc_put_request(osd_req); + return NULL; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3e61fc8bb371..6fee7e0b8931 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1762,6 +1762,10 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) "%llx.00000000", ci->i_vino.ino); rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name); + err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS); + if (err) + goto out_unlock; + wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL, 1, false, GFP_NOFS); if (!wr_req) { @@ -1775,6 +1779,10 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) wr_req->r_base_oloc.pool = pool; wr_req->r_base_oid = rd_req->r_base_oid; + err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS); + if (err) + goto out_unlock; + /* one page should be large enough for STAT data */ pages = ceph_alloc_page_vector(1, GFP_KERNEL); if (IS_ERR(pages)) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a79f9269831e..5d46d106bbb7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -717,6 +717,13 @@ static void ceph_aio_retry_work(struct work_struct *work) req->r_base_oloc = orig_req->r_base_oloc; req->r_base_oid = orig_req->r_base_oid; + ret = ceph_osdc_alloc_messages(req, GFP_NOFS); + if (ret) { + ceph_osdc_put_request(req); + req = orig_req; + goto out; + } + req->r_ops[0] = orig_req->r_ops[0]; osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index cbf460927c42..66a1fcd5bff7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -322,6 +322,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * unsigned int num_ops, bool use_mempool, gfp_t gfp_flags); +int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, struct ceph_snap_context 
*snapc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccb9539dc780..d66dacc9d0d4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -369,8 +369,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, gfp_t gfp_flags) { struct ceph_osd_request *req; - struct ceph_msg *msg; - size_t msg_size; if (use_mempool) { BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); @@ -407,53 +405,59 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, req->r_base_oloc.pool = -1; req->r_target_oloc.pool = -1; - msg_size = OSD_OPREPLY_FRONT_LEN; - if (num_ops > CEPH_OSD_SLAB_OPS) { - /* ceph_osd_op and rval */ - msg_size += (num_ops - CEPH_OSD_SLAB_OPS) * - (sizeof(struct ceph_osd_op) + 4); - } + dout("%s req %p\n", __func__, req); + return req; +} +EXPORT_SYMBOL(ceph_osdc_alloc_request); - /* create reply message */ - if (use_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); - else - msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, - gfp_flags, true); - if (!msg) { - ceph_osdc_put_request(req); - return NULL; - } - req->r_reply = msg; +int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) +{ + struct ceph_osd_client *osdc = req->r_osdc; + struct ceph_msg *msg; + int msg_size; + /* create request message */ msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */ msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */ msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ msg_size += 1 + 8 + 4 + 4; /* pgid */ - msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */ - msg_size += 2 + num_ops * sizeof(struct ceph_osd_op); + msg_size += 4 + req->r_base_oid.name_len; /* oid */ + msg_size += 2 + req->r_num_ops * sizeof(struct ceph_osd_op); msg_size += 8; /* snapid */ msg_size += 8; /* snap_seq */ - msg_size += 4 + 8 * (snapc ? snapc->num_snaps : 0); /* snaps */ + msg_size += 4 + 8 * (req->r_snapc ? 
req->r_snapc->num_snaps : 0); msg_size += 4; /* retry_attempt */ - /* create request message; allow space for oid */ - if (use_mempool) + if (req->r_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true); - if (!msg) { - ceph_osdc_put_request(req); - return NULL; - } + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true); + if (!msg) + return -ENOMEM; memset(msg->front.iov_base, 0, msg->front.iov_len); - req->r_request = msg; - return req; + /* create reply message */ + msg_size = OSD_OPREPLY_FRONT_LEN; + if (req->r_num_ops > CEPH_OSD_SLAB_OPS) { + /* ceph_osd_op and rval */ + msg_size += (req->r_num_ops - CEPH_OSD_SLAB_OPS) * + (sizeof(struct ceph_osd_op) + 4); + } + + if (req->r_mempool) + msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); + else + msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true); + if (!msg) + return -ENOMEM; + + req->r_reply = msg; + + return 0; } -EXPORT_SYMBOL(ceph_osdc_alloc_request); +EXPORT_SYMBOL(ceph_osdc_alloc_messages); static bool osd_req_opcode_valid(u16 opcode) { @@ -828,17 +832,17 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, GFP_NOFS); - if (!req) - return ERR_PTR(-ENOMEM); + if (!req) { + r = -ENOMEM; + goto fail; + } req->r_flags = flags; /* calculate max write size */ r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen); - if (r < 0) { - ceph_osdc_put_request(req); - return ERR_PTR(r); - } + if (r) + goto fail; if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { osd_req_op_init(req, which, opcode, 0); @@ -864,7 +868,15 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, "%llx.%08llx", vino.ino, objnum); req->r_base_oid.name_len = strlen(req->r_base_oid.name); + r = ceph_osdc_alloc_messages(req, GFP_NOFS); + if (r) + goto fail; + return req; + +fail: + ceph_osdc_put_request(req); + return 
ERR_PTR(r); } EXPORT_SYMBOL(ceph_osdc_new_request); -- cgit v1.2.3 From 711da55d36a6f1eddcd340969be7223110d2f6b0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 27 Apr 2016 18:32:56 +0200 Subject: libceph: change how osd_op_reply message size is calculated For a message pool message, preallocate a page, just like we do for osd_op. For a normal message, take ceph_object_id into account and don't bother subtracting CEPH_OSD_SLAB_OPS ceph_osd_ops. Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d66dacc9d0d4..75e27bd3d372 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -19,7 +19,6 @@ #include #include -#define OSD_OP_FRONT_LEN 4096 #define OSD_OPREPLY_FRONT_LEN 512 static struct kmem_cache *ceph_osd_request_cache; @@ -440,11 +439,8 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) /* create reply message */ msg_size = OSD_OPREPLY_FRONT_LEN; - if (req->r_num_ops > CEPH_OSD_SLAB_OPS) { - /* ceph_osd_op and rval */ - msg_size += (req->r_num_ops - CEPH_OSD_SLAB_OPS) * - (sizeof(struct ceph_osd_op) + 4); - } + msg_size += req->r_base_oid.name_len; + msg_size += req->r_num_ops * sizeof(struct ceph_osd_op); if (req->r_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -2702,13 +2698,11 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) goto out; err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, - OSD_OP_FRONT_LEN, 10, true, - "osd_op"); + PAGE_SIZE, 10, true, "osd_op"); if (err < 0) goto out_mempool; err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, 10, true, - "osd_op_reply"); + PAGE_SIZE, 10, true, "osd_op_reply"); if (err < 0) goto out_msgpool; -- cgit v1.2.3 From d30291b985d1854565d7f2c82a4457869d5265e8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 29 Apr 2016 19:54:20 +0200 
Subject: libceph: variable-sized ceph_object_id Currently ceph_object_id can hold object names of up to 100 (CEPH_MAX_OID_NAME_LEN) characters. This is enough for all use cases, except one - long rbd image names: - a format 1 header is named "<imgname>.rbd" - an object that points to a format 2 header is named "rbd_id.<imgname>" We operate on these potentially long-named objects during rbd map, and, for format 1 images, during header refresh. (A format 2 header name is a small system-generated string.) Lift this 100 character limit by making ceph_object_id be able to point to an externally-allocated string. Apart from being able to work with almost arbitrarily-long named objects, this allows us to reduce the size of ceph_object_id from >100 bytes to 64 bytes. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 8 +++- fs/ceph/addr.c | 6 +-- fs/ceph/file.c | 2 +- fs/ceph/ioctl.c | 2 +- include/linux/ceph/osdmap.h | 62 ++++++++++++++++++------------ net/ceph/debugfs.c | 2 +- net/ceph/osd_client.c | 16 +++++--- net/ceph/osdmap.c | 93 ++++++++++++++++++++++++++++++++++++++++++++- 8 files changed, 150 insertions(+), 41 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bda4deade82e..3bf93a2a20f0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1965,7 +1965,9 @@ static struct ceph_osd_request *rbd_osd_req_create( osd_req->r_priv = obj_request; osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); - ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name); + if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", + obj_request->object_name)) + goto fail; if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) goto fail; @@ -2017,7 +2019,9 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) osd_req->r_priv = obj_request; osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout); - ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name); + if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", 
+ obj_request->object_name)) + goto fail; if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) goto fail; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6fee7e0b8931..6f28dd9bacb2 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1758,9 +1758,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) rd_req->r_flags = CEPH_OSD_FLAG_READ; osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); rd_req->r_base_oloc.pool = pool; - snprintf(rd_req->r_base_oid.name, sizeof(rd_req->r_base_oid.name), - "%llx.00000000", ci->i_vino.ino); - rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name); + ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino); err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS); if (err) @@ -1777,7 +1775,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); wr_req->r_base_oloc.pool = pool; - wr_req->r_base_oid = rd_req->r_base_oid; + ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS); if (err) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5d46d106bbb7..9d470397e249 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -715,7 +715,7 @@ static void ceph_aio_retry_work(struct work_struct *work) CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE; req->r_base_oloc = orig_req->r_base_oloc; - req->r_base_oid = orig_req->r_base_oid; + ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid); ret = ceph_osdc_alloc_messages(req, GFP_NOFS); if (ret) { diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index f851d8d70158..db296709784a 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -213,7 +213,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ceph_ino(inode), dl.object_no); oloc.pool = ceph_file_layout_pg_pool(ci->i_layout); - ceph_oid_set_name(&oid, dl.object_name); + ceph_oid_printf(&oid, "%s", dl.object_name); r = 
ceph_oloc_oid_to_pg(osdc->osdmap, &oloc, &oid, &pgid); if (r < 0) { diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e55c08bc3a96..777a29412706 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -64,11 +64,47 @@ struct ceph_object_locator { */ #define CEPH_MAX_OID_NAME_LEN 100 +/* + * 51-char inline_name is long enough for all cephfs and all but one + * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be + * arbitrarily long (~PAGE_SIZE). It's done once during rbd map; all + * other rbd requests fit into inline_name. + * + * Makes ceph_object_id 64 bytes on 64-bit. + */ +#define CEPH_OID_INLINE_LEN 52 + +/* + * Both inline and external buffers have space for a NUL-terminator, + * which is carried around. It's not required though - RADOS object + * names don't have to be NUL-terminated and may contain NULs. + */ struct ceph_object_id { - char name[CEPH_MAX_OID_NAME_LEN]; + char *name; + char inline_name[CEPH_OID_INLINE_LEN]; int name_len; }; +static inline void ceph_oid_init(struct ceph_object_id *oid) +{ + oid->name = oid->inline_name; + oid->name_len = 0; +} + +static inline bool ceph_oid_empty(const struct ceph_object_id *oid) +{ + return oid->name == oid->inline_name && !oid->name_len; +} + +void ceph_oid_copy(struct ceph_object_id *dest, + const struct ceph_object_id *src); +__printf(2, 3) +void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...); +__printf(3, 4) +int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, + const char *fmt, ...); +void ceph_oid_destroy(struct ceph_object_id *oid); + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -113,30 +149,6 @@ struct ceph_osdmap { int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; -static inline void ceph_oid_set_name(struct ceph_object_id *oid, - const char *name) -{ - int len; - - len = strlen(name); - if (len > sizeof(oid->name)) { - WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n", - name, len, sizeof(oid->name)); 
- len = sizeof(oid->name); - } - - memcpy(oid->name, name, len); - oid->name_len = len; -} - -static inline void ceph_oid_copy(struct ceph_object_id *dest, - struct ceph_object_id *src) -{ - BUG_ON(src->name_len > sizeof(dest->name)); - memcpy(dest->name, src->name, src->name_len); - dest->name_len = src->name_len; -} - static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) { return osd >= 0 && osd < map->max_osd && diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index b902fbc7863e..6f8413293d15 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -161,7 +161,7 @@ static int osdc_show(struct seq_file *s, void *pp) req->r_osd ? req->r_osd->o_osd : -1, req->r_pgid.pool, req->r_pgid.seed); - seq_printf(s, "%.*s", req->r_base_oid.name_len, + seq_printf(s, "%*pE", req->r_base_oid.name_len, req->r_base_oid.name); if (req->r_reassert_version.epoch) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 75e27bd3d372..95910aed8e2e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -334,7 +334,10 @@ static void ceph_osdc_release_request(struct kref *kref) for (which = 0; which < req->r_num_ops; which++) osd_req_op_data_release(req, which); + ceph_oid_destroy(&req->r_base_oid); + ceph_oid_destroy(&req->r_target_oid); ceph_put_snap_context(req->r_snapc); + if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS) @@ -401,7 +404,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); + ceph_oid_init(&req->r_base_oid); req->r_base_oloc.pool = -1; + ceph_oid_init(&req->r_target_oid); req->r_target_oloc.pool = -1; dout("%s req %p\n", __func__, req); @@ -415,6 +420,8 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) struct ceph_msg *msg; int msg_size; + WARN_ON(ceph_oid_empty(&req->r_base_oid)); + /* create request message */ msg_size = 4 + 4 + 4; /* 
client_inc, osdmap_epoch, flags */ msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */ @@ -859,10 +866,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout); - - snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name), - "%llx.%08llx", vino.ino, objnum); - req->r_base_oid.name_len = strlen(req->r_base_oid.name); + ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); r = ceph_osdc_alloc_messages(req, GFP_NOFS); if (r) @@ -1410,7 +1414,7 @@ static int __calc_request_pg(struct ceph_osdmap *osdmap, req->r_target_oloc = req->r_base_oloc; /* struct */ need_check_tiering = true; } - if (req->r_target_oid.name_len == 0) { + if (ceph_oid_empty(&req->r_target_oid)) { ceph_oid_copy(&req->r_target_oid, &req->r_base_oid); need_check_tiering = true; } @@ -2501,7 +2505,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, /* oid */ ceph_encode_32(&p, req->r_base_oid.name_len); memcpy(p, req->r_base_oid.name, req->r_base_oid.name_len); - dout("oid '%.*s' len %d\n", req->r_base_oid.name_len, + dout("oid %*pE len %d\n", req->r_base_oid.name_len, req->r_base_oid.name, req->r_base_oid.name_len); p += req->r_base_oid.name_len; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 243574c8cf33..4668b871ca47 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1381,8 +1381,99 @@ bad: return ERR_PTR(err); } +void ceph_oid_copy(struct ceph_object_id *dest, + const struct ceph_object_id *src) +{ + WARN_ON(!ceph_oid_empty(dest)); + + if (src->name != src->inline_name) { + /* very rare, see ceph_object_id definition */ + dest->name = kmalloc(src->name_len + 1, + GFP_NOIO | __GFP_NOFAIL); + } + memcpy(dest->name, src->name, src->name_len + 1); + dest->name_len = src->name_len; +} +EXPORT_SYMBOL(ceph_oid_copy); +static __printf(2, 0) +int oid_printf_vargs(struct ceph_object_id *oid, const char *fmt, va_list ap) +{ + int len; + + 
WARN_ON(!ceph_oid_empty(oid)); + + len = vsnprintf(oid->inline_name, sizeof(oid->inline_name), fmt, ap); + if (len >= sizeof(oid->inline_name)) + return len; + + oid->name_len = len; + return 0; +} + +/* + * If oid doesn't fit into inline buffer, BUG. + */ +void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + BUG_ON(oid_printf_vargs(oid, fmt, ap)); + va_end(ap); +} +EXPORT_SYMBOL(ceph_oid_printf); + +static __printf(3, 0) +int oid_aprintf_vargs(struct ceph_object_id *oid, gfp_t gfp, + const char *fmt, va_list ap) +{ + va_list aq; + int len; + + va_copy(aq, ap); + len = oid_printf_vargs(oid, fmt, aq); + va_end(aq); + + if (len) { + char *external_name; + + external_name = kmalloc(len + 1, gfp); + if (!external_name) + return -ENOMEM; + + oid->name = external_name; + WARN_ON(vsnprintf(oid->name, len + 1, fmt, ap) != len); + oid->name_len = len; + } + + return 0; +} + +/* + * If oid doesn't fit into inline buffer, allocate. + */ +int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, + const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = oid_aprintf_vargs(oid, gfp, fmt, ap); + va_end(ap); + + return ret; +} +EXPORT_SYMBOL(ceph_oid_aprintf); + +void ceph_oid_destroy(struct ceph_object_id *oid) +{ + if (oid->name != oid->inline_name) + kfree(oid->name); +} +EXPORT_SYMBOL(ceph_oid_destroy); /* * calculate file layout from given offset, length. 
@@ -1474,7 +1565,7 @@ int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, pg_out->seed = ceph_str_hash(pi->object_hash, oid->name, oid->name_len); - dout("%s '%.*s' pgid %llu.%x\n", __func__, oid->name_len, oid->name, + dout("%s %*pE pgid %llu.%x\n", __func__, oid->name_len, oid->name, pg_out->pool, pg_out->seed); return 0; } -- cgit v1.2.3 From c41d13a31fefed303f734c0c5106f6dcd262168e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 29 Apr 2016 20:01:25 +0200 Subject: rbd: use header_oid instead of header_name Switch to ceph_object_id and use ceph_oid_aprintf() instead of a bare const char *. This reduces noise in rbd_dev_header_name(). Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 57 ++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3bf93a2a20f0..f3ea927f93de 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -350,7 +350,7 @@ struct rbd_device { struct rbd_spec *spec; struct rbd_options *opts; - char *header_name; + struct ceph_object_id header_oid; struct ceph_file_layout layout; @@ -3117,7 +3117,7 @@ static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; int ret; - obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + obj_request = rbd_obj_request_create(rbd_dev->header_oid.name, 0, 0, OBJ_REQUEST_NODATA); if (!obj_request) return -ENOMEM; @@ -3148,7 +3148,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) int ret; dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, - rbd_dev->header_name, (unsigned long long)notify_id, + rbd_dev->header_oid.name, (unsigned long long)notify_id, (unsigned int)opcode); /* @@ -3179,7 +3179,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( struct rbd_obj_request *obj_request; int ret; - obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 
0, + obj_request = rbd_obj_request_create(rbd_dev->header_oid.name, 0, 0, OBJ_REQUEST_NODATA); if (!obj_request) return ERR_PTR(-ENOMEM); @@ -3612,7 +3612,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) if (!ondisk) return -ENOMEM; - ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_oid.name, 0, size, ondisk); if (ret < 0) goto out; @@ -4054,6 +4054,8 @@ static void rbd_dev_release(struct device *dev) struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); bool need_put = !!rbd_dev->opts; + ceph_oid_destroy(&rbd_dev->header_oid); + rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); kfree(rbd_dev->opts); @@ -4084,6 +4086,8 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); + ceph_oid_init(&rbd_dev->header_oid); + rbd_dev->dev.bus = &rbd_bus_type; rbd_dev->dev.type = &rbd_device_type; rbd_dev->dev.parent = &rbd_root_dev; @@ -4132,7 +4136,7 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_size", &snapid, sizeof (snapid), &size_buf, sizeof (size_buf)); @@ -4172,7 +4176,7 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_object_prefix", NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); @@ -4207,7 +4211,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 unsup; int ret; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_features", 
&snapid, sizeof (snapid), &features_buf, sizeof (features_buf)); @@ -4269,7 +4273,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(rbd_dev->spec->snap_id); - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_parent", &snapid, sizeof (snapid), reply_buf, size); @@ -4372,7 +4376,7 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) u64 stripe_count; int ret; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_stripe_unit_count", NULL, 0, (char *)&striping_info_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); @@ -4620,7 +4624,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_snapcontext", NULL, 0, reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); @@ -4685,7 +4689,7 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, return ERR_PTR(-ENOMEM); snapid = cpu_to_le64(snap_id); - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, "rbd", "get_snapshot_name", &snapid, sizeof (snapid), reply_buf, size); @@ -5281,35 +5285,25 @@ err_out_unlock: static int rbd_dev_header_name(struct rbd_device *rbd_dev) { struct rbd_spec *spec = rbd_dev->spec; - size_t size; + int ret; /* Record the header object name for this rbd image. 
*/ rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); if (rbd_dev->image_format == 1) - size = strlen(spec->image_name) + sizeof (RBD_SUFFIX); + ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", + spec->image_name, RBD_SUFFIX); else - size = sizeof (RBD_HEADER_PREFIX) + strlen(spec->image_id); - - rbd_dev->header_name = kmalloc(size, GFP_KERNEL); - if (!rbd_dev->header_name) - return -ENOMEM; + ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", + RBD_HEADER_PREFIX, spec->image_id); - if (rbd_dev->image_format == 1) - sprintf(rbd_dev->header_name, "%s%s", - spec->image_name, RBD_SUFFIX); - else - sprintf(rbd_dev->header_name, "%s%s", - RBD_HEADER_PREFIX, spec->image_id); - return 0; + return ret; } static void rbd_dev_image_release(struct rbd_device *rbd_dev) { rbd_dev_unprobe(rbd_dev); - kfree(rbd_dev->header_name); - rbd_dev->header_name = NULL; rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -5348,7 +5342,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) pr_info("image %s/%s does not exist\n", rbd_dev->spec->pool_name, rbd_dev->spec->image_name); - goto out_header_name; + goto err_out_format; } } @@ -5394,7 +5388,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) goto err_out_probe; dout("discovered format %u image, header name is %s\n", - rbd_dev->image_format, rbd_dev->header_name); + rbd_dev->image_format, rbd_dev->header_oid.name); return 0; err_out_probe: @@ -5402,9 +5396,6 @@ err_out_probe: err_out_watch: if (!depth) rbd_dev_header_unwatch_sync(rbd_dev); -out_header_name: - kfree(rbd_dev->header_name); - rbd_dev->header_name = NULL; err_out_format: rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); -- cgit v1.2.3 From 0c0a8de13f9612a663b050afa955e6668858d1eb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:21 +0200 Subject: libceph: nuke unused fields and functions Either unused or useless: 
osdmap->mkfs_epoch osd->o_marked_for_keepalive monc->num_generic_requests osdc->map_waiters osdc->last_requested_map osdc->timeout_tid osd_req_op_cls_response_data() osdmap_apply_incremental() @msgr arg Signed-off-by: Ilya Dryomov --- include/linux/ceph/mon_client.h | 1 - include/linux/ceph/osd_client.h | 8 -------- include/linux/ceph/osdmap.h | 6 ++---- net/ceph/mon_client.c | 3 --- net/ceph/osd_client.c | 13 +------------ net/ceph/osdmap.c | 3 +-- 6 files changed, 4 insertions(+), 30 deletions(-) diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index e230e7ed60d3..330d045e4092 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -77,7 +77,6 @@ struct ceph_mon_client { /* pending generic requests */ struct rb_root generic_request_tree; - int num_generic_requests; u64 last_tid; /* subs, indexed with CEPH_SUB_* */ diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 66a1fcd5bff7..63854a8df183 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -37,11 +37,9 @@ struct ceph_osd { struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; - int o_marked_for_keepalive; struct list_head o_keepalive_item; }; - #define CEPH_OSD_SLAB_OPS 2 #define CEPH_OSD_MAX_OPS 16 @@ -206,13 +204,10 @@ struct ceph_osd_client { struct ceph_osdmap *osdmap; /* current map */ struct rw_semaphore map_sem; - struct completion map_waiters; - u64 last_requested_map; struct mutex request_mutex; struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ - u64 timeout_tid; /* tid of timeout triggering rq */ u64 last_tid; /* tid of last request */ struct rb_root requests; /* pending requests */ struct list_head req_lru; /* in-flight lru */ @@ -271,9 +266,6 @@ extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, extern struct ceph_osd_data *osd_req_op_extent_osd_data( struct ceph_osd_request *osd_req, unsigned int 
which); -extern struct ceph_osd_data *osd_req_op_cls_response_data( - struct ceph_osd_request *osd_req, - unsigned int which); extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, unsigned int which, diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 777a29412706..ce7a41a182d4 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -123,7 +123,6 @@ struct ceph_pg_mapping { struct ceph_osdmap { struct ceph_fsid fsid; u32 epoch; - u32 mkfs_epoch; struct ceph_timespec created, modified; u32 flags; /* CEPH_OSDMAP_* */ @@ -205,9 +204,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) } extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); -extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, - struct ceph_osdmap *map, - struct ceph_messenger *msgr); +struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, + struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index cf638c009cfa..3dfafdad92aa 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -579,7 +579,6 @@ static int __do_generic_request(struct ceph_mon_client *monc, u64 tid, req->tid = tid != 0 ? 
tid : ++monc->last_tid; req->request->hdr.tid = cpu_to_le64(req->tid); __insert_generic_request(monc, req); - monc->num_generic_requests++; ceph_con_send(&monc->con, ceph_msg_get(req->request)); mutex_unlock(&monc->mutex); @@ -587,7 +586,6 @@ static int __do_generic_request(struct ceph_mon_client *monc, u64 tid, mutex_lock(&monc->mutex); rb_erase(&req->node, &monc->generic_request_tree); - monc->num_generic_requests--; if (!err) err = req->result; @@ -914,7 +912,6 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); monc->generic_request_tree = RB_ROOT; - monc->num_generic_requests = 0; monc->last_tid = 0; return 0; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 95910aed8e2e..32ba09be6ee6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -143,14 +143,6 @@ osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data); -struct ceph_osd_data * -osd_req_op_cls_response_data(struct ceph_osd_request *osd_req, - unsigned int which) -{ - return osd_req_op_data(osd_req, which, cls, response_data); -} -EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? 
*/ - void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, @@ -2166,8 +2158,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) dout("applying incremental map %u len %d\n", epoch, maplen); newmap = osdmap_apply_incremental(&p, next, - osdc->osdmap, - &osdc->client->msgr); + osdc->osdmap); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad; @@ -2674,8 +2665,6 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) osdc->client = client; osdc->osdmap = NULL; init_rwsem(&osdc->map_sem); - init_completion(&osdc->map_waiters); - osdc->last_requested_map = 0; mutex_init(&osdc->request_mutex); osdc->last_tid = 0; osdc->osds = RB_ROOT; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 4668b871ca47..9a0cc072a909 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1204,8 +1204,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) * decode and apply an incremental map update. */ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, - struct ceph_osdmap *map, - struct ceph_messenger *msgr) + struct ceph_osdmap *map) { struct crush_map *newcrush = NULL; struct ceph_fsid fsid; -- cgit v1.2.3 From 42a2c09f2b0b95fa147bcdb56cdc02b980b9ac5e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: open-code remove_{all,old}_osds() They are called only once, from ceph_osdc_stop() and handle_osds_timeout() respectively. 
Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 51 +++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 32ba09be6ee6..c423e11d6857 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1126,18 +1126,6 @@ static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) } } -static void remove_all_osds(struct ceph_osd_client *osdc) -{ - dout("%s %p\n", __func__, osdc); - mutex_lock(&osdc->request_mutex); - while (!RB_EMPTY_ROOT(&osdc->osds)) { - struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), - struct ceph_osd, o_node); - remove_osd(osdc, osd); - } - mutex_unlock(&osdc->request_mutex); -} - static void __move_osd_to_lru(struct ceph_osd_client *osdc, struct ceph_osd *osd) { @@ -1165,20 +1153,6 @@ static void __remove_osd_from_lru(struct ceph_osd *osd) list_del_init(&osd->o_osd_lru); } -static void remove_old_osds(struct ceph_osd_client *osdc) -{ - struct ceph_osd *osd, *nosd; - - dout("__remove_old_osds %p\n", osdc); - mutex_lock(&osdc->request_mutex); - list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { - if (time_before(jiffies, osd->lru_ttl)) - break; - remove_osd(osdc, osd); - } - mutex_unlock(&osdc->request_mutex); -} - /* * reset osd connect */ @@ -1671,12 +1645,21 @@ static void handle_osds_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, osds_timeout_work.work); unsigned long delay = osdc->client->options->osd_idle_ttl / 4; + struct ceph_osd *osd, *nosd; - dout("osds timeout\n"); + dout("%s osdc %p\n", __func__, osdc); down_read(&osdc->map_sem); - remove_old_osds(osdc); - up_read(&osdc->map_sem); + mutex_lock(&osdc->request_mutex); + + list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { + if (time_before(jiffies, osd->lru_ttl)) + break; + + remove_osd(osdc, osd); + } + mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); 
schedule_delayed_work(&osdc->osds_timeout_work, round_jiffies_relative(delay)); } @@ -2722,11 +2705,19 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) destroy_workqueue(osdc->notify_wq); cancel_delayed_work_sync(&osdc->timeout_work); cancel_delayed_work_sync(&osdc->osds_timeout_work); + + mutex_lock(&osdc->request_mutex); + while (!RB_EMPTY_ROOT(&osdc->osds)) { + struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), + struct ceph_osd, o_node); + remove_osd(osdc, osd); + } + mutex_unlock(&osdc->request_mutex); + if (osdc->osdmap) { ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = NULL; } - remove_all_osds(osdc); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); -- cgit v1.2.3 From fcd00b68bbe2bf5606cb45c2cd4a250a390bcc1f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: DEFINE_RB_FUNCS macro Given struct foo { u64 id; struct rb_node bar_node; }; generate insert_bar(), erase_bar() and lookup_bar() functions with DEFINE_RB_FUNCS(bar, struct foo, id, bar_node) The key is assumed to be an integer (u64, int, etc), compared with < and >. nodefld has to be initialized with RB_CLEAR_NODE(). Start using it for MDS, MON and OSD requests and OSD sessions. Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 54 ++++++------------------ include/linux/ceph/libceph.h | 57 ++++++++++++++++++++++++++ net/ceph/mon_client.c | 52 ++++-------------------- net/ceph/osd_client.c | 97 +++++--------------------------------------- 4 files changed, 88 insertions(+), 172 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 85b8517f17a0..cff85af425d4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -567,51 +567,23 @@ void ceph_mdsc_release_request(struct kref *kref) kfree(req); } +DEFINE_RB_FUNCS(request, struct ceph_mds_request, r_tid, r_node) + /* * lookup session, bump ref if found. * * called under mdsc->mutex. 
*/ -static struct ceph_mds_request *__lookup_request(struct ceph_mds_client *mdsc, - u64 tid) +static struct ceph_mds_request * +lookup_get_request(struct ceph_mds_client *mdsc, u64 tid) { struct ceph_mds_request *req; - struct rb_node *n = mdsc->request_tree.rb_node; - - while (n) { - req = rb_entry(n, struct ceph_mds_request, r_node); - if (tid < req->r_tid) - n = n->rb_left; - else if (tid > req->r_tid) - n = n->rb_right; - else { - ceph_mdsc_get_request(req); - return req; - } - } - return NULL; -} -static void __insert_request(struct ceph_mds_client *mdsc, - struct ceph_mds_request *new) -{ - struct rb_node **p = &mdsc->request_tree.rb_node; - struct rb_node *parent = NULL; - struct ceph_mds_request *req = NULL; + req = lookup_request(&mdsc->request_tree, tid); + if (req) + ceph_mdsc_get_request(req); - while (*p) { - parent = *p; - req = rb_entry(parent, struct ceph_mds_request, r_node); - if (new->r_tid < req->r_tid) - p = &(*p)->rb_left; - else if (new->r_tid > req->r_tid) - p = &(*p)->rb_right; - else - BUG(); - } - - rb_link_node(&new->r_node, parent, p); - rb_insert_color(&new->r_node, &mdsc->request_tree); + return req; } /* @@ -630,7 +602,7 @@ static void __register_request(struct ceph_mds_client *mdsc, req->r_num_caps); dout("__register_request %p tid %lld\n", req, req->r_tid); ceph_mdsc_get_request(req); - __insert_request(mdsc, req); + insert_request(&mdsc->request_tree, req); req->r_uid = current_fsuid(); req->r_gid = current_fsgid(); @@ -663,8 +635,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, } } - rb_erase(&req->r_node, &mdsc->request_tree); - RB_CLEAR_NODE(&req->r_node); + erase_request(&mdsc->request_tree, req); if (req->r_unsafe_dir && req->r_got_unsafe) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); @@ -1722,6 +1693,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) INIT_LIST_HEAD(&req->r_unsafe_target_item); req->r_fmode = -1; kref_init(&req->r_kref); + 
RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_wait); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); @@ -2414,7 +2386,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* get request, session */ tid = le64_to_cpu(msg->hdr.tid); mutex_lock(&mdsc->mutex); - req = __lookup_request(mdsc, tid); + req = lookup_get_request(mdsc, tid); if (!req) { dout("handle_reply on unknown tid %llu\n", tid); mutex_unlock(&mdsc->mutex); @@ -2604,7 +2576,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, fwd_seq = ceph_decode_32(&p); mutex_lock(&mdsc->mutex); - req = __lookup_request(mdsc, tid); + req = lookup_get_request(mdsc, tid); if (!req) { dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); goto out; /* dup reply? */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index db92a8d4926e..690985daad1c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -180,6 +180,63 @@ static inline int calc_pages_for(u64 off, u64 len) (off >> PAGE_SHIFT); } +/* + * These are not meant to be generic - an integer key is assumed. 
+ */ +#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +static void insert_##name(struct rb_root *root, type *t) \ +{ \ + struct rb_node **n = &root->rb_node; \ + struct rb_node *parent = NULL; \ + \ + BUG_ON(!RB_EMPTY_NODE(&t->nodefld)); \ + \ + while (*n) { \ + type *cur = rb_entry(*n, type, nodefld); \ + \ + parent = *n; \ + if (t->keyfld < cur->keyfld) \ + n = &(*n)->rb_left; \ + else if (t->keyfld > cur->keyfld) \ + n = &(*n)->rb_right; \ + else \ + BUG(); \ + } \ + \ + rb_link_node(&t->nodefld, parent, n); \ + rb_insert_color(&t->nodefld, root); \ +} \ +static void erase_##name(struct rb_root *root, type *t) \ +{ \ + BUG_ON(RB_EMPTY_NODE(&t->nodefld)); \ + rb_erase(&t->nodefld, root); \ + RB_CLEAR_NODE(&t->nodefld); \ +} + +#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ +static type *lookup_##name(struct rb_root *root, \ + typeof(((type *)0)->keyfld) key) \ +{ \ + struct rb_node *n = root->rb_node; \ + \ + while (n) { \ + type *cur = rb_entry(n, type, nodefld); \ + \ + if (key < cur->keyfld) \ + n = n->rb_left; \ + else if (key > cur->keyfld) \ + n = n->rb_right; \ + else \ + return cur; \ + } \ + \ + return NULL; \ +} + +#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) + extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 3dfafdad92aa..a426a4b03e75 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -478,45 +478,7 @@ out: /* * generic requests (currently statfs, mon_get_version) */ -static struct ceph_mon_generic_request *__lookup_generic_req( - struct ceph_mon_client *monc, u64 tid) -{ - struct ceph_mon_generic_request *req; - struct rb_node *n = monc->generic_request_tree.rb_node; - - while (n) { - req = rb_entry(n, struct ceph_mon_generic_request, node); - 
if (tid < req->tid) - n = n->rb_left; - else if (tid > req->tid) - n = n->rb_right; - else - return req; - } - return NULL; -} - -static void __insert_generic_request(struct ceph_mon_client *monc, - struct ceph_mon_generic_request *new) -{ - struct rb_node **p = &monc->generic_request_tree.rb_node; - struct rb_node *parent = NULL; - struct ceph_mon_generic_request *req = NULL; - - while (*p) { - parent = *p; - req = rb_entry(parent, struct ceph_mon_generic_request, node); - if (new->tid < req->tid) - p = &(*p)->rb_left; - else if (new->tid > req->tid) - p = &(*p)->rb_right; - else - BUG(); - } - - rb_link_node(&new->node, parent, p); - rb_insert_color(&new->node, &monc->generic_request_tree); -} +DEFINE_RB_FUNCS(generic_request, struct ceph_mon_generic_request, tid, node) static void release_generic_request(struct kref *kref) { @@ -551,7 +513,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, struct ceph_msg *m; mutex_lock(&monc->mutex); - req = __lookup_generic_req(monc, tid); + req = lookup_generic_request(&monc->generic_request_tree, tid); if (!req) { dout("get_generic_reply %lld dne\n", tid); *skip = 1; @@ -578,14 +540,14 @@ static int __do_generic_request(struct ceph_mon_client *monc, u64 tid, /* register request */ req->tid = tid != 0 ? 
tid : ++monc->last_tid; req->request->hdr.tid = cpu_to_le64(req->tid); - __insert_generic_request(monc, req); + insert_generic_request(&monc->generic_request_tree, req); ceph_con_send(&monc->con, ceph_msg_get(req->request)); mutex_unlock(&monc->mutex); err = wait_for_completion_interruptible(&req->completion); mutex_lock(&monc->mutex); - rb_erase(&req->node, &monc->generic_request_tree); + erase_generic_request(&monc->generic_request_tree, req); if (!err) err = req->result; @@ -619,7 +581,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, dout("handle_statfs_reply %p tid %llu\n", msg, tid); mutex_lock(&monc->mutex); - req = __lookup_generic_req(monc, tid); + req = lookup_generic_request(&monc->generic_request_tree, tid); if (req) { *(struct ceph_statfs *)req->buf = reply->st; req->result = 0; @@ -651,6 +613,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) return -ENOMEM; kref_init(&req->kref); + RB_CLEAR_NODE(&req->node); req->buf = buf; init_completion(&req->completion); @@ -696,7 +659,7 @@ static void handle_get_version_reply(struct ceph_mon_client *monc, goto bad; mutex_lock(&monc->mutex); - req = __lookup_generic_req(monc, handle); + req = lookup_generic_request(&monc->generic_request_tree, handle); if (req) { *(u64 *)req->buf = ceph_decode_64(&p); req->result = 0; @@ -732,6 +695,7 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, return -ENOMEM; kref_init(&req->kref); + RB_CLEAR_NODE(&req->node); req->buf = newest; init_completion(&req->completion); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c423e11d6857..8256051ed88f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -875,45 +875,7 @@ EXPORT_SYMBOL(ceph_osdc_new_request); /* * We keep osd requests in an rbtree, sorted by ->r_tid. 
*/ -static void __insert_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *new) -{ - struct rb_node **p = &osdc->requests.rb_node; - struct rb_node *parent = NULL; - struct ceph_osd_request *req = NULL; - - while (*p) { - parent = *p; - req = rb_entry(parent, struct ceph_osd_request, r_node); - if (new->r_tid < req->r_tid) - p = &(*p)->rb_left; - else if (new->r_tid > req->r_tid) - p = &(*p)->rb_right; - else - BUG(); - } - - rb_link_node(&new->r_node, parent, p); - rb_insert_color(&new->r_node, &osdc->requests); -} - -static struct ceph_osd_request *__lookup_request(struct ceph_osd_client *osdc, - u64 tid) -{ - struct ceph_osd_request *req; - struct rb_node *n = osdc->requests.rb_node; - - while (n) { - req = rb_entry(n, struct ceph_osd_request, r_node); - if (tid < req->r_tid) - n = n->rb_left; - else if (tid > req->r_tid) - n = n->rb_right; - else - return req; - } - return NULL; -} +DEFINE_RB_FUNCS(request, struct ceph_osd_request, r_tid, r_node) static struct ceph_osd_request * __lookup_request_ge(struct ceph_osd_client *osdc, @@ -1101,6 +1063,8 @@ static void put_osd(struct ceph_osd *osd) } } +DEFINE_RB_FUNCS(osd, struct ceph_osd, o_osd, o_node) + /* * remove an osd from our map */ @@ -1111,8 +1075,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) WARN_ON(!list_empty(&osd->o_linger_requests)); list_del_init(&osd->o_osd_lru); - rb_erase(&osd->o_node, &osdc->osds); - RB_CLEAR_NODE(&osd->o_node); + erase_osd(&osdc->osds, osd); } static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) @@ -1188,45 +1151,6 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) return 0; } -static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) -{ - struct rb_node **p = &osdc->osds.rb_node; - struct rb_node *parent = NULL; - struct ceph_osd *osd = NULL; - - dout("__insert_osd %p osd%d\n", new, new->o_osd); - while (*p) { - parent = *p; - osd = rb_entry(parent, struct 
ceph_osd, o_node); - if (new->o_osd < osd->o_osd) - p = &(*p)->rb_left; - else if (new->o_osd > osd->o_osd) - p = &(*p)->rb_right; - else - BUG(); - } - - rb_link_node(&new->o_node, parent, p); - rb_insert_color(&new->o_node, &osdc->osds); -} - -static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) -{ - struct ceph_osd *osd; - struct rb_node *n = osdc->osds.rb_node; - - while (n) { - osd = rb_entry(n, struct ceph_osd, o_node); - if (o < osd->o_osd) - n = n->rb_left; - else if (o > osd->o_osd) - n = n->rb_right; - else - return osd; - } - return NULL; -} - static void __schedule_osd_timeout(struct ceph_osd_client *osdc) { schedule_delayed_work(&osdc->timeout_work, @@ -1248,7 +1172,7 @@ static void __register_request(struct ceph_osd_client *osdc, req->r_tid = ++osdc->last_tid; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); dout("__register_request %p tid %lld\n", req, req->r_tid); - __insert_request(osdc, req); + insert_request(&osdc->requests, req); ceph_osdc_get_request(req); osdc->num_requests++; if (osdc->num_requests == 1) { @@ -1270,8 +1194,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, } dout("__unregister_request %p tid %lld\n", req, req->r_tid); - rb_erase(&req->r_node, &osdc->requests); - RB_CLEAR_NODE(&req->r_node); + erase_request(&osdc->requests, req); osdc->num_requests--; if (req->r_osd) { @@ -1482,7 +1405,7 @@ static int __map_request(struct ceph_osd_client *osdc, req->r_osd = NULL; } - req->r_osd = __lookup_osd(osdc, o); + req->r_osd = lookup_osd(&osdc->osds, o); if (!req->r_osd && o >= 0) { err = -ENOMEM; req->r_osd = create_osd(osdc, o); @@ -1492,7 +1415,7 @@ static int __map_request(struct ceph_osd_client *osdc, } dout("map_request osd %p is osd%d\n", req->r_osd, o); - __insert_osd(osdc, req->r_osd); + insert_osd(&osdc->osds, req->r_osd); ceph_con_open(&req->r_osd->o_con, CEPH_ENTITY_TYPE_OSD, o, @@ -1822,7 +1745,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) /* lookup 
*/ down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); - req = __lookup_request(osdc, tid); + req = lookup_request(&osdc->requests, tid); if (req == NULL) { dout("handle_reply tid %llu dne\n", tid); goto bad_mutex; @@ -2880,7 +2803,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, tid = le64_to_cpu(hdr->tid); mutex_lock(&osdc->request_mutex); - req = __lookup_request(osdc, tid); + req = lookup_request(&osdc->requests, tid); if (!req) { dout("%s osd%d tid %llu unknown, skipping\n", __func__, osd->o_osd, tid); -- cgit v1.2.3 From 985c1673885b77b2e0167c6478a833817d1e2fe5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: fix ceph_eversion encoding eversion_t is version+epoch in userspace and is encoded in that order. ceph_eversion is defined as epoch+version in rados.h, yet we memcpy it in __send_request(). Reorder ceph_eversion fields. Signed-off-by: Ilya Dryomov --- include/linux/ceph/rados.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2f822dca1046..913c87c26d33 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -114,8 +114,8 @@ struct ceph_object_layout { * compound epoch+version, used by storage layer to serialize mutations */ struct ceph_eversion { - __le32 epoch; __le64 version; + __le32 epoch; } __attribute__ ((packed)); /* -- cgit v1.2.3 From d9591f5e28686277d9312d3c7422faf1368b305e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: rename ceph_oloc_oid_to_pg() Rename ceph_oloc_oid_to_pg() to ceph_object_locator_to_pg(). Emphasise that the returned PG is a raw PG and return -ENOENT instead of -EIO if the pool doesn't exist.
Signed-off-by: Ilya Dryomov --- fs/ceph/ioctl.c | 2 +- include/linux/ceph/osdmap.h | 9 ++++----- net/ceph/osd_client.c | 4 ++-- net/ceph/osdmap.c | 31 ++++++++++++++++--------------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index db296709784a..cca7fff22725 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -215,7 +215,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) oloc.pool = ceph_file_layout_pg_pool(ci->i_layout); ceph_oid_printf(&oid, "%s", dl.object_name); - r = ceph_oloc_oid_to_pg(osdc->osdmap, &oloc, &oid, &pgid); + r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid); if (r < 0) { up_read(&osdc->map_sem); return r; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index ce7a41a182d4..b70440c05b49 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -213,11 +213,10 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); -/* calculate mapping of object to a placement group */ -extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, - struct ceph_object_locator *oloc, - struct ceph_object_id *oid, - struct ceph_pg *pg_out); +int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid); extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 8256051ed88f..cb9f1953f5fb 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1324,8 +1324,8 @@ static int __calc_request_pg(struct ceph_osdmap *osdmap, /* !pi is caught in ceph_oloc_oid_to_pg() */ } - return ceph_oloc_oid_to_pg(osdmap, &req->r_target_oloc, - &req->r_target_oid, pg_out); + return ceph_object_locator_to_pg(osdmap, &req->r_target_oid, + &req->r_target_oloc, pg_out); } static void 
__enqueue_request(struct ceph_osd_request *req) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 9a0cc072a909..6267839cb246 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1545,30 +1545,31 @@ invalid: EXPORT_SYMBOL(ceph_calc_file_object_mapping); /* - * Calculate mapping of a (oloc, oid) pair to a PG. Should only be - * called with target's (oloc, oid), since tiering isn't taken into - * account. + * Map an object into a PG. + * + * Should only be called with target_oid and target_oloc (as opposed to + * base_oid and base_oloc), since tiering isn't taken into account. */ -int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, - struct ceph_object_locator *oloc, - struct ceph_object_id *oid, - struct ceph_pg *pg_out) +int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid) { struct ceph_pg_pool_info *pi; - pi = __lookup_pg_pool(&osdmap->pg_pools, oloc->pool); + pi = ceph_pg_pool_by_id(osdmap, oloc->pool); if (!pi) - return -EIO; + return -ENOENT; - pg_out->pool = oloc->pool; - pg_out->seed = ceph_str_hash(pi->object_hash, oid->name, - oid->name_len); + raw_pgid->pool = oloc->pool; + raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name, + oid->name_len); - dout("%s %*pE pgid %llu.%x\n", __func__, oid->name_len, oid->name, -