diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-02 13:10:25 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-02 13:10:25 -0700 |
| commit | 1081230b748de8f03f37f80c53dfa89feda9b8de (patch) | |
| tree | 7238d60e01f0843bad8f03b5d84e4220fbba5e76 /drivers | |
| parent | df910390e2db07a76c87f258475f6c96253cee6c (diff) | |
| parent | 2ca495ac27d245513c11fed70591b1838250e240 (diff) | |
| download | linux-1081230b748de8f03f37f80c53dfa89feda9b8de.tar.gz linux-1081230b748de8f03f37f80c53dfa89feda9b8de.tar.bz2 linux-1081230b748de8f03f37f80c53dfa89feda9b8de.zip | |
Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
"This first core part of the block IO changes contains:
- Cleanup of the bio IO error signaling from Christoph. We used to
rely on the uptodate bit and passing around of an error, now we
store the error in the bio itself.
- Improvement of the above from myself, by shrinking the bio size
down again to fit in two cachelines on x86-64.
- Revert of the max_hw_sectors cap removal from a revision again,
from Jeff Moyer. This caused performance regressions in various
tests. Reinstate the limit, bump it to a more reasonable size
instead.
- Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me.
Most devices have huge trim limits, which can cause nasty latencies
when deleting files. Enable the admin to configure the size down.
We will look into having a more sane default instead of UINT_MAX
sectors.
- Improvement of the SGP gaps logic from Keith Busch.
- Enable the block core to handle arbitrarily sized bios, which
enables a nice simplification of bio_add_page() (which is an IO hot
path). From Kent.
- Improvements to the partition io stats accounting, making it
faster. From Ming Lei.
- Also from Ming Lei, a basic fixup for overflow of the sysfs pending
file in blk-mq, as well as a fix for a blk-mq timeout race
condition.
- Ming Lin has been carrying Kents above mentioned patches forward
for a while, and testing them. Ming also did a few fixes around
that.
- Sasha Levin found and fixed a use-after-free problem introduced by
the bio->bi_error changes from Christoph.
- Small blk cgroup cleanup from Viresh Kumar"
* 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits)
blk: Fix bio_io_vec index when checking bvec gaps
block: Replace SG_GAPS with new queue limits mask
block: bump BLK_DEF_MAX_SECTORS to 2560
Revert "block: remove artifical max_hw_sectors cap"
blk-mq: fix race between timeout and freeing request
blk-mq: fix buffer overflow when reading sysfs file of 'pending'
Documentation: update notes in biovecs about arbitrarily sized bios
block: remove bio_get_nr_vecs()
fs: use helper bio_add_page() instead of open coding on bi_io_vec
block: kill merge_bvec_fn() completely
md/raid5: get rid of bio_fits_rdev()
md/raid5: split bio for chunk_aligned_read
block: remove split code in blkdev_issue_{discard,write_same}
btrfs: remove bio splitting and merge_bvec_fn() calls
bcache: remove driver private bio splitting code
block: simplify bio_add_page()
block: make generic_make_request handle arbitrarily sized bios
blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL)
block: don't access bio->bi_error after bio_put()
block: shrink struct bio down to 2 cache lines again
...
Diffstat (limited to 'drivers')
75 files changed, 446 insertions, 1386 deletions
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 46c282fff104..dd73e1ff1759 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -395,7 +395,7 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->flags & DEVFL_TKILL); WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); - blk_queue_max_hw_sectors(q, 1024); + blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); q->backing_dev_info.name = "aoe"; q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; d->bufpool = mp; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 422b7d84f686..ad80c85e0857 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1110,7 +1110,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) d->ip.rq = NULL; do { bio = rq->bio; - bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags); + bok = !fastfail && !bio->bi_error; } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size)); /* cf. http://lkml.org/lkml/2006/10/31/28 */ @@ -1172,7 +1172,7 @@ ktiocomplete(struct frame *f) ahout->cmdstat, ahin->cmdstat, d->aoemajor, d->aoeminor); noskb: if (buf) - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; goto out; } @@ -1185,7 +1185,7 @@ noskb: if (buf) "aoe: runt data size in read from", (long) d->aoemajor, d->aoeminor, skb->len, n); - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; break; } if (n > f->iter.bi_size) { @@ -1193,7 +1193,7 @@ noskb: if (buf) "aoe: too-large data size in read from", (long) d->aoemajor, d->aoeminor, n, f->iter.bi_size); - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; break; } bvcpy(skb, f->buf->bio, f->iter, n); @@ -1695,7 +1695,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) if (buf == NULL) return; buf->iter.bi_size = 0; - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; if (buf->nframesout == 0) aoe_end_buf(d, buf); } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index e774c50b6842..ffd1947500c6 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d) if (rq == NULL) return; while ((bio = d->ip.nxbio)) { - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; d->ip.nxbio = bio->bi_next; n = (unsigned long) rq->special; rq->special = (void *) --n; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 64ab4951e9d6..f9ab74505e69 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -331,14 +331,12 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) struct bio_vec bvec; sector_t sector; struct bvec_iter iter; - int err = -EIO; sector = bio->bi_iter.bi_sector; if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) - goto out; + goto io_error; if (unlikely(bio->bi_rw & REQ_DISCARD)) { - err = 0; discard_from_brd(brd, sector, bio->bi_iter.bi_size); goto out; } @@ -349,15 +347,20 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; + int err; + err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, rw, sector); if (err) - break; + goto io_error; sector += len >> SECTOR_SHIFT; } out: - bio_endio(bio, err); + bio_endio(bio); + return; +io_error: + bio_io_error(bio); } static int brd_rw_page(struct block_device *bdev, sector_t sector, @@ -500,7 +503,7 @@ static struct brd_device *brd_alloc(int i) blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE); brd->brd_queue->limits.discard_granularity = PAGE_SIZE; - brd->brd_queue->limits.max_discard_sectors = UINT_MAX; + blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX); brd->brd_queue->limits.discard_zeroes_data = 1; queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1318e3217cb0..b3868e7a1ffd 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,11 +175,11 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */ device->md_io.submit_jif = jiffies; if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) - bio_endio(bio, -EIO); + bio_io_error(bio); else submit_bio(rw, bio); wait_until_done_or_force_detached(device, bdev, &device->md_io.done); - if (bio_flagged(bio, BIO_UPTODATE)) + if (!bio->bi_error) err = device->md_io.error; out: diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 434c77dcc99e..e5e0f19ceda0 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -941,36 +941,27 @@ static void drbd_bm_aio_ctx_destroy(struct kref *kref) } /* bv_page may be a copy, or may be the original */ -static void drbd_bm_endio(struct bio *bio, int error) +static void drbd_bm_endio(struct bio *bio) { struct drbd_bm_aio_ctx *ctx = bio->bi_private; struct drbd_device *device = ctx->device; struct drbd_bitmap *b = device->bitmap; unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); - int uptodate = bio_flagged(bio, BIO_UPTODATE); - - - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! - * do we want to WARN() on this? */ - if (!error && !uptodate) - error = -EIO; if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && !bm_test_page_unchanged(b->bm_pages[idx])) drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx); - if (error) { + if (bio->bi_error) { /* ctx error will hold the completed-last non-zero error code, * in case error codes differ. */ - ctx->error = error; + ctx->error = bio->bi_error; bm_set_page_io_err(b->bm_pages[idx]); /* Not identical to on disk version of it. * Is BM_PAGE_IO_ERROR enough? */ if (__ratelimit(&drbd_ratelimit_state)) drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", - error, idx); + bio->bi_error, idx); } else { bm_clear_page_io_err(b->bm_pages[idx]); dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx); @@ -1031,7 +1022,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio->bi_rw |= rw; - bio_endio(bio, -EIO); + bio_io_error(bio); } else { submit_bio(rw, bio); /* this should not count as user activity and cause the diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index efd19c2da9c2..015c6e91b756 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1450,7 +1450,6 @@ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); extern void drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); -extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); extern int is_valid_ar_handle(struct drbd_request *, sector_t); @@ -1481,9 +1480,9 @@ extern int drbd_khelper(struct drbd_device *device, char *cmd); /* drbd_worker.c */ /* bi_end_io handlers */ -extern void drbd_md_endio(struct bio *bio, int error); -extern void drbd_peer_request_endio(struct bio *bio, int error); -extern void drbd_request_endio(struct bio *bio, int error); +extern void drbd_md_endio(struct bio *bio); +extern void drbd_peer_request_endio(struct bio *bio); +extern void drbd_request_endio(struct bio *bio); extern int drbd_worker(struct drbd_thread *thi); enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor); void drbd_resync_after_changed(struct drbd_device *device); @@ -1604,12 +1603,13 @@ static inline void drbd_generic_make_request(struct drbd_device *device, __release(local); if (!bio->bi_bdev) { drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); - bio_endio(bio, -ENODEV); + bio->bi_error = -ENODEV; + bio_endio(bio); return; } if (drbd_insert_fault(device, fault_type)) - bio_endio(bio, -EIO); + bio_io_error(bio); else generic_make_request(bio); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a1518539b858..74d97f4bac34 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2774,7 +2774,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); - blk_queue_merge_bvec(q, drbd_merge_bvec); q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 74df8cfad414..e80cbefbc2b5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1156,14 +1156,14 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi /* For now, don't allow more than one activity log extent worth of data * to be discarded in one go. We may need to rework drbd_al_begin_io() * to allow for even larger discard ranges */ - q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS; + blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); /* REALLY? Is stacking secdiscard "legal"? */ if (blk_queue_secdiscard(b)) queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q); } else { - q->limits.max_discard_sectors = 0; + blk_queue_max_discard_sectors(q, 0); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3907202fb9d9..211592682169 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -201,7 +201,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - bio_endio(m->bio, m->error); + m->bio->bi_error = m->error; + bio_endio(m->bio); dec_ap_bio(device); } @@ -1153,12 +1154,12 @@ drbd_submit_req_private_bio(struct drbd_request *req) rw == WRITE ? DRBD_FAULT_DT_WR : rw == READ ? DRBD_FAULT_DT_RD : DRBD_FAULT_DT_RA)) - bio_endio(bio, -EIO); + bio_io_error(bio); else generic_make_request(bio); put_ldev(device); } else - bio_endio(bio, -EIO); + bio_io_error(bio); } static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) @@ -1191,7 +1192,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long /* only pass the error to the upper layers. * if user cannot handle io errors, that's not our business. */ drbd_err(device, "could not kmalloc() req\n"); - bio_endio(bio, -ENOMEM); + bio->bi_error = -ENOMEM; + bio_endio(bio); return ERR_PTR(-ENOMEM); } req->start_jif = start_jif; @@ -1497,6 +1499,8 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) struct drbd_device *device = (struct drbd_device *) q->queuedata; unsigned long start_jif; + blk_queue_split(q, &bio, q->bio_split); + start_jif = jiffies; /* @@ -1508,41 +1512,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) __drbd_make_request(device, bio, start_jif); } -/* This is called by bio_add_page(). - * - * q->max_hw_sectors and other global limits are already enforced there. - * - * We need to call down to our lower level device, - * in case it has special restrictions. - * - * We also may need to enforce configured max-bio-bvecs limits. - * - * As long as the BIO is empty we have to allow at least one bvec, - * regardless of size and offset, so no need to ask lower levels. - */ -int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) -{ - struct drbd_device *device = (struct drbd_device *) q->queuedata; - unsigned int bio_size = bvm->bi_size; - int limit = DRBD_MAX_BIO_SIZE; - int backing_limit; - - if (bio_size && get_ldev(device)) { - unsigned int max_hw_sectors = queue_max_hw_sectors(q); - struct request_queue * const b = - device->ldev->backing_bdev->bd_disk->queue; - if (b->merge_bvec_fn) { - bvm->bi_bdev = device->ldev->backing_bdev; - backing_limit = b->merge_bvec_fn(b, bvm, bvec); - limit = min(limit, backing_limit); - } - put_ldev(device); - if ((limit >> 9) > max_hw_sectors) - limit = max_hw_sectors << 9; - } - return limit; -} - void request_timer_fn(unsigned long data) { struct drbd_device *device = (struct drbd_device *) data; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d0fae55d871d..5578c1477ba6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -65,12 +65,12 @@ rwlock_t global_state_lock; /* used for synchronous meta data and bitmap IO * submitted by drbd_md_sync_page_io() */ -void drbd_md_endio(struct bio *bio, int error) +void drbd_md_endio(struct bio *bio) { struct drbd_device *device |
