Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 121
1 file changed, 94 insertions, 27 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c987f9ad24a4..fa6839141118 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -590,7 +590,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	atomic_set(&io->io_count, 2);
 	this_cpu_inc(*md->pending_io);
 	io->orig_bio = bio;
-	io->split_bio = NULL;
 	io->md = md;
 	spin_lock_init(&io->lock);
 	io->start_time = jiffies;
@@ -880,13 +879,35 @@ static int __noflush_suspending(struct mapped_device *md)
 	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
 }
 
+static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
+{
+	struct mapped_device *md = io->md;
+
+	if (first_stage) {
+		struct dm_io *next = md->requeue_list;
+
+		md->requeue_list = io;
+		io->next = next;
+	} else {
+		bio_list_add_head(&md->deferred, io->orig_bio);
+	}
+}
+
+static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
+{
+	if (first_stage)
+		queue_work(md->wq, &md->requeue_work);
+	else
+		queue_work(md->wq, &md->work);
+}
+
 /*
  * Return true if the dm_io's original bio is requeued.
  * io->status is updated with error if requeue disallowed.
  */
-static bool dm_handle_requeue(struct dm_io *io)
+static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
 {
-	struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+	struct bio *bio = io->orig_bio;
 	bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
 	bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
 				     (bio->bi_opf & REQ_POLLED));
@@ -912,8 +933,8 @@ static bool dm_handle_requeue(struct dm_io *io)
 		spin_lock_irqsave(&md->deferred_lock, flags);
 		if ((__noflush_suspending(md) &&
 		     !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
-		    handle_polled_eagain) {
-			bio_list_add_head(&md->deferred, bio);
+		    handle_polled_eagain || first_stage) {
+			dm_requeue_add_io(io, first_stage);
 			requeued = true;
 		} else {
 			/*
@@ -926,19 +947,21 @@ static bool dm_handle_requeue(struct dm_io *io)
 	}
 
 	if (requeued)
-		queue_work(md->wq, &md->work);
+		dm_kick_requeue(md, first_stage);
 
 	return requeued;
 }
 
-static void dm_io_complete(struct dm_io *io)
+static void __dm_io_complete(struct dm_io *io, bool first_stage)
 {
-	struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+	struct bio *bio = io->orig_bio;
 	struct mapped_device *md = io->md;
 	blk_status_t io_error;
 	bool requeued;
 
-	requeued = dm_handle_requeue(io);
+	requeued = dm_handle_requeue(io, first_stage);
+	if (requeued && first_stage)
+		return;
 
 	io_error = io->status;
 	if (dm_io_flagged(io, DM_IO_ACCOUNTED))
@@ -978,6 +1001,58 @@ static void dm_io_complete(struct dm_io *io)
 	}
 }
 
+static void dm_wq_requeue_work(struct work_struct *work)
+{
+	struct mapped_device *md = container_of(work, struct mapped_device,
+						requeue_work);
+	unsigned long flags;
+	struct dm_io *io;
+
+	/* reuse deferred lock to simplify dm_handle_requeue */
+	spin_lock_irqsave(&md->deferred_lock, flags);
+	io = md->requeue_list;
+	md->requeue_list = NULL;
+	spin_unlock_irqrestore(&md->deferred_lock, flags);
+
+	while (io) {
+		struct dm_io *next = io->next;
+
+		dm_io_rewind(io, &md->queue->bio_split);
+
+		io->next = NULL;
+		__dm_io_complete(io, false);
+		io = next;
+	}
+}
+
+/*
+ * Two staged requeue:
+ *
+ * 1) io->orig_bio points to the real original bio, and the part mapped to
+ * this io must be requeued, instead of other parts of the original bio.
+ *
+ * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
+ */
+static void dm_io_complete(struct dm_io *io)
+{
+	bool first_requeue;
+
+	/*
+	 * Only dm_io that has been split needs two stage requeue, otherwise
+	 * we may run into long bio clone chain during suspend and OOM could
+	 * be triggered.
+	 *
+	 * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
+	 * also aren't handled via the first stage requeue.
+	 */
+	if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
+		first_requeue = true;
+	else
+		first_requeue = false;
+
+	__dm_io_complete(io, first_requeue);
+}
+
 /*
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
@@ -1256,6 +1331,7 @@ out:
 void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 {
 	struct dm_target_io *tio = clone_to_tio(bio);
+	struct dm_io *io = tio->io;
 	unsigned bio_sectors = bio_sectors(bio);
 
 	BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
@@ -1271,8 +1347,9 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 	 * __split_and_process_bio() may have already saved mapped part
 	 * for accounting but it is being reduced so update accordingly.
 	 */
-	dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
-	tio->io->sectors = n_sectors;
+	dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+	io->sectors = n_sectors;
+	io->sector_offset = bio_sectors(io->orig_bio);
 }
 EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
 
@@ -1395,17 +1472,7 @@ static void setup_split_accounting(struct clone_info *ci, unsigned len)
 		 */
 		dm_io_set_flag(io, DM_IO_WAS_SPLIT);
 		io->sectors = len;
-	}
-
-	if (static_branch_unlikely(&stats_enabled) &&
-	    unlikely(dm_stats_used(&io->md->stats))) {
-		/*
-		 * Save bi_sector in terms of its offset from end of
-		 * original bio, only needed for DM-stats' benefit.
-		 * - saved regardless of whether split needed so that
-		 *   dm_accept_partial_bio() doesn't need to.
-		 */
-		io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+		io->sector_offset = bio_sectors(ci->bio);
 	}
 }
 
@@ -1705,11 +1772,9 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 	 * Remainder must be passed to submit_bio_noacct() so it gets handled
 	 * *after* bios already submitted have been completely processed.
 	 */
-	WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT));
-	io->split_bio = bio_split(bio, io->sectors, GFP_NOIO,
-				  &md->queue->bio_split);
-	bio_chain(io->split_bio, bio);
-	trace_block_split(io->split_bio, bio->bi_iter.bi_sector);
+	bio_trim(bio, io->sectors, ci.sector_count);
+	trace_block_split(bio, bio->bi_iter.bi_sector);
+	bio_inc_remaining(bio);
 	submit_bio_noacct(bio);
 out:
 	/*
@@ -1985,9 +2050,11 @@ static struct mapped_device *alloc_dev(int minor)
 
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
+	INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
 	init_waitqueue_head(&md->eventq);
 	init_completion(&md->kobj_holder.completion);
 
+	md->requeue_list = NULL;
 	md->swap_bios = get_swap_bios();
 	sema_init(&md->swap_bios_semaphore, md->swap_bios);
 	mutex_init(&md->swap_bios_lock);
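
The core of the patch is the first-stage requeue machinery added by dm_requeue_add_io(), dm_kick_requeue() and dm_wq_requeue_work(): a completion path that must not sleep pushes the failed dm_io onto a singly linked list under the deferred lock, and a work item later detaches the whole list in one locked operation and finishes each entry in process context. The sketch below illustrates that general push/drain pattern in isolation; the type and function names (requeue_demo, requeue_item, requeue_push, requeue_drain) and the kfree() payload handling are illustrative stand-ins, not part of the patch.

#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct requeue_item {
	struct requeue_item *next;
	/* payload would live here, e.g. a pointer to the failed io */
};

struct requeue_demo {
	spinlock_t lock;		/* stands in for md->deferred_lock */
	struct requeue_item *list;	/* stands in for md->requeue_list */
	struct work_struct work;	/* stands in for md->requeue_work */
};

/* First stage: called from completion context, must not sleep. */
static void requeue_push(struct requeue_demo *d, struct requeue_item *item)
{
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	item->next = d->list;		/* LIFO push, like dm_requeue_add_io() */
	d->list = item;
	spin_unlock_irqrestore(&d->lock, flags);

	schedule_work(&d->work);	/* like dm_kick_requeue() */
}

/* Second stage: the work handler detaches the list, then runs lock-free. */
static void requeue_drain(struct work_struct *work)
{
	struct requeue_demo *d = container_of(work, struct requeue_demo, work);
	struct requeue_item *item;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	item = d->list;
	d->list = NULL;
	spin_unlock_irqrestore(&d->lock, flags);

	while (item) {
		struct requeue_item *next = item->next;

		item->next = NULL;
		/* here dm rewinds the io and re-runs __dm_io_complete() */
		kfree(item);
		item = next;
	}
}

Detaching the whole list under the lock keeps the critical section constant-time, which matters because requeue_push() runs in completion context; dm_wq_requeue_work() follows exactly this shape.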
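The other notable change is in dm_split_and_process_bio(): rather than allocating a split clone with bio_split() and tying it to the original with bio_chain(), the patch trims the original bio down to the unmapped remainder with bio_trim() and takes an extra __bi_remaining reference with bio_inc_remaining(), so completing the already-mapped front part cannot end the bio while the resubmitted remainder is still in flight. A minimal sketch of that idea follows; the function and parameter names (resubmit_remainder, mapped, remainder) are hypothetical, standing in for io->sectors and ci.sector_count in the patch.

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Illustrative only: resubmit the unmapped tail of 'bio' after its
 * first 'mapped' sectors have already been dispatched, without
 * allocating a split clone.
 */
static void resubmit_remainder(struct bio *bio, unsigned int mapped,
			       unsigned int remainder)
{
	/* Shrink bio to [mapped, mapped + remainder), dropping the front. */
	bio_trim(bio, mapped, remainder);

	/*
	 * The mapped front still completes against this same bio, so
	 * hold an extra __bi_remaining reference; this plays the role
	 * bio_chain() used to play for the split clone.
	 */
	bio_inc_remaining(bio);

	/* Queue the tail behind the bios already submitted. */
	submit_bio_noacct(bio);
}

Avoiding the clone is what makes the first-stage requeue safe during suspend: as the dm_io_complete() comment above notes, long bio clone chains built under repeated requeue could otherwise exhaust memory.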