summary | refs | log | tree | commit | diff
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- drivers/md/dm.c 121
1 files changed, 94 insertions, 27 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c987f9ad24a4..fa6839141118 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -590,7 +590,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
atomic_set(&io->io_count, 2);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
- io->split_bio = NULL;
io->md = md;
spin_lock_init(&io->lock);
io->start_time = jiffies;
@@ -880,13 +879,35 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
+/*
+ * Record @io as needing a requeue.
+ *
+ * First stage: push the dm_io itself onto the head of md->requeue_list,
+ * linked through io->next.  Otherwise: put io->orig_bio at the head of
+ * md->deferred.
+ *
+ * No locking is done here; the caller visible in this patch,
+ * dm_handle_requeue(), calls this after taking md->deferred_lock.
+ */
+static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
+{
+	struct mapped_device *md = io->md;
+
+	if (!first_stage) {
+		bio_list_add_head(&md->deferred, io->orig_bio);
+		return;
+	}
+
+	/* Head insertion into the singly linked requeue list. */
+	io->next = md->requeue_list;
+	md->requeue_list = io;
+}
+
+/*
+ * Queue the work item that matches the requeue stage on md->wq:
+ * md->requeue_work for the first stage, md->work otherwise.
+ */
+static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
+{
+	queue_work(md->wq, first_stage ? &md->requeue_work : &md->work);
+}
+
/*
* Return true if the dm_io's original bio is requeued.
* io->status is updated with error if requeue disallowed.
*/
-static bool dm_handle_requeue(struct dm_io *io)
+static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
{
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+ struct bio *bio = io->orig_bio;
bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
(bio->bi_opf & REQ_POLLED));
@@ -912,8 +933,8 @@ static bool dm_handle_requeue(struct dm_io *io)
spin_lock_irqsave(&md->deferred_lock, flags);
if ((__noflush_suspending(md) &&
!WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
- handle_polled_eagain) {
- bio_list_add_head(&md->deferred, bio);
+ handle_polled_eagain || first_stage) {
+ dm_requeue_add_io(io, first_stage);
requeued = true;
} else {
/*
@@ -926,19 +947,21 @@ static bool dm_handle_requeue(struct dm_io *io)
}
if (requeued)
- queue_work(md->wq, &md->work);
+ dm_kick_requeue(md, first_stage);
return requeued;
}
-static void dm_io_complete(struct dm_io *io)
+static void __dm_io_complete(struct dm_io *io, bool first_stage)
{
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+ struct bio *bio = io->orig_bio;
struct mapped_device *md = io->md;
blk_status_t io_error;
bool requeued;
- requeued = dm_handle_requeue(io);
+ requeued = dm_handle_requeue(io, first_stage);
+ if (requeued && first_stage)
+ return;
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
@@ -978,6 +1001,58 @@ static void dm_io_complete(struct dm_io *io)
}
}
+/*
+ * Work handler for md->requeue_work.
+ *
+ * Takes every dm_io currently on md->requeue_list, then for each one calls
+ * dm_io_rewind() and completes it again through __dm_io_complete() with
+ * first_stage == false.
+ */
+static void dm_wq_requeue_work(struct work_struct *work)
+{
+	struct mapped_device *md = container_of(work, struct mapped_device,
+						requeue_work);
+	struct dm_io *pending, *next;
+	unsigned long flags;
+
+	/*
+	 * The requeue list is guarded by deferred_lock rather than a lock of
+	 * its own, which keeps dm_handle_requeue simple.  Detach the whole
+	 * list so the entries can be processed without holding the lock.
+	 */
+	spin_lock_irqsave(&md->deferred_lock, flags);
+	pending = md->requeue_list;
+	md->requeue_list = NULL;
+	spin_unlock_irqrestore(&md->deferred_lock, flags);
+
+	for (; pending; pending = next) {
+		/* Fetch the link before the io is handed off below. */
+		next = pending->next;
+
+		dm_io_rewind(pending, &md->queue->bio_split);
+
+		pending->next = NULL;
+		__dm_io_complete(pending, false);
+	}
+}
+
+/*
+ * Complete a dm_io, requeueing in two stages when required:
+ *
+ * 1) io->orig_bio still points to the real original bio; only the part
+ *    mapped to this io must be requeued, not the other parts of the
+ *    original bio.
+ *
+ * 2) io->orig_bio points to a new cloned bio which matches the requeued
+ *    dm_io.
+ */
+static void dm_io_complete(struct dm_io *io)
+{
+	/*
+	 * The first stage is used only for a dm_io that has been split.
+	 * Using it for every dm_io could build a long bio clone chain
+	 * during suspend and trigger OOM.
+	 *
+	 * Flush data dm_io is never marked DM_IO_WAS_SPLIT, so it skips the
+	 * first stage as well.
+	 */
+	bool first_requeue = dm_io_flagged(io, DM_IO_WAS_SPLIT);
+
+	__dm_io_complete(io, first_requeue);
+}
+
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necessary.
@@ -1256,6 +1331,7 @@ out:
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = clone_to_tio(bio);
+ struct dm_io *io = tio->io;
unsigned bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
@@ -1271,8 +1347,9 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
* __split_and_process_bio() may have already saved mapped part
* for accounting but it is being reduced so update accordingly.
*/
- dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
- tio->io->sectors = n_sectors;
+ dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+ io->sectors = n_sectors;
+ io->sector_offset = bio_sectors(io->orig_bio);
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
@@ -1395,17 +1472,7 @@ static void setup_split_accounting(struct clone_info *ci, unsigned len)
*/
dm_io_set_flag(io, DM_IO_WAS_SPLIT);
io->sectors = len;
- }
-
- if (static_branch_unlikely(&stats_enabled) &&
- unlikely(dm_stats_used(&io->md->stats))) {
- /*
- * Save bi_sector in terms of its offset from end of
- * original bio, only needed for DM-stats' benefit.
- * - saved regardless of whether split needed so that
- * dm_accept_partial_bio() doesn't need to.
- */
- io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+ io->sector_offset = bio_sectors(ci->bio);
}
}
@@ -1705,11 +1772,9 @@ static void dm_split_and_process_bio(struct mapped_device *md,
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
*/
- WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT));
- io->split_bio = bio_split(bio, io->sectors, GFP_NOIO,
- &md->queue->bio_split);
- bio_chain(io->split_bio, bio);
- trace_block_split(io->split_bio, bio->bi_iter.bi_sector);
+ bio_trim(bio, io->sectors, ci.sector_count);
+ trace_block_split(bio, bio->bi_iter.bi_sector);
+ bio_inc_remaining(bio);
submit_bio_noacct(bio);
out:
/*
@@ -1985,9 +2050,11 @@ static struct mapped_device *alloc_dev(int minor)
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
+ INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->requeue_list = NULL;
md->swap_bios = get_swap_bios();
sema_init(&md->swap_bios_semaphore, md->swap_bios);
mutex_init(&md->swap_bios_lock);