diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-29 20:21:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-29 20:21:42 -0700 |
commit | 3d3dfeb3aec7b612d266d500c82054f1fded4980 (patch) | |
tree | 11649eab5c74deb74e6e0879613e8053ae3b9970 /drivers/md/raid1.c | |
parent | c1b7fcf3f6d94c2c3528bf77054bf174a5ef63d7 (diff) | |
parent | 146afeb235ccec10c17ad8ea26327c0c79dbd968 (diff) | |
download | linux-3d3dfeb3aec7b612d266d500c82054f1fded4980.tar.gz linux-3d3dfeb3aec7b612d266d500c82054f1fded4980.tar.bz2 linux-3d3dfeb3aec7b612d266d500c82054f1fded4980.zip |
Merge tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
"Pretty quiet round for this release. This contains:
- Add support for zoned storage to ublk (Andreas, Ming)
- Series improving performance for drivers that mark themselves as
  needing a blocking context for issue (Bart)
- Cleanup the flush logic (Chengming)
- sed opal keyring support (Greg)
- Fixes and improvements to the integrity support (Jinyoung)
- Add some exports for bcachefs that we can hopefully delete again in
  the future (Kent)
- deadline throttling fix (Zhiguo)
- Series allowing building the kernel without buffer_head support
  (Christoph)
- Sanitize the bio page adding flow (Christoph)
- Write back cache fixes (Christoph)
- MD updates via Song:
    - Fix perf regression for raid0 large sequential writes (Jan)
    - Fix split bio iostat for raid0 (David)
    - Various raid1 fixes (Heinz, Xueshi)
    - raid6test build fixes (WANG)
    - Deprecate bitmap file support (Christoph)
    - Fix deadlock with md sync thread (Yu)
    - Refactor md io accounting (Yu)
    - Various non-urgent fixes (Li, Yu, Jack)
- Various fixes and cleanups (Arnd, Azeem, Chengming, Damien, Li,
  Ming, Nitesh, Ruan, Tejun, Thomas, Xu)"
* tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux: (113 commits)
block: use strscpy() to instead of strncpy()
block: sed-opal: keyring support for SED keys
block: sed-opal: Implement IOC_OPAL_REVERT_LSP
block: sed-opal: Implement IOC_OPAL_DISCOVERY
blk-mq: prealloc tags when increase tagset nr_hw_queues
blk-mq: delete redundant tagset map update when fallback
blk-mq: fix tags leak when shrink nr_hw_queues
ublk: zoned: support REQ_OP_ZONE_RESET_ALL
md: raid0: account for split bio in iostat accounting
md/raid0: Fix performance regression for large sequential writes
md/raid0: Factor out helper for mapping and submitting a bio
md raid1: allow writebehind to work on any leg device set WriteMostly
md/raid1: hold the barrier until handle_read_error() finishes
md/raid1: free the r1bio before waiting for blocked rdev
md/raid1: call free_r1bio() before allow_barrier() in raid_end_bio_io()
blk-cgroup: Fix NULL deref caused by blkg_policy_data being installed before init
drivers/rnbd: restore sysfs interface to rnbd-client
md/raid5-cache: fix null-ptr-deref for r5l_flush_stripe_to_raid()
raid6: test: only check for Altivec if building on powerpc hosts
raid6: test: make sure all intermediate and artifact files are .gitignored
...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 86 |
1 files changed, 50 insertions, 36 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index dd25832eb045..4b30a1742162 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio) if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) bio->bi_status = BLK_STS_IOERR; - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) - bio_end_io_acct(bio, r1_bio->start_time); bio_endio(bio); } @@ -313,6 +311,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio) { struct bio *bio = r1_bio->master_bio; struct r1conf *conf = r1_bio->mddev->private; + sector_t sector = r1_bio->sector; /* if nobody has done the final endio yet, do it now */ if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { @@ -323,13 +322,13 @@ static void raid_end_bio_io(struct r1bio *r1_bio) call_bio_endio(r1_bio); } + + free_r1bio(r1_bio); /* * Wake up any possible resync thread that waits for the device * to go idle. All I/Os, even write-behind writes, are done. */ - allow_barrier(conf, r1_bio->sector); - - free_r1bio(r1_bio); + allow_barrier(conf, sector); } /* @@ -791,11 +790,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect return best_disk; } +static void wake_up_barrier(struct r1conf *conf) +{ + if (wq_has_sleeper(&conf->wait_barrier)) + wake_up(&conf->wait_barrier); +} + static void flush_bio_list(struct r1conf *conf, struct bio *bio) { /* flush any pending bitmap writes to disk before proceeding w/ I/O */ raid1_prepare_flush_writes(conf->mddev->bitmap); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; @@ -972,7 +977,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait) * In case freeze_array() is waiting for * get_unqueued_pending() == extra */ - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); /* Wait for the barrier in same barrier unit bucket to drop. 
*/ /* Return false when nowait flag is set */ @@ -1015,7 +1020,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa * In case freeze_array() is waiting for * get_unqueued_pending() == extra */ - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); /* Wait for array to be unfrozen */ /* Return false when nowait flag is set */ @@ -1044,7 +1049,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait) static void _allow_barrier(struct r1conf *conf, int idx) { atomic_dec(&conf->nr_pending[idx]); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); } static void allow_barrier(struct r1conf *conf, sector_t sector_nr) @@ -1173,7 +1178,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); spin_unlock_irq(&conf->device_lock); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); md_wakeup_thread(mddev->thread); kfree(plug); return; @@ -1303,10 +1308,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, } r1_bio->read_disk = rdisk; - - if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) - r1_bio->start_time = bio_start_io_acct(bio); - + if (!r1bio_existed) { + md_account_bio(mddev, &bio); + r1_bio->master_bio = bio; + } read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, &mddev->bio_set); @@ -1373,6 +1378,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, return; } + retry_write: r1_bio = alloc_r1bio(mddev, bio); r1_bio->sectors = max_write_sectors; @@ -1388,7 +1394,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, */ disks = conf->raid_disks * 2; - retry_write: blocked_rdev = NULL; rcu_read_lock(); max_sectors = r1_bio->sectors; @@ -1468,7 +1473,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, for (j = 0; j < i; j++) if (r1_bio->bios[j]) rdev_dec_pending(conf->mirrors[j].rdev, 
mddev); - r1_bio->state = 0; + free_r1bio(r1_bio); allow_barrier(conf, bio->bi_iter.bi_sector); if (bio->bi_opf & REQ_NOWAIT) { @@ -1500,8 +1505,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->sectors = max_sectors; } - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) - r1_bio->start_time = bio_start_io_acct(bio); + md_account_bio(mddev, &bio); + r1_bio->master_bio = bio; atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->behind_remaining, 0); @@ -1518,8 +1523,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly * is waiting for behind writes to flush */ - if (bitmap && - test_bit(WriteMostly, &rdev->flags) && + if (bitmap && write_behind && (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { @@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio_write_done(r1_bio); /* In case raid1d snuck in to freeze_array */ - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); } static bool raid1_make_request(struct mddev *mddev, struct bio *bio) @@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) { struct r1conf *conf = mddev->private; int err = -EEXIST; - int mirror = 0; + int mirror = 0, repl_slot = -1; struct raid1_info *p; int first = 0; int last = conf->raid_disks - 1; @@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) break; } if (test_bit(WantReplacement, &p->rdev->flags) && - p[conf->raid_disks].rdev == NULL) { - /* Add this device as a replacement */ - clear_bit(In_sync, &rdev->flags); - set_bit(Replacement, &rdev->flags); - rdev->raid_disk = mirror; - err = 0; - conf->fullsync = 1; - rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); - break; - } + p[conf->raid_disks].rdev == NULL && repl_slot < 0) + repl_slot = mirror; } + 
+ if (err && repl_slot >= 0) { + /* Add this device as a replacement */ + p = conf->mirrors + repl_slot; + clear_bit(In_sync, &rdev->flags); + set_bit(Replacement, &rdev->flags); + rdev->raid_disk = repl_slot; + err = 0; + conf->fullsync = 1; + rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); + } + print_conf(conf); return err; } @@ -1829,6 +1837,10 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) struct r1conf *conf = mddev->private; int err = 0; int number = rdev->raid_disk; + + if (unlikely(number >= conf->raid_disks)) + goto abort; + struct raid1_info *p = conf->mirrors + number; if (rdev != p->rdev) @@ -2299,7 +2311,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, d++; if (d == conf->raid_disks * 2) d = 0; - } while (!success && d != read_disk); + } while (d != read_disk); if (!success) { /* Cannot read from anywhere - mark it bad */ @@ -2498,6 +2510,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) struct mddev *mddev = conf->mddev; struct bio *bio; struct md_rdev *rdev; + sector_t sector; clear_bit(R1BIO_ReadError, &r1_bio->state); /* we got a read error. Maybe the drive is bad. 
Maybe just @@ -2527,12 +2540,13 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) } rdev_dec_pending(rdev, conf->mddev); - allow_barrier(conf, r1_bio->sector); + sector = r1_bio->sector; bio = r1_bio->master_bio; /* Reuse the old r1_bio so that the IO_BLOCKED settings are preserved */ r1_bio->state = 0; raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio); + allow_barrier(conf, sector); } static void raid1d(struct md_thread *thread) @@ -3144,7 +3158,7 @@ static int raid1_run(struct mddev *mddev) * RAID1 needs at least one disk in active */ if (conf->raid_disks - mddev->degraded < 1) { - md_unregister_thread(&conf->thread); + md_unregister_thread(mddev, &conf->thread); ret = -EINVAL; goto abort; } @@ -3171,7 +3185,7 @@ static int raid1_run(struct mddev *mddev) ret = md_integrity_register(mddev); if (ret) { - md_unregister_thread(&mddev->thread); + md_unregister_thread(mddev, &mddev->thread); goto abort; } return 0; |