diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-30 15:47:29 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-30 15:47:29 -0800 |
| commit | cfd47302ac64b595beb0a67a337b81942146448a (patch) | |
| tree | bf2e3c251f9f333aa99e52013328b5f77ab77a82 | |
| parent | dd54fcced81d479d77acbeb4eea74b9ab9276bff (diff) | |
| parent | 82734209bedd65a8b508844bab652b464379bfdd (diff) | |
| download | linux-cfd47302ac64b595beb0a67a337b81942146448a.tar.gz linux-cfd47302ac64b595beb0a67a337b81942146448a.tar.bz2 linux-cfd47302ac64b595beb0a67a337b81942146448a.zip | |
Merge tag 'block-6.13-20242901' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:
- NVMe pull request via Keith:
- Use correct srcu list traversal (Breno)
- Scatter-gather support for metadata (Keith)
- Fabrics shutdown race condition fix (Nilay)
- Persistent reservations updates (Guixin)
- Add the required bits for MD atomic write support for raid0/1/10
- Correct return value for unknown opcode in ublk
- Fix deadlock with zone revalidation
- Fix for the io priority request vs bio cleanups
- Use the correct unsigned int type for various limit helpers
- Fix for a race in loop
- Cleanup blk_rq_prep_clone() to prevent uninit-value warning and make
it easier for actual humans to read
- Fix potential UAF when iterating tags
- A few fixes for bfq-iosched UAF issues
- Fix for brd discard not decrementing the allocated page count
- Various little fixes and cleanups
* tag 'block-6.13-20242901' of git://git.kernel.dk/linux: (36 commits)
brd: decrease the number of allocated pages which discarded
block, bfq: fix bfqq uaf in bfq_limit_depth()
block: Don't allow an atomic write be truncated in blkdev_write_iter()
mq-deadline: don't call req_get_ioprio from the I/O completion handler
block: Prevent potential deadlock in blk_revalidate_disk_zones()
block: Remove extra part pointer NULLify in blk_rq_init()
nvme: tuning pr code by using defined structs and macros
nvme: introduce change ptpl and iekey definition
block: return bool from get_disk_ro and bdev_read_only
block: remove a duplicate definition for bdev_read_only
block: return bool from blk_rq_aligned
block: return unsigned int from blk_lim_dma_alignment_and_pad
block: return unsigned int from queue_dma_alignment
block: return unsigned int from bdev_io_opt
block: req->bio is always set in the merge code
block: don't bother checking the data direction for merges
block: blk-mq: fix uninit-value in blk_rq_prep_clone and refactor
Revert "block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator()"
md/raid10: Atomic write support
md/raid1: Atomic write support
...
| -rw-r--r-- | block/bfq-cgroup.c | 1 | ||||
| -rw-r--r-- | block/bfq-iosched.c | 43 | ||||
| -rw-r--r-- | block/blk-merge.c | 35 | ||||
| -rw-r--r-- | block/blk-mq.c | 14 | ||||
| -rw-r--r-- | block/blk-settings.c | 141 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 6 | ||||
| -rw-r--r-- | block/blk-zoned.c | 14 | ||||
| -rw-r--r-- | block/fops.c | 5 | ||||
| -rw-r--r-- | block/genhd.c | 9 | ||||
| -rw-r--r-- | block/mq-deadline.c | 13 | ||||
| -rw-r--r-- | drivers/block/brd.c | 4 | ||||
| -rw-r--r-- | drivers/block/loop.c | 30 | ||||
| -rw-r--r-- | drivers/block/ublk_drv.c | 2 | ||||
| -rw-r--r-- | drivers/md/raid0.c | 1 | ||||
| -rw-r--r-- | drivers/md/raid1.c | 20 | ||||
| -rw-r--r-- | drivers/md/raid10.c | 20 | ||||
| -rw-r--r-- | drivers/nvme/host/core.c | 22 | ||||
| -rw-r--r-- | drivers/nvme/host/ioctl.c | 12 | ||||
| -rw-r--r-- | drivers/nvme/host/multipath.c | 21 | ||||
| -rw-r--r-- | drivers/nvme/host/nvme.h | 10 | ||||
| -rw-r--r-- | drivers/nvme/host/pci.c | 147 | ||||
| -rw-r--r-- | drivers/nvme/host/pr.c | 122 | ||||
| -rw-r--r-- | drivers/nvme/host/rdma.c | 4 | ||||
| -rw-r--r-- | drivers/nvme/target/admin-cmd.c | 7 | ||||
| -rw-r--r-- | include/linux/blkdev.h | 20 | ||||
| -rw-r--r-- | include/linux/nvme.h | 14 | ||||
| -rw-r--r-- | rust/kernel/block/mq/gen_disk.rs | 2 |
27 files changed, 547 insertions, 192 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index e831aedb4643..9fb9f3533150 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -736,6 +736,7 @@ static void bfq_sync_bfqq_move(struct bfq_data *bfqd, */ bfq_put_cooperator(sync_bfqq); bic_set_bfqq(bic, NULL, true, act_idx); + bfq_release_process_ref(bfqd, sync_bfqq); } } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0747d9d0e48c..95dd7b795935 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -582,23 +582,31 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, #define BFQ_LIMIT_INLINE_DEPTH 16 #ifdef CONFIG_BFQ_GROUP_IOSCHED -static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) +static bool bfqq_request_over_limit(struct bfq_data *bfqd, + struct bfq_io_cq *bic, blk_opf_t opf, + unsigned int act_idx, int limit) { - struct bfq_data *bfqd = bfqq->bfqd; - struct bfq_entity *entity = &bfqq->entity; struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH]; struct bfq_entity **entities = inline_entities; - int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH; - int class_idx = bfqq->ioprio_class - 1; + int alloc_depth = BFQ_LIMIT_INLINE_DEPTH; struct bfq_sched_data *sched_data; + struct bfq_entity *entity; + struct bfq_queue *bfqq; unsigned long wsum; bool ret = false; - - if (!entity->on_st_or_in_serv) - return false; + int depth; + int level; retry: spin_lock_irq(&bfqd->lock); + bfqq = bic_to_bfqq(bic, op_is_sync(opf), act_idx); + if (!bfqq) + goto out; + + entity = &bfqq->entity; + if (!entity->on_st_or_in_serv) + goto out; + /* +1 for bfqq entity, root cgroup not included */ depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1; if (depth > alloc_depth) { @@ -643,7 +651,7 @@ retry: * class. */ wsum = 0; - for (i = 0; i <= class_idx; i++) { + for (i = 0; i <= bfqq->ioprio_class - 1; i++) { wsum = wsum * IOPRIO_BE_NR + sched_data->service_tree[i].wsum; } @@ -666,7 +674,9 @@ out: return ret; } #else -static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) +static bool bfqq_request_over_limit(struct bfq_data *bfqd, + struct bfq_io_cq *bic, blk_opf_t opf, + unsigned int act_idx, int limit) { return false; } @@ -704,8 +714,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) } for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { - struct bfq_queue *bfqq = - bic_to_bfqq(bic, op_is_sync(opf), act_idx); + /* Fast path to check if bfqq is already allocated. */ + if (!bic_to_bfqq(bic, op_is_sync(opf), act_idx)) + continue; /* * Does queue (or any parent entity) exceed number of @@ -713,7 +724,7 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * limit depth so that it cannot consume more * available requests and thus starve other entities. */ - if (bfqq && bfqq_request_over_limit(bfqq, limit)) { + if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) { depth = 1; break; } @@ -5434,8 +5445,6 @@ void bfq_put_cooperator(struct bfq_queue *bfqq) bfq_put_queue(__bfqq); __bfqq = next; } - - bfq_release_process_ref(bfqq->bfqd, bfqq); } static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -5448,6 +5457,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); bfq_put_cooperator(bfqq); + + bfq_release_process_ref(bfqd, bfqq); } static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync, @@ -6734,6 +6745,8 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx); bfq_put_cooperator(bfqq); + + bfq_release_process_ref(bfqq->bfqd, bfqq); return NULL; } diff --git a/block/blk-merge.c b/block/blk-merge.c index e0b28e9298c9..e01383c6e534 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -864,17 +864,10 @@ static struct request *attempt_merge(struct request_queue *q, if (req_op(req) != req_op(next)) return NULL; - if (rq_data_dir(req) != rq_data_dir(next)) + if (req->bio->bi_write_hint != next->bio->bi_write_hint) + return NULL; + if (req->bio->bi_ioprio != next->bio->bi_ioprio) return NULL; - - if (req->bio && next->bio) { - /* Don't merge requests with different write hints. */ - if (req->bio->bi_write_hint != next->bio->bi_write_hint) - return NULL; - if (req->bio->bi_ioprio != next->bio->bi_ioprio) - return NULL; - } - if (!blk_atomic_write_mergeable_rqs(req, next)) return NULL; @@ -986,30 +979,16 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (req_op(rq) != bio_op(bio)) return false; - /* different data direction or already started, don't merge */ - if (bio_data_dir(bio) != rq_data_dir(rq)) - return false; - - /* don't merge across cgroup boundaries */ if (!blk_cgroup_mergeable(rq, bio)) return false; - - /* only merge integrity protected bio into ditto rq */ if (blk_integrity_merge_bio(rq->q, rq, bio) == false) return false; - - /* Only merge if the crypt contexts are compatible */ if (!bio_crypt_rq_ctx_compatible(rq, bio)) return false; - - if (rq->bio) { - /* Don't merge requests with different write hints. */ - if (rq->bio->bi_write_hint != bio->bi_write_hint) - return false; - if (rq->bio->bi_ioprio != bio->bi_ioprio) - return false; - } - + if (rq->bio->bi_write_hint != bio->bi_write_hint) + return false; + if (rq->bio->bi_ioprio != bio->bi_ioprio) + return false; if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false) return false; diff --git a/block/blk-mq.c b/block/blk-mq.c index 270cfd9fc6b0..424239c075e2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -388,7 +388,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = BLK_MQ_NO_TAG; rq->start_time_ns = blk_time_get_ns(); - rq->part = NULL; blk_crypto_rq_set_defaults(rq); } EXPORT_SYMBOL(blk_rq_init); @@ -3273,19 +3272,21 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data) { - struct bio *bio, *bio_src; + struct bio *bio_src; if (!bs) bs = &fs_bio_set; __rq_for_each_bio(bio_src, rq_src) { - bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask, - bs); + struct bio *bio = bio_alloc_clone(rq->q->disk->part0, bio_src, + gfp_mask, bs); if (!bio) goto free_and_out; - if (bio_ctr && bio_ctr(bio, bio_src, data)) + if (bio_ctr && bio_ctr(bio, bio_src, data)) { + bio_put(bio); goto free_and_out; + } if (rq->bio) { rq->biotail->bi_next = bio; @@ -3293,7 +3294,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, } else { rq->bio = rq->biotail = bio; } - bio = NULL; } /* Copy attributes of the original request to the clone request. */ @@ -3311,8 +3311,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, return 0; free_and_out: - if (bio) - bio_put(bio); blk_rq_unprep_clone(rq); return -ENOMEM; diff --git a/block/blk-settings.c b/block/blk-settings.c index f1d4dfdc37a7..8f09e33f41f6 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -178,9 +178,26 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim) if (!lim->atomic_write_hw_max) goto unsupported; + if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_min))) + goto unsupported; + + if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_max))) + goto unsupported; + + if (WARN_ON_ONCE(lim->atomic_write_hw_unit_min > + lim->atomic_write_hw_unit_max)) + goto unsupported; + + if (WARN_ON_ONCE(lim->atomic_write_hw_unit_max > + lim->atomic_write_hw_max)) + goto unsupported; + boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT; if (boundary_sectors) { + if (WARN_ON_ONCE(lim->atomic_write_hw_max > + lim->atomic_write_hw_boundary)) + goto unsupported; /* * A feature of boundary support is that it disallows bios to * be merged which would result in a merged request which @@ -249,6 +266,13 @@ int blk_validate_limits(struct queue_limits *lim) lim->io_min = lim->physical_block_size; /* + * The optimal I/O size may not be aligned to physical block size + * (because it may be limited by dma engines which have no clue about + * block size of the disks attached to them), so we round it down here. + */ + lim->io_opt = round_down(lim->io_opt, lim->physical_block_size); + + /* * max_hw_sectors has a somewhat weird default for historical reason, * but driver really should set their own instead of relying on this * value. @@ -458,8 +482,6 @@ static unsigned int queue_limit_discard_alignment( /* Why are these in bytes, not sectors? */ alignment = lim->discard_alignment >> SECTOR_SHIFT; granularity = lim->discard_granularity >> SECTOR_SHIFT; - if (!granularity) - return 0; /* Offset of the partition start in 'granularity' sectors */ offset = sector_div(sector, granularity); @@ -479,6 +501,119 @@ static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lb return sectors; } +/* Check if second and later bottom devices are compliant */ +static bool blk_stack_atomic_writes_tail(struct queue_limits *t, + struct queue_limits *b) +{ + /* We're not going to support different boundary sizes.. yet */ + if (t->atomic_write_hw_boundary != b->atomic_write_hw_boundary) + return false; + + /* Can't support this */ + if (t->atomic_write_hw_unit_min > b->atomic_write_hw_unit_max) + return false; + + /* Or this */ + if (t->atomic_write_hw_unit_max < b->atomic_write_hw_unit_min) + return false; + + t->atomic_write_hw_max = min(t->atomic_write_hw_max, + b->atomic_write_hw_max); + t->atomic_write_hw_unit_min = max(t->atomic_write_hw_unit_min, + b->atomic_write_hw_unit_min); + t->atomic_write_hw_unit_max = min(t->atomic_write_hw_unit_max, + b->atomic_write_hw_unit_max); + return true; +} + +/* Check for valid boundary of first bottom device */ +static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t, + struct queue_limits *b) +{ + /* + * Ensure atomic write boundary is aligned with chunk sectors. Stacked + * devices store chunk sectors in t->io_min. + */ + if (b->atomic_write_hw_boundary > t->io_min && + b->atomic_write_hw_boundary % t->io_min) + return false; + if (t->io_min > b->atomic_write_hw_boundary && + t->io_min % b->atomic_write_hw_boundary) + return false; + + t->atomic_write_hw_boundary = b->atomic_write_hw_boundary; + return true; +} + + +/* Check stacking of first bottom device */ +static bool blk_stack_atomic_writes_head(struct queue_limits *t, + struct queue_limits *b) +{ + if (b->atomic_write_hw_boundary && + !blk_stack_atomic_writes_boundary_head(t, b)) + return false; + + if (t->io_min <= SECTOR_SIZE) { + /* No chunk sectors, so use bottom device values directly */ + t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max; + t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min; + t->atomic_write_hw_max = b->atomic_write_hw_max; + return true; + } + + /* + * Find values for limits which work for chunk size. + * b->atomic_write_hw_unit_{min, max} may not be aligned with chunk + * size (t->io_min), as chunk size is not restricted to a power-of-2. + * So we need to find highest power-of-2 which works for the chunk + * size. + * As an example scenario, we could have b->unit_max = 16K and + * t->io_min = 24K. For this case, reduce t->unit_max to a value + * aligned with both limits, i.e. 8K in this example. + */ + t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max; + while (t->io_min % t->atomic_write_hw_unit_max) + t->atomic_write_hw_unit_max /= 2; + + t->atomic_write_hw_unit_min = min(b->atomic_write_hw_unit_min, + t->atomic_write_hw_unit_max); + t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min); + + return true; +} + +static void blk_stack_atomic_writes_limits(struct queue_limits *t, + struct queue_limits *b) +{ + if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED)) + goto unsupported; + + if (!b->atomic_write_unit_min) + goto unsupported; + + /* + * If atomic_write_hw_max is set, we have already stacked 1x bottom + * device, so check for compliance. + */ + if (t->atomic_write_hw_max) { + if (!blk_stack_atomic_writes_tail(t, b)) + goto unsupported; + return; + } + + if (!blk_stack_atomic_writes_head(t, b)) + goto unsupported; + return; + +unsupported: + t->atomic_write_hw_max = 0; + t->atomic_write_hw_unit_max = 0; + t->atomic_write_hw_unit_min = 0; + t->atomic_write_hw_boundary = 0; + t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED; +} + /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) @@ -639,6 +774,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = 0; t->max_zone_append_sectors = 0; } + blk_stack_atomic_writes_limits(t, b); + return ret; } EXPORT_SYMBOL(blk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d80a202cd170..4241aea84161 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -810,10 +810,8 @@ int blk_register_queue(struct gendisk *disk) * faster to shut down and is made fully functional here as * request_queues for non-existent devices never get registered. */ - if (!blk_queue_init_done(q)) { - blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); - percpu_ref_switch_to_percpu(&q->q_usage_counter); - } + blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); + percpu_ref_switch_to_percpu(&q->q_usage_counter); return ret; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 70211751df16..263e28b72053 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1551,6 +1551,7 @@ static int disk_update_zone_resources(struct gendisk *disk, unsigned int nr_seq_zones, nr_conv_zones; unsigned int pool_size; struct queue_limits lim; + int ret; disk->nr_zones = args->nr_zones; disk->zone_capacity = args->zone_capacity; @@ -1601,7 +1602,11 @@ static int disk_update_zone_resources(struct gendisk *disk, } commit: - return queue_limits_commit_update(q, &lim); + blk_mq_freeze_queue(q); + ret = queue_limits_commit_update(q, &lim); + blk_mq_unfreeze_queue(q); + + return ret; } static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, @@ -1816,14 +1821,15 @@ int blk_revalidate_disk_zones(struct gendisk *disk) * Set the new disk zone parameters only once the queue is frozen and * all I/Os are completed. */ - blk_mq_freeze_queue(q); if (ret > 0) ret = disk_update_zone_resources(disk, &args); else pr_warn("%s: failed to revalidate zones\n", disk->disk_name); - if (ret) + if (ret) { + blk_mq_freeze_queue(q); disk_free_zone_resources(disk); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q); + } return ret; } diff --git a/block/fops.c b/block/fops.c index 2d01c9007681..13a67940d040 100644 --- a/block/fops.c +++ b/block/fops.c @@ -677,6 +677,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *bd_inode = bdev_file_inode(file); struct block_device *bdev = I_BDEV(bd_inode); + bool atomic = iocb->ki_flags & IOCB_ATOMIC; loff_t size = bdev_nr_bytes(bdev); size_t shorted = 0; ssize_t ret; @@ -696,7 +697,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) return -EOPNOTSUPP; - if (iocb->ki_flags & IOCB_ATOMIC) { + if (atomic) { ret = generic_atomic_write_valid(iocb, from); if (ret) return ret; @@ -704,6 +705,8 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) size -= iocb->ki_pos; if (iov_iter_count(from) > size) { + if (atomic) + return -EINVAL; shorted = iov_iter_count(from) - size; iov_iter_truncate(from, size); } diff --git a/block/genhd.c b/block/genhd.c index 9130e163e191..79230c109fca 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -742,13 +742,10 @@ void del_gendisk(struct gendisk *disk) * If the disk does not own the queue, allow using passthrough requests * again. Else leave the queue frozen to fail all I/O. */ - if (!test_bit(GD_OWNS_QUEUE, &disk->state)) { - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + if (!test_bit(GD_OWNS_QUEUE, &disk->state)) __blk_mq_unfreeze_queue(q, true); - } else { - if (queue_is_mq(q)) - blk_mq_exit_queue(q); - } + else if (queue_is_mq(q)) + blk_mq_exit_queue(q); if (start_drain) blk_unfreeze_release_lock(q, true, queue_dying); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index acdc28756d9d..91b3789f710e 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -685,10 +685,9 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, prio = ioprio_class_to_prio[ioprio_class]; per_prio = &dd->per_prio[prio]; - if (!rq->elv.priv[0]) { + if (!rq->elv.priv[0]) per_prio->stats.inserted++; - rq->elv.priv[0] = (void *)(uintptr_t)1; - } + rq->elv.priv[0] = per_prio; if (blk_mq_sched_try_insert_merge(q, rq, free)) return; @@ -753,18 +752,14 @@ static void dd_prepare_request(struct request *rq) */ static void dd_finish_request(struct request *rq) { - struct request_queue *q = rq->q; - struct deadline_data *dd = q->elevator->elevator_data; - const u8 ioprio_class = dd_rq_ioclass(rq); - const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; - struct dd_per_prio *per_prio = &dd->per_prio[prio]; + struct dd_per_prio *per_prio = rq->elv.priv[0]; /* * The block layer core may call dd_finish_request() without having * called dd_insert_requests(). Skip requests that bypassed I/O * scheduling. See also blk_mq_request_bypass_insert(). */ - if (rq->elv.priv[0]) + if (per_prio) atomic_inc(&per_prio->stats.completed); } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 5a95671d8151..292f127cae0a 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -231,8 +231,10 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) xa_lock(&brd->brd_pages); while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); - if (page) + if (page) { __free_page(page); + brd->brd_nr_pages--; + } aligned_sector += PAGE_SECTORS; size -= PAGE_SIZE; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fe9bb4fb5f1b..8f6761c27c68 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -770,12 +770,11 @@ static void loop_sysfs_exit(struct loop_device *lo) &loop_attribute_group); } -static void loop_config_discard(struct loop_device *lo, - struct queue_limits *lim) +static void loop_get_discard_config(struct loop_device *lo, + u32 *granularity, u32 *max_discard_sectors) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; - u32 granularity = 0, max_discard_sectors = 0; struct kstatfs sbuf; /* @@ -788,24 +787,17 @@ static void loop_config_discard(struct loop_device *lo, if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); - max_discard_sectors = bdev_write_zeroes_sectors(bdev); - granularity = bdev_discard_granularity(bdev); + *max_discard_sectors = bdev_write_zeroes_sectors(bdev); + *granularity = bdev_discard_granularity(bdev); /* * We use punch hole to reclaim the free space used by the * image a.k.a. discard. */ } else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) { - max_discard_sectors = UINT_MAX >> 9; - granularity = sbuf.f_bsize; + *max_discard_sectors = UINT_MAX >> 9; + *granularity = sbuf.f_bsize; } - - lim->max_hw_discard_sectors = max_discard_sectors; - lim->max_write_zeroes_sectors = max_discard_sectors; - if (max_discard_sectors) - lim->discard_granularity = granularity; - else - lim->discard_granularity = 0; } struct loop_worker { @@ -991,6 +983,7 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize) struct inode *inode = file->f_mapping->host; struct block_device *backing_bdev = NULL; struct queue_limits lim; + u32 granularity = 0, max_discard_sectors = 0; if (S_ISBLK(inode->i_mode)) backing_bdev = I_BDEV(inode); @@ -1000,6 +993,8 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize) if (!bsize) bsize = loop_default_blocksize(lo, backing_bdev); + loop_get_discard_config(lo, &granularity, &max_discard_sectors); + lim = queue_limits_start_update(lo->lo_queue); lim.logical_block_size = bsize; lim.physical_block_size = bsize; @@ -1009,7 +1004,12 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize) lim.features |= BLK_FEAT_WRITE_CACHE; if (backing_bdev && !bdev_nonrot(backing_bdev)) lim.features |= BLK_FEAT_ROTATIONAL; - loop_config_discard(lo, &lim); + lim.max_hw_discard_sectors = max_discard_sectors; + lim.max_write_zeroes_sectors = max_discard_sectors; + if (max_discard_sectors) + lim.discard_granularity = granularity; + else + lim.discard_granularity = 0; return queue_limits_commit_update(lo->lo_queue, &lim); } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c6d18cd8af44..d4aed12dd436 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -3041,7 +3041,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_end_recovery(ub, cmd); break; default: - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; break; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index baaf5f8b80ae..7049ec7fb8eb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -384,6 +384,7 @@ static int raid0_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * mddev->raid_disks; + lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) { queue_limits_cancel_update(mddev->gendisk->queue); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a5adf08ee174..519c56f0ee3d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1571,7 +1571,21 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, continue; } if (is_bad) { - int good_sectors = first_bad - r1_bio->sector; + int good_sectors; + + /* + * We cannot atomically write this, so just + * error in that case. It could be possible to + * atomically write other mirrors, but the + * complexity of supporting that is not worth + * the benefit. + */ + if (bio->bi_opf & REQ_ATOMIC) { + error = -EIO; + goto err_handle; + } + + good_sectors = first_bad - r1_bio->sector; if (good_sectors < max_sectors) max_sectors = good_sectors; } @@ -1657,7 +1671,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); mbio->bi_end_io = raid1_end_write_request; - mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA)); + mbio->bi_opf = bio_op(bio) | + (bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC)); if (test_bit(FailFast, &rdev->flags) && !test_bit(WriteMostly, &rdev->flags) && conf->raid_disks - mddev->degraded > 1) @@ -3224,6 +3239,7 @@ static int raid1_set_limits(struct mddev *mddev) md_init_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; + lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) { queue_limits_cancel_update(mddev->gendisk->queue); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 18989231791a..7d7a8a2524dc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1255,6 +1255,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, |
