diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-18 16:50:08 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-18 16:50:08 -0800 |
| commit | 77a0cfafa9af9c0d5b43534eb90d530c189edca1 (patch) | |
| tree | 30e513ad7c9732213cede4c00bb1651e1a08b023 /block | |
| parent | 3d1b536c13f7cd966aa660d1730855b26d01c9ae (diff) | |
| parent | 88d47f629313730f26a3b00224d1e1a5e3b7bb79 (diff) | |
| download | linux-77a0cfafa9af9c0d5b43534eb90d530c189edca1.tar.gz linux-77a0cfafa9af9c0d5b43534eb90d530c189edca1.tar.bz2 linux-77a0cfafa9af9c0d5b43534eb90d530c189edca1.zip | |
Merge tag 'for-6.13/block-20241118' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe updates via Keith:
- Use uring_cmd helper (Pavel)
- Host Memory Buffer allocation enhancements (Christoph)
- Target persistent reservation support (Guixin)
- Persistent reservation tracing (Guixen)
- NVMe 2.1 specification support (Keith)
- Rotational Meta Support (Matias, Wang, Keith)
- Volatile cache detection enhancment (Guixen)
- MD updates via Song:
- Maintainers update
- raid5 sync IO fix
- Enhance handling of faulty and blocked devices
- raid5-ppl atomic improvement
- md-bitmap fix
- Support for manually defining embedded partition tables
- Zone append fixes and cleanups
- Stop sending the queued requests in the plug list to the driver
->queue_rqs() handle in reverse order.
- Zoned write plug cleanups
- Cleanups disk stats tracking and add support for disk stats for
passthrough IO
- Add preparatory support for file system atomic writes
- Add lockdep support for queue freezing. Already found a bunch of
issues, and some fixes for that are in here. More will be coming.
- Fix race between queue stopping/quiescing and IO queueing
- ublk recovery improvements
- Fix ublk mmap for 64k pages
- Various fixes and cleanups
* tag 'for-6.13/block-20241118' of git://git.kernel.dk/linux: (118 commits)
MAINTAINERS: Update git tree for mdraid subsystem
block: make struct rq_list available for !CONFIG_BLOCK
block/genhd: use seq_put_decimal_ull for diskstats decimal values
block: don't reorder requests in blk_mq_add_to_batch
block: don't reorder requests in blk_add_rq_to_plug
block: add a rq_list type
block: remove rq_list_move
virtio_blk: reverse request order in virtio_queue_rqs
nvme-pci: reverse request order in nvme_queue_rqs
btrfs: validate queue limits
block: export blk_validate_limits
nvmet: add tracing of reservation commands
nvme: parse reservation commands's action and rtype to string
nvmet: report ns's vwc not present
md/raid5: Increase r5conf.cache_name size
block: remove the ioprio field from struct request
block: remove the write_hint field from struct request
nvme: check ns's volatile write cache not present
nvme: add rotational support
nvme: use command set independent id ns if available
...
Diffstat (limited to 'block')
| -rw-r--r-- | block/bio-integrity.c | 13 | ||||
| -rw-r--r-- | block/bio.c | 81 | ||||
| -rw-r--r-- | block/blk-core.c | 26 | ||||
| -rw-r--r-- | block/blk-crypto-fallback.c | 2 | ||||
| -rw-r--r-- | block/blk-integrity.c | 4 | ||||
| -rw-r--r-- | block/blk-ioc.c | 9 | ||||
| -rw-r--r-- | block/blk-merge.c | 107 | ||||
| -rw-r--r-- | block/blk-mq.c | 307 | ||||
| -rw-r--r-- | block/blk-mq.h | 15 | ||||
| -rw-r--r-- | block/blk-rq-qos.c | 4 | ||||
| -rw-r--r-- | block/blk-settings.c | 40 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 80 | ||||
| -rw-r--r-- | block/blk-throttle.c | 76 | ||||
| -rw-r--r-- | block/blk-zoned.c | 68 | ||||
| -rw-r--r-- | block/blk.h | 52 | ||||
| -rw-r--r-- | block/elevator.c | 18 | ||||
| -rw-r--r-- | block/elevator.h | 4 | ||||
| -rw-r--r-- | block/genhd.c | 136 | ||||
| -rw-r--r-- | block/partitions/Kconfig | 9 | ||||
| -rw-r--r-- | block/partitions/Makefile | 1 | ||||
| -rw-r--r-- | block/partitions/check.h | 1 | ||||
| -rw-r--r-- | block/partitions/cmdline.c | 3 | ||||
| -rw-r--r-- | block/partitions/core.c | 8 | ||||
| -rw-r--r-- | block/partitions/of.c | 110 | ||||
| -rw-r--r-- | block/sed-opal.c | 26 |
25 files changed, 760 insertions, 440 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 88e3ad73c385..2a4bd6611692 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL(bio_integrity_add_page); static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, int nr_vecs, unsigned int len, - unsigned int direction, u32 seed) + unsigned int direction) { bool write = direction == ITER_SOURCE; struct bio_integrity_payload *bip; @@ -247,7 +247,6 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, } bip->bip_flags |= BIP_COPY_USER; - bip->bip_iter.bi_sector = seed; bip->bip_vcnt = nr_vecs; return 0; free_bip: @@ -258,7 +257,7 @@ free_buf: } static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec, - int nr_vecs, unsigned int len, u32 seed) + int nr_vecs, unsigned int len) { struct bio_integrity_payload *bip; @@ -267,7 +266,6 @@ static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec, return PTR_ERR(bip); memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec)); - bip->bip_iter.bi_sector = seed; bip->bip_iter.bi_size = len; bip->bip_vcnt = nr_vecs; return 0; @@ -303,8 +301,7 @@ static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages, return nr_bvecs; } -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes, - u32 seed) +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits); @@ -350,9 +347,9 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes, if (copy) ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes, - direction, seed); + direction); else - ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed); + ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes); if (ret) goto release_pages; if (bvec != stack_vec) diff --git a/block/bio.c b/block/bio.c index ac4d77c88932..699a78c85c75 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1065,39 +1065,6 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, EXPORT_SYMBOL(bio_add_pc_page); /** - * bio_add_zone_append_page - attempt to add page to zone-append bio - * @bio: destination bio - * @page: page to add - * @len: vec entry length - * @offset: vec entry offset - * - * Attempt to add a page to the bio_vec maplist of a bio that will be submitted - * for a zone-append request. This can fail for a number of reasons, such as the - * bio being full or the target block device is not a zoned block device or - * other limitations of the target block device. The target block device must - * allow bio's up to PAGE_SIZE, so it is always possible to add a single page - * to an empty bio. - * - * Returns: number of bytes added to the bio, or 0 in case of a failure. - */ -int bio_add_zone_append_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int offset) -{ - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - bool same_page = false; - - if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND)) - return 0; - - if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev))) - return 0; - - return bio_add_hw_page(q, bio, page, len, offset, - queue_max_zone_append_sectors(q), &same_page); -} -EXPORT_SYMBOL_GPL(bio_add_zone_append_page); - -/** * __bio_add_page - add page(s) to a bio in a new segment * @bio: destination bio * @page: start page to add @@ -1206,21 +1173,12 @@ EXPORT_SYMBOL_GPL(__bio_release_pages); void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) { - size_t size = iov_iter_count(iter); - WARN_ON_ONCE(bio->bi_max_vecs); - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - size_t max_sectors = queue_max_zone_append_sectors(q); - - size = min(size, max_sectors << SECTOR_SHIFT); - } - bio->bi_vcnt = iter->nr_segs; bio->bi_io_vec = (struct bio_vec *)iter->bvec; bio->bi_iter.bi_bvec_done = iter->iov_offset; - bio->bi_iter.bi_size = size; + bio->bi_iter.bi_size = iov_iter_count(iter); bio_set_flag(bio, BIO_CLONED); } @@ -1245,20 +1203,6 @@ static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len, return 0; } -static int bio_iov_add_zone_append_folio(struct bio *bio, struct folio *folio, - size_t len, size_t offset) -{ - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - bool same_page = false; - - if (bio_add_hw_folio(q, bio, folio, len, offset, - queue_max_zone_append_sectors(q), &same_page) != len) - return -EINVAL; - if (same_page && bio_flagged(bio, BIO_PAGE_PINNED)) - unpin_user_folio(folio, 1); - return 0; -} - static unsigned int get_contig_folio_len(unsigned int *num_pages, struct page **pages, unsigned int i, struct folio *folio, size_t left, @@ -1365,14 +1309,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) len = get_contig_folio_len(&num_pages, pages, i, folio, left, offset); - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - ret = bio_iov_add_zone_append_folio(bio, folio, len, - folio_offset); - if (ret) - break; - } else - bio_iov_add_folio(bio, folio, len, folio_offset); - + bio_iov_add_folio(bio, folio, len, folio_offset); offset = 0; } @@ -1728,16 +1665,22 @@ struct bio *bio_split(struct bio *bio, int sectors, { struct bio *split; - BUG_ON(sectors <= 0); - BUG_ON(sectors >= bio_sectors(bio)); + if (WARN_ON_ONCE(sectors <= 0)) + return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(sectors >= bio_sectors(bio))) + return ERR_PTR(-EINVAL); /* Zone append commands cannot be split */ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND)) - return NULL; + return ERR_PTR(-EINVAL); + + /* atomic writes cannot be split */ + if (bio->bi_opf & REQ_ATOMIC) + return ERR_PTR(-EINVAL); split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs); if (!split) - return NULL; + return ERR_PTR(-ENOMEM); split->bi_iter.bi_size = sectors << 9; diff --git a/block/blk-core.c b/block/blk-core.c index bc5e8c5eaac9..666efe8fa202 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -261,6 +261,8 @@ static void blk_free_queue(struct request_queue *q) blk_mq_release(q); ida_free(&blk_queue_ida, q->id); + lockdep_unregister_key(&q->io_lock_cls_key); + lockdep_unregister_key(&q->q_lock_cls_key); call_rcu(&q->rcu_head, blk_free_queue_rcu); } @@ -278,18 +280,20 @@ void blk_put_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_put_queue); -void blk_queue_start_drain(struct request_queue *q) +bool blk_queue_start_drain(struct request_queue *q) { /* * When queue DYING flag is set, we need to block new req * entering queue, so we call blk_freeze_queue_start() to * prevent I/O from crossing blk_queue_enter(). */ - blk_freeze_queue_start(q); + bool freeze = __blk_freeze_queue_start(q, current); if (queue_is_mq(q)) blk_mq_wake_waiters(q); /* Make blk_queue_enter() reexamine the DYING flag. */ wake_up_all(&q->mq_freeze_wq); + + return freeze; } /** @@ -321,6 +325,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) return -ENODEV; } + rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_); + rwsem_release(&q->q_lockdep_map, _RET_IP_); return 0; } @@ -352,6 +358,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio) goto dead; } + rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_); + rwsem_release(&q->io_lockdep_map, _RET_IP_); return 0; dead: bio_io_error(bio); @@ -441,6 +449,12 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) PERCPU_REF_INIT_ATOMIC, GFP_KERNEL); if (error) goto fail_stats; + lockdep_register_key(&q->io_lock_cls_key); + lockdep_register_key(&q->q_lock_cls_key); + lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)", + &q->io_lock_cls_key, 0); + lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)", + &q->q_lock_cls_key, 0); q->nr_requests = BLKDEV_DEFAULT_RQ; @@ -593,7 +607,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_IOERR; /* Make sure the BIO is small enough and will not get split */ - if (nr_sectors > queue_max_zone_append_sectors(q)) + if (nr_sectors > q->limits.max_zone_append_sectors) return BLK_STS_IOERR; bio->bi_opf |= REQ_NOMERGE; @@ -1106,8 +1120,8 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) return; plug->cur_ktime = 0; - plug->mq_list = NULL; - plug->cached_rq = NULL; + rq_list_init(&plug->mq_list); + rq_list_init(&plug->cached_rqs); plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); plug->rq_count = 0; plug->multiple_queues = false; @@ -1203,7 +1217,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule) * queue for cached requests, we don't want a blocked task holding * up a queue freeze/quiesce event. */ - if (unlikely(!rq_list_empty(plug->cached_rq))) + if (unlikely(!rq_list_empty(&plug->cached_rqs))) blk_mq_free_plug_rqs(plug); plug->cur_ktime = 0; diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index b1e7415f8439..29a205482617 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -226,7 +226,7 @@ static bool blk_crypto_fallback_split_bio_if_needed(struct bio **bio_ptr) split_bio = bio_split(bio, num_sectors, GFP_NOIO, &crypto_bio_split); - if (!split_bio) { + if (IS_ERR(split_bio)) { bio->bi_status = BLK_STS_RESOURCE; return false; } diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 83b696ba0cac..b180cac61a9d 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -113,9 +113,9 @@ new_segment: EXPORT_SYMBOL(blk_rq_map_integrity_sg); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, - ssize_t bytes, u32 seed) + ssize_t bytes) { - int ret = bio_integrity_map_user(rq->bio, ubuf, bytes, seed); + int ret = bio_integrity_map_user(rq->bio, ubuf, bytes); if (ret) return ret; diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 25dd4db11121..ce82770c72ab 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -32,13 +32,6 @@ static void get_io_context(struct io_context *ioc) atomic_long_inc(&ioc->refcount); } -static void icq_free_icq_rcu(struct rcu_head *head) -{ - struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); - - kmem_cache_free(icq->__rcu_icq_cache, icq); -} - /* * Exit an icq. Called with ioc locked for blk-mq, and with both ioc * and queue locked for legacy. @@ -102,7 +95,7 @@ static void ioc_destroy_icq(struct io_cq *icq) */ icq->__rcu_icq_cache = et->icq_cache; icq->flags |= ICQ_DESTROYED; - call_rcu(&icq->__rcu_head, icq_free_icq_rcu); + kfree_rcu(icq, __rcu_head); } /* diff --git a/block/blk-merge.c b/block/blk-merge.c index ad763ec313b6..e0b28e9298c9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -107,17 +107,18 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim) static struct bio *bio_submit_split(struct bio *bio, int split_sectors) { - if (unlikely(split_sectors < 0)) { - bio->bi_status = errno_to_blk_status(split_sectors); - bio_endio(bio); - return NULL; - } + if (unlikely(split_sectors < 0)) + goto error; if (split_sectors) { struct bio *split; split = bio_split(bio, split_sectors, GFP_NOIO, &bio->bi_bdev->bd_disk->bio_split); + if (IS_ERR(split)) { + split_sectors = PTR_ERR(split); + goto error; + } split->bi_opf |= REQ_NOMERGE; blkcg_bio_issue_init(split); bio_chain(split, bio); @@ -128,6 +129,10 @@ static struct bio *bio_submit_split(struct bio *bio, int split_sectors) } return bio; +error: + bio->bi_status = errno_to_blk_status(split_sectors); + bio_endio(bio); + return NULL; } struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim, @@ -166,17 +171,6 @@ struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim, return bio_submit_split(bio, split_sectors); } -struct bio *bio_split_write_zeroes(struct bio *bio, - const struct queue_limits *lim, unsigned *nsegs) -{ - *nsegs = 0; - if (!lim->max_write_zeroes_sectors) - return bio; - if (bio_sectors(bio) <= lim->max_write_zeroes_sectors) - return bio; - return bio_submit_split(bio, lim->max_write_zeroes_sectors); -} - static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim, bool is_atomic) { @@ -211,7 +205,9 @@ static inline unsigned get_max_io_size(struct bio *bio, * We ignore lim->max_sectors for atomic writes because it may less * than the actual bio size, which we cannot tolerate. */ - if (is_atomic) + if (bio_op(bio) == REQ_OP_WRITE_ZEROES) + max_sectors = lim->max_write_zeroes_sectors; + else if (is_atomic) max_sectors = lim->atomic_write_max_sectors; else max_sectors = lim->max_sectors; @@ -296,6 +292,14 @@ static bool bvec_split_segs(const struct queue_limits *lim, return len > 0 || bv->bv_len > max_len; } +static unsigned int bio_split_alignment(struct bio *bio, + const struct queue_limits *lim) +{ + if (op_is_write(bio_op(bio)) && lim->zone_write_granularity) + return lim->zone_write_granularity; + return lim->logical_block_size; +} + /** * bio_split_rw_at - check if and where to split a read/write bio * @bio: [in] bio to be split @@ -358,7 +362,7 @@ split: * split size so that each bio is properly block size aligned, even if * we do not use the full hardware limits. */ - bytes = ALIGN_DOWN(bytes, lim->logical_block_size); + bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim)); /* * Bio splitting may cause subtle trouble such as hang when doing sync @@ -388,16 +392,35 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, struct bio *bio_split_zone_append(struct bio *bio, const struct queue_limits *lim, unsigned *nr_segs) { - unsigned int max_sectors = queue_limits_max_zone_append_sectors(lim); int split_sectors; split_sectors = bio_split_rw_at(bio, lim, nr_segs, - max_sectors << SECTOR_SHIFT); + lim->max_zone_append_sectors << SECTOR_SHIFT); if (WARN_ON_ONCE(split_sectors > 0)) split_sectors = -EINVAL; return bio_submit_split(bio, split_sectors); } +struct bio *bio_split_write_zeroes(struct bio *bio, + const struct queue_limits *lim, unsigned *nsegs) +{ + unsigned int max_sectors = get_max_io_size(bio, lim); + + *nsegs = 0; + + /* + * An unset limit should normally not happen, as bio submission is keyed + * off having a non-zero limit. But SCSI can clear the limit in the + * I/O completion handler, and we can race and see this. Splitting to a + * zero limit obviously doesn't make sense, so band-aid it here. + */ + if (!max_sectors) + return bio; + if (bio_sectors(bio) <= max_sectors) + return bio; + return bio_submit_split(bio, max_sectors); +} + /** * bio_split_to_limits - split a bio to fit the queue limits * @bio: bio to be split @@ -411,10 +434,9 @@ struct bio *bio_split_zone_append(struct bio *bio, */ struct bio *bio_split_to_limits(struct bio *bio) { - const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits; unsigned int nr_segs; - return __bio_split_to_limits(bio, lim, &nr_segs); + return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs); } EXPORT_SYMBOL(bio_split_to_limits); @@ -797,7 +819,7 @@ static inline void blk_update_mixed_merge(struct request *req, static void blk_account_io_merge_request(struct request *req) { - if (blk_do_io_stat(req)) { + if (req->rq_flags & RQF_IO_STAT) { part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); part_stat_local_dec(req->part, @@ -845,12 +867,13 @@ static struct request *attempt_merge(struct request_queue *q, if (rq_data_dir(req) != rq_data_dir(next)) return NULL; - /* Don't merge requests with different write hints. */ - if (req->write_hint != next->write_hint) - return NULL; - - if (req->ioprio != next->ioprio) - return NULL; + if (req->bio && next->bio) { + /* Don't merge requests with different write hints. */ + if (req->bio->bi_write_hint != next->bio->bi_write_hint) + return NULL; + if (req->bio->bi_ioprio != next->bio->bi_ioprio) + return NULL; + } if (!blk_atomic_write_mergeable_rqs(req, next)) return NULL; @@ -979,12 +1002,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (!bio_crypt_rq_ctx_compatible(rq, bio)) return false; - /* Don't merge requests with different write hints. */ - if (rq->write_hint != bio->bi_write_hint) - return false; - - if (rq->ioprio != bio_prio(bio)) - return false; + if (rq->bio) { + /* Don't merge requests with different write hints. */ + if (rq->bio->bi_write_hint != bio->bi_write_hint) + return false; + if (rq->bio->bi_ioprio != bio->bi_ioprio) + return false; + } if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false) return false; @@ -1005,12 +1029,11 @@ enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) static void blk_account_io_merge_bio(struct request *req) { - if (!blk_do_io_stat(req)) - return; - - part_stat_lock(); - part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); - part_stat_unlock(); + if (req->rq_flags & RQF_IO_STAT) { + part_stat_lock(); + part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); + part_stat_unlock(); + } } enum bio_merge_status bio_attempt_back_merge(struct request *req, @@ -1156,7 +1179,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, struct blk_plug *plug = current->plug; struct request *rq; - if (!plug || rq_list_empty(plug->mq_list)) + if (!plug || rq_list_empty(&plug->mq_list)) return false; rq_list_for_each(&plug->mq_list, rq) { diff --git a/block/blk-mq.c b/block/blk-mq.c index cf626e061dd7..270cfd9fc6b0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -92,7 +92,7 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv) { struct mq_inflight *mi = priv; - if (rq->part && blk_do_io_stat(rq) && + if (rq->rq_flags & RQF_IO_STAT && (!bdev_is_partition(mi->part) || rq->part == mi->part) && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; @@ -120,9 +120,59 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, inflight[1] = mi.inflight[1]; } -void blk_freeze_queue_start(struct request_queue *q) +#ifdef CONFIG_LOCKDEP +static bool blk_freeze_set_owner(struct request_queue *q, + struct task_struct *owner) +{ + if (!owner) + return false; + + if (!q->mq_freeze_depth) { + q->mq_freeze_owner = owner; + q->mq_freeze_owner_depth = 1; + return true; + } + + if (owner == q->mq_freeze_owner) + q->mq_freeze_owner_depth += 1; + return false; +} + +/* verify the last unfreeze in owner context */ +static bool blk_unfreeze_check_owner(struct request_queue *q) +{ + if (!q->mq_freeze_owner) + return false; + if (q->mq_freeze_owner != current) + return false; + if (--q->mq_freeze_owner_depth == 0) { + q->mq_freeze_owner = NULL; + return true; + } + return false; +} + +#else + +static bool blk_freeze_set_owner(struct request_queue *q, + struct task_struct *owner) { + return false; +} + +static bool blk_unfreeze_check_owner(struct request_queue *q) +{ + return false; +} +#endif + +bool __blk_freeze_queue_start(struct request_queue *q, + struct task_struct *owner) +{ + bool freeze; + mutex_lock(&q->mq_freeze_lock); + freeze = blk_freeze_set_owner(q, owner); if (++q->mq_freeze_depth == 1) { percpu_ref_kill(&q->q_usage_counter); mutex_unlock(&q->mq_freeze_lock); @@ -131,6 +181,14 @@ void blk_freeze_queue_start(struct request_queue *q) } else { mutex_unlock(&q->mq_freeze_lock); } + + return freeze; +} + +void blk_freeze_queue_start(struct request_queue *q) +{ + if (__blk_freeze_queue_start(q, current)) + blk_freeze_acquire_lock(q, false, false); } EXPORT_SYMBOL_GPL(blk_freeze_queue_start); @@ -149,35 +207,17 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); -/* - * Guarantee no request is in use, so we can change any data structure of - * the queue afterward. - */ -void blk_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue(struct request_queue *q) { - /* - * In the !blk_mq case we are only calling this to kill the - * q_usage_counter, otherwise this increases the freeze depth - * and waits for it to return to zero. For this reason there is - * no blk_unfreeze_queue(), and blk_freeze_queue() is not - * exported to drivers as the only user for unfreeze is blk_mq. - */ blk_freeze_queue_start(q); blk_mq_freeze_queue_wait(q); } - -void blk_mq_freeze_queue(struct request_queue *q) -{ - /* - * ...just an alias to keep freeze and unfreeze actions balanced - * in the blk_mq_* namespace - */ - blk_freeze_queue(q); -} EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); -void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) +bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) { + bool unfreeze; + mutex_lock(&q->mq_freeze_lock); if (force_atomic) q->q_usage_counter.data->force_atomic = true; @@ -187,16 +227,40 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) percpu_ref_resurrect(&q->q_usage_counter); wake_up_all(&q->mq_freeze_wq); } + unfreeze = blk_unfreeze_check_owner(q); mutex_unlock(&q->mq_freeze_lock); + + return unfreeze; } void blk_mq_unfreeze_queue(struct request_queue *q) { - __blk_mq_unfreeze_queue(q, false); + if (__blk_mq_unfreeze_queue(q, false)) + blk_unfreeze_release_lock(q, false, false); } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); /* + * non_owner variant of blk_freeze_queue_start + * + * Unlike blk_freeze_queue_start, the queue doesn't need to be unfrozen + * by the same task. This is fragile and should not be used if at all + * possible. + */ +void blk_freeze_queue_start_non_owner(struct request_queue *q) +{ + __blk_freeze_queue_start(q, NULL); +} +EXPORT_SYMBOL_GPL(blk_freeze_queue_start_non_owner); + +/* non_owner variant of blk_mq_unfreeze_queue */ +void blk_mq_unfreeze_queue_non_owner(struct request_queue *q) +{ + __blk_mq_unfreeze_queue(q, false); +} +EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_non_owner); + +/* * FIXME: replace the scsi_internal_device_*block_nowait() calls in the * mpt3sas driver such that this function can be removed. */ @@ -283,8 +347,9 @@ void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set) if (!blk_queue_skip_tagset_quiesce(q)) blk_mq_quiesce_queue_nowait(q); } - blk_mq_wait_quiesce_done(set); mutex_unlock(&set->tag_list_lock); + + blk_mq_wait_quiesce_done(set); } EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset); @@ -331,14 +396,9 @@ EXPORT_SYMBOL(blk_rq_init); /* Set start and alloc time when the allocated request is actually used */ static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns) { - if (blk_mq_need_time_stamp(rq)) - rq->start_time_ns = blk_time_get_ns(); - else - rq->start_time_ns = 0; - #ifdef CONFIG_BLK_RQ_ALLOC_TIME if (blk_queue_rq_alloc_time(rq->q)) - rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns; + rq->alloc_time_ns = alloc_time_ns; else rq->alloc_time_ns = 0; #endif @@ -359,8 +419,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, if (data->flags & BLK_MQ_REQ_PM) data->rq_flags |= RQF_PM; - if (blk_queue_io_stat(q)) - data->rq_flags |= RQF_IO_STAT; rq->rq_flags = data->rq_flags; if (data->rq_flags & RQF_SCHED_TAGS) { @@ -420,7 +478,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data) prefetch(tags->static_rqs[tag]); tag_mask &= ~(1UL << i); rq = blk_mq_rq_ctx_init(data, tags, tag); - rq_list_add(data->cached_rq, rq); + rq_list_add_head(data->cached_rqs, rq); nr++; } if (!(data->rq_flags & RQF_SCHED_TAGS)) @@ -429,7 +487,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data) percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); data->nr_tags -= nr; - return rq_list_pop(data->cached_rq); + return rq_list_pop(data->cached_rqs); } static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) @@ -526,7 +584,7 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q, .flags = flags, .cmd_flags = opf, .nr_tags = plug->nr_ios, - .cached_rq = &plug->cached_rq, + .cached_rqs = &plug->cached_rqs, }; struct request *rq; @@ -551,14 +609,14 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q, if (!plug) return NULL; |
