| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-20 19:38:46 -0800 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-20 19:38:46 -0800 |
| commit | 1cbfb828e05171ca2dd77b5988d068e6872480fe (patch) | |
| tree | bfb33c9ad8840908058649ba2e261bdb7e5f7ee9 /block | |
| parent | 3d3a9c8b89d4f8a3785e06ffd15405c670696f02 (diff) | |
| parent | 554b22864cc79e28cd65e3a6e1d0d1dfa8581c68 (diff) | |
Merge tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull requests via Keith:
    - Target support for PCI-Endpoint transport (Damien)
    - TCP IO queue spreading fixes (Sagi, Chaitanya)
    - Target handling for "limited retry" flags (Guixen)
    - Poll type fix (Yongsoo)
    - Xarray storage error handling (Keisuke)
    - Host memory buffer free size fix on error (Francis)
- MD pull requests via Song:
    - Reintroduce md-linear (Yu Kuai)
    - md-bitmap refactor and fix (Yu Kuai)
    - Replace kmap_atomic with kmap_local_page (David Reaver)
- Quite a few queue freeze and debugfs deadlock fixes.
  Ming introduced lockdep support for this in the 6.13 kernel, and it
  has (unsurprisingly) uncovered quite a few issues
- Use const attributes for IO schedulers
- Remove bio ioprio wrappers
- Fixes for stacked device atomic write support
- Refactor queue affinity helpers, in preparation for better supporting
  isolated CPUs (a short usage sketch follows this list)
- Cleanups of loop O_DIRECT handling
- Cleanup of BLK_MQ_F_* flags
- Add rotational support for null_blk
- Various fixes and cleanups
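
As a rough illustration of the queue affinity helper refactor (see the new blk_mq_map_hw_queues() added in block/blk-mq-cpumap.c further down), a driver's .map_queues callback can now lean on the bus's irq_get_affinity callback instead of the PCI- and virtio-specific helpers. This is a minimal sketch, not code from the series: the foo_* names and fields are hypothetical; only blk_mq_map_hw_queues() and its signature come from the diff below.

```c
#include <linux/blk-mq.h>
#include <linux/device.h>

/* Hypothetical driver context; only blk_mq_map_hw_queues() is from this pull. */
struct foo_ctrl {
	struct device *dev;	/* the PCI or virtio device that owns the IRQs */
};

static void foo_map_queues(struct blk_mq_tag_set *set)
{
	struct foo_ctrl *ctrl = set->driver_data;

	/*
	 * One bus-independent call replaces blk_mq_pci_map_queues() and
	 * blk_mq_virtio_map_queues(): the bus's irq_get_affinity callback
	 * supplies the per-queue CPU masks, and the helper falls back to a
	 * trivial single-queue map if the bus does not provide one.
	 */
	blk_mq_map_hw_queues(&set->map[HCTX_TYPE_DEFAULT], ctrl->dev, 0);
}
```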
* tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux: (106 commits)
block: Don't trim an atomic write
block: Add common atomic writes enable flag
md/md-linear: Fix a NULL vs IS_ERR() bug in linear_add()
block: limit disk max sectors to (LLONG_MAX >> 9)
block: Change blk_stack_atomic_writes_limits() unit_min check
block: Ensure start sector is aligned for stacking atomic writes
blk-mq: Move more error handling into blk_mq_submit_bio()
block: Reorder the request allocation code in blk_mq_submit_bio()
nvme: fix bogus kzalloc() return check in nvme_init_effects_log()
md/md-bitmap: move bitmap_{start, end}write to md upper layer
md/raid5: implement pers->bitmap_sector()
md: add a new callback pers->bitmap_sector()
md/md-bitmap: remove the last parameter for bimtap_ops->endwrite()
md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
md: Replace deprecated kmap_atomic() with kmap_local_page()
md: reintroduce md-linear
partitions: ldm: remove the initial kernel-doc notation
blk-cgroup: rwstat: fix kernel-doc warnings in header file
blk-cgroup: fix kernel-doc warnings in header file
nbd: fix partial sending
...
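
Several of the queue-freeze fixes in this list converge on a new helper, queue_limits_commit_update_frozen(), whose first user appears in the block/blk-integrity.c hunk below. The following is a minimal sketch of the calling pattern with a hypothetical driver function for illustration; only the two queue_limits_* calls are real block-layer interfaces.

```c
#include <linux/blkdev.h>

/* Hypothetical limit update; the foo_ name is illustrative only. */
static int foo_set_max_hw_sectors(struct request_queue *q, unsigned int max)
{
	struct queue_limits lim = queue_limits_start_update(q);

	lim.max_hw_sectors = max;
	/*
	 * Commits the limits with the queue frozen, replacing open-coded
	 * blk_mq_freeze_queue() / queue_limits_commit_update() /
	 * blk_mq_unfreeze_queue() sequences of the kind the lockdep
	 * annotations added in 6.13 have been flagging.
	 */
	return queue_limits_commit_update_frozen(q, &lim);
}
```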
Diffstat (limited to 'block')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | block/Makefile | 2 |
| -rw-r--r-- | block/bfq-iosched.c | 2 |
| -rw-r--r-- | block/bio.c | 111 |
| -rw-r--r-- | block/blk-cgroup-rwstat.h | 5 |
| -rw-r--r-- | block/blk-cgroup.h | 10 |
| -rw-r--r-- | block/blk-core.c | 21 |
| -rw-r--r-- | block/blk-integrity.c | 4 |
| -rw-r--r-- | block/blk-map.c | 128 |
| -rw-r--r-- | block/blk-merge.c | 177 |
| -rw-r--r-- | block/blk-mq-cpumap.c | 37 |
| -rw-r--r-- | block/blk-mq-debugfs.c | 27 |
| -rw-r--r-- | block/blk-mq-pci.c | 46 |
| -rw-r--r-- | block/blk-mq-sched.c | 3 |
| -rw-r--r-- | block/blk-mq-tag.c | 41 |
| -rw-r--r-- | block/blk-mq-virtio.c | 46 |
| -rw-r--r-- | block/blk-mq.c | 71 |
| -rw-r--r-- | block/blk-mq.h | 11 |
| -rw-r--r-- | block/blk-settings.c | 42 |
| -rw-r--r-- | block/blk-sysfs.c | 140 |
| -rw-r--r-- | block/blk-zoned.c | 65 |
| -rw-r--r-- | block/blk.h | 33 |
| -rw-r--r-- | block/bsg-lib.c | 2 |
| -rw-r--r-- | block/elevator.c | 35 |
| -rw-r--r-- | block/elevator.h | 2 |
| -rw-r--r-- | block/genhd.c | 63 |
| -rw-r--r-- | block/kyber-iosched.c | 2 |
| -rw-r--r-- | block/mq-deadline.c | 2 |
| -rw-r--r-- | block/partitions/ldm.h | 2 |
28 files changed, 442 insertions, 688 deletions
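
Before the raw diff: the "const attributes for IO schedulers" item from the summary shows up first in the bfq hunk below, where the elv_fs_entry table becomes const. For a hypothetical scheduler the sysfs attribute table would now be declared roughly as in this sketch; the foo_* names are made up, only the const struct elv_fs_entry usage is what the series enables.

```c
#include <linux/sysfs.h>
#include "elevator.h"

static ssize_t foo_quantum_show(struct elevator_queue *e, char *page)
{
	return sprintf(page, "%d\n", 8);	/* placeholder value */
}

static ssize_t foo_quantum_store(struct elevator_queue *e, const char *page,
				 size_t count)
{
	return count;	/* parsing omitted in this sketch */
}

/* The attribute table itself can now live in rodata. */
static const struct elv_fs_entry foo_attrs[] = {
	__ATTR(quantum, 0644, foo_quantum_show, foo_quantum_store),
	__ATTR_NULL
};
```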
diff --git a/block/Makefile b/block/Makefile
index ddfd21c1a9ff..33748123710b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -27,8 +27,6 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
 obj-$(CONFIG_IOSCHED_BFQ)       += bfq.o
 
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
-obj-$(CONFIG_BLK_MQ_PCI)        += blk-mq-pci.o
-obj-$(CONFIG_BLK_MQ_VIRTIO)     += blk-mq-virtio.o
 obj-$(CONFIG_BLK_DEV_ZONED)     += blk-zoned.o
 obj-$(CONFIG_BLK_WBT)           += blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)      += blk-mq-debugfs.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index cad16c163611..167542201603 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7622,7 +7622,7 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
 #define BFQ_ATTR(name) \
         __ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store)
 
-static struct elv_fs_entry bfq_attrs[] = {
+static const struct elv_fs_entry bfq_attrs[] = {
         BFQ_ATTR(fifo_expire_sync),
         BFQ_ATTR(fifo_expire_async),
         BFQ_ATTR(back_seek_max),
diff --git a/block/bio.c b/block/bio.c
index d5bdc31d88d3..f0c416e5931d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -946,8 +946,11 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
 
 /*
  * Try to merge a page into a segment, while obeying the hardware segment
- * size limit. This is not for normal read/write bios, but for passthrough
- * or Zone Append operations that we can't split.
+ * size limit.
+ *
+ * This is kept around for the integrity metadata, which is still tries
+ * to build the initial bio to the hardware limit and doesn't have proper
+ * helpers to split. Hopefully this will go away soon.
  */
 bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
                 struct page *page, unsigned len, unsigned offset,
@@ -965,106 +968,6 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
 }
 
 /**
- * bio_add_hw_page - attempt to add a page to a bio with hw constraints
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- * @max_sectors: maximum number of sectors that can be added
- * @same_page: return if the segment has been merged inside the same page
- *
- * Add a page to a bio while respecting the hardware max_sectors, max_segment
- * and gap limitations.
- */
-int bio_add_hw_page(struct request_queue *q, struct bio *bio,
-                struct page *page, unsigned int len, unsigned int offset,
-                unsigned int max_sectors, bool *same_page)
-{
-        unsigned int max_size = max_sectors << SECTOR_SHIFT;
-
-        if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
-                return 0;
-
-        len = min3(len, max_size, queue_max_segment_size(q));
-        if (len > max_size - bio->bi_iter.bi_size)
-                return 0;
-
-        if (bio->bi_vcnt > 0) {
-                struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-                if (bvec_try_merge_hw_page(q, bv, page, len, offset,
-                                same_page)) {
-                        bio->bi_iter.bi_size += len;
-                        return len;
-                }
-
-                if (bio->bi_vcnt >=
-                    min(bio->bi_max_vecs, queue_max_segments(q)))
-                        return 0;
-
-                /*
-                 * If the queue doesn't support SG gaps and adding this segment
-                 * would create a gap, disallow it.
-                 */
-                if (bvec_gap_to_prev(&q->limits, bv, offset))
-                        return 0;
-        }
-
-        bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
-        bio->bi_vcnt++;
-        bio->bi_iter.bi_size += len;
-        return len;
-}
-
-/**
- * bio_add_hw_folio - attempt to add a folio to a bio with hw constraints
- * @q: the target queue
- * @bio: destination bio
- * @folio: folio to add
- * @len: vec entry length
- * @offset: vec entry offset in the folio
- * @max_sectors: maximum number of sectors that can be added
- * @same_page: return if the segment has been merged inside the same folio
- *
- * Add a folio to a bio while respecting the hardware max_sectors, max_segment
- * and gap limitations.
- */
-int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
-                struct folio *folio, size_t len, size_t offset,
-                unsigned int max_sectors, bool *same_page)
-{
-        if (len > UINT_MAX || offset > UINT_MAX)
-                return 0;
-        return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset,
-                        max_sectors, same_page);
-}
-
-/**
- * bio_add_pc_page - attempt to add page to passthrough bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
- *
- * This should only be used by passthrough bios.
- */
-int bio_add_pc_page(struct request_queue *q, struct bio *bio,
-                struct page *page, unsigned int len, unsigned int offset)
-{
-        bool same_page = false;
-        return bio_add_hw_page(q, bio, page, len, offset,
-                        queue_max_hw_sectors(q), &same_page);
-}
-EXPORT_SYMBOL(bio_add_pc_page);
-
-/**
  * __bio_add_page - add page(s) to a bio in a new segment
  * @bio: destination bio
  * @page: start page to add
@@ -1707,6 +1610,10 @@ EXPORT_SYMBOL(bio_split);
  */
 void bio_trim(struct bio *bio, sector_t offset, sector_t size)
 {
+        /* We should never trim an atomic write */
+        if (WARN_ON_ONCE(bio->bi_opf & REQ_ATOMIC && size))
+                return;
+
         if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
                          offset + size > bio_sectors(bio)))
                 return;
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index 022527b0b043..703a16fe1404 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -52,7 +52,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
 /**
  * blkg_rwstat_add - add a value to a blkg_rwstat
  * @rwstat: target blkg_rwstat
- * @op: REQ_OP and flags
+ * @opf: REQ_OP and flags
  * @val: value to add
  *
  * Add @val to @rwstat. The counters are chosen according to @rw. The
@@ -83,8 +83,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 /**
  * blkg_rwstat_read - read the current values of a blkg_rwstat
  * @rwstat: blkg_rwstat to read
+ * @result: where to put the current values
  *
- * Read the current snapshot of @rwstat and return it in the aux counts.
+ * Read the current snapshot of @rwstat and return it in the @result counts.
  */
 static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
                 struct blkg_rwstat_sample *result)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index b9e3265c1eb3..2c4663bd993a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -225,7 +225,9 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx);
 
 /**
  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
- * @return: true if this bio needs to be submitted with the root blkg context.
+ * @bio: the target &bio
+ *
+ * Return: true if this bio needs to be submitted with the root blkg context.
  *
  * In order to avoid priority inversions we sometimes need to issue a bio as if
  * it were attached to the root blkg, and then backcharge to the actual owning
@@ -245,7 +247,7 @@ static inline bool bio_issue_as_root_blkg(struct bio *bio)
  * @q: request_queue of interest
  *
  * Lookup blkg for the @blkcg - @q pair.
-
+ *
  * Must be called in a RCU critical section.
  */
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
@@ -268,7 +270,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 }
 
 /**
- * blkg_to_pdata - get policy private data
+ * blkg_to_pd - get policy private data
  * @blkg: blkg of interest
  * @pol: policy of interest
  *
@@ -287,7 +289,7 @@ static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
 }
 
 /**
- * pdata_to_blkg - get blkg associated with policy private data
+ * pd_to_blkg - get blkg associated with policy private data
  * @pd: policy private data of interest
  *
  * @pd is policy private data. Determine the blkg it's associated with.
diff --git a/block/blk-core.c b/block/blk-core.c
index 666efe8fa202..32fb28a6372c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -629,8 +629,14 @@ static void __submit_bio(struct bio *bio)
                 blk_mq_submit_bio(bio);
         } else if (likely(bio_queue_enter(bio) == 0)) {
                 struct gendisk *disk = bio->bi_bdev->bd_disk;
-
-                disk->fops->submit_bio(bio);
+
+                if ((bio->bi_opf & REQ_POLLED) &&
+                    !(disk->queue->limits.features & BLK_FEAT_POLL)) {
+                        bio->bi_status = BLK_STS_NOTSUPP;
+                        bio_endio(bio);
+                } else {
+                        disk->fops->submit_bio(bio);
+                }
                 blk_queue_exit(disk->queue);
         }
 
@@ -805,12 +811,6 @@ void submit_bio_noacct(struct bio *bio)
                 }
         }
 
-        if (!(q->limits.features & BLK_FEAT_POLL) &&
-            (bio->bi_opf & REQ_POLLED)) {
-                bio_clear_polled(bio);
-                goto not_supported;
-        }
-
         switch (bio_op(bio)) {
         case REQ_OP_READ:
                 break;
@@ -935,7 +935,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
                 return 0;
 
         q = bdev_get_queue(bdev);
-        if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
+        if (cookie == BLK_QC_T_NONE)
                 return 0;
 
         blk_flush_plug(current->plug, false);
@@ -956,7 +956,8 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
         } else {
                 struct gendisk *disk = q->disk;
 
-                if (disk && disk->fops->poll_bio)
+                if ((q->limits.features & BLK_FEAT_POLL) && disk &&
+                    disk->fops->poll_bio)
                         ret = disk->fops->poll_bio(bio, iob, flags);
         }
         blk_queue_exit(q);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index b180cac61a9d..013469faa5e7 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -218,9 +218,7 @@ static ssize_t flag_store(struct device *dev, const char *page, size_t count,
         else
                 lim.integrity.flags |= flag;
 
-        blk_mq_freeze_queue(q);
-        err = queue_limits_commit_update(q, &lim);
-        blk_mq_unfreeze_queue(q);
+        err = queue_limits_commit_update_frozen(q, &lim);
         if (err)
                 return err;
         return count;
diff --git a/block/blk-map.c b/block/blk-map.c
index 894009b2d881..d2f22744b3d1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -189,7 +189,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
                         }
                 }
 
-                if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
+                if (bio_add_page(bio, page, bytes, offset) < bytes) {
                         if (!map_data)
                                 __free_page(page);
                         break;
@@ -272,86 +272,27 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                 gfp_t gfp_mask)
 {
-        iov_iter_extraction_t extraction_flags = 0;
-        unsigned int max_sectors = queue_max_hw_sectors(rq->q);
         unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
         struct bio *bio;
         int ret;
-        int j;
 
         if (!iov_iter_count(iter))
                 return -EINVAL;
 
         bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
-        if (bio == NULL)
+        if (!bio)
                 return -ENOMEM;
-
-        if (blk_queue_pci_p2pdma(rq->q))
-                extraction_flags |= ITER_ALLOW_P2PDMA;
-        if (iov_iter_extract_will_pin(iter))
-                bio_set_flag(bio, BIO_PAGE_PINNED);
-
-        while (iov_iter_count(iter)) {
-                struct page *stack_pages[UIO_FASTIOV];
-                struct page **pages = stack_pages;
-                ssize_t bytes;
-                size_t offs;
-                int npages;
-
-                if (nr_vecs > ARRAY_SIZE(stack_pages))
-                        pages = NULL;
-
-                bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
-                                nr_vecs, extraction_flags, &offs);
-                if (unlikely(bytes <= 0)) {
-                        ret = bytes ? bytes : -EFAULT;
-                        goto out_unmap;
-                }
-
-                npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
-                if (unlikely(offs & queue_dma_alignment(rq->q)))
-                        j = 0;
-                else {
-                        for (j = 0; j < npages; j++) {
-                                struct page *page = pages[j];
-                                unsigned int n = PAGE_SIZE - offs;
-                                bool same_page = false;
-
-                                if (n > bytes)
-                                        n = bytes;
-
-                                if (!bio_add_hw_page(rq->q, bio, page, n, offs,
-                                                max_sectors, &same_page))
-                                        break;
-
-                                if (same_page)
-                                        bio_release_page(bio, page);
-                                bytes -= n;
-                                offs = 0;
-                        }
-                }
-                /*
-                 * release the pages we didn't map into the bio, if any
-                 */
-                while (j < npages)
-                        bio_release_page(bio, pages[j++]);
-                if (pages != stack_pages)
-                        kvfree(pages);
-                /* couldn't stuff something into bio? */
-                if (bytes) {
-                        iov_iter_revert(iter, bytes);
-                        break;
-                }
-        }
-
+        ret = bio_iov_iter_get_pages(bio, iter);
+        if (ret)
+                goto out_put;
         ret = blk_rq_append_bio(rq, bio);
         if (ret)
-                goto out_unmap;
+                goto out_release;
         return 0;
 
- out_unmap:
+out_release:
         bio_release_pages(bio, false);
+out_put:
         blk_mq_map_bio_put(bio);
         return ret;
 }
@@ -422,8 +363,7 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
                         page = virt_to_page(data);
                 else
                         page = vmalloc_to_page(data);
-                if (bio_add_pc_page(q, bio, page, bytes,
-                                    offset) < bytes) {
+                if (bio_add_page(bio, page, bytes, offset) < bytes) {
                         /* we don't support partial mappings */
                         bio_uninit(bio);
                         kfree(bio);
@@ -507,7 +447,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
                 if (!reading)
                         memcpy(page_address(page), p, bytes);
 
-                if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+                if (bio_add_page(bio, page, bytes, 0) < bytes)
                         break;
 
                 len -= bytes;
@@ -536,24 +476,33 @@ cleanup:
  */
 int blk_rq_append_bio(struct request *rq, struct bio *bio)
 {
-        struct bvec_iter iter;
-        struct bio_vec bv;
+        const struct queue_limits *lim = &rq->q->limits;
+        unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
         unsigned int nr_segs = 0;
+        int ret;
 
-        bio_for_each_bvec(bv, bio, iter)
-                nr_segs++;
+        /* check that the data layout matches the hardware restrictions */
+        ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes);
+        if (ret) {
+                /* if we would have to split the bio, copy instead */
+                if (ret > 0)
+                        ret = -EREMOTEIO;
+                return ret;
+        }
 
-        if (!rq->bio) {
-                blk_rq_bio_prep(rq, bio, nr_segs);
-        } else {
+        if (rq->bio) {
                 if (!ll_back_merge_fn(rq, bio, nr_segs))
                         return -EINVAL;
                 rq->biotail->bi_next = bio;
                 rq->biotail = bio;
-                rq->__data_len += (bio)->bi_iter.bi_size;
+                rq->__data_len += bio->bi_iter.bi_size;
                 bio_crypt_free_ctx(bio);
+                return 0;
         }
 
+        rq->nr_phys_segments = nr_segs;
+        rq->bio = rq->biotail = bio;
+        rq->__data_len = bio->bi_iter.bi_size;
         return 0;
 }
 EXPORT_SYMBOL(blk_rq_append_bio);
@@ -561,9 +510,7 @@ EXPORT_SYMBOL(blk_rq_append_bio);
 /* Prepare bio for passthrough IO given ITER_BVEC iter */
 static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
 {
-        const struct queue_limits *lim = &rq->q->limits;
-        unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
-        unsigned int nsegs;
+        unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
         struct bio *bio;
         int ret;
 
@@ -576,18 +523,10 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
                 return -ENOMEM;
         bio_iov_bvec_set(bio, iter);
 
-        /* check that the data layout matches the hardware restrictions */
-        ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
-        if (ret) {
-                /* if we would have to split the bio, copy instead */
-                if (ret > 0)
-                        ret = -EREMOTEIO;
+        ret = blk_rq_append_bio(rq, bio);
+        if (ret)
                 blk_mq_map_bio_put(bio);
-                return ret;
-        }
-
-        blk_rq_bio_prep(rq, bio, nsegs);
-        return 0;
+        return ret;
 }
 
 /**
@@ -644,8 +583,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
                         ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
                 else
                         ret = bio_map_user_iov(rq, &i, gfp_mask);
-                if (ret)
+                if (ret) {
+                        if (ret == -EREMOTEIO)
+                                ret = -EINVAL;
                         goto unmap_rq;
+                }
                 if (!bio)
                         bio = rq->bio;
         } while (iov_iter_count(&i));
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e01383c6e534..15cd231d560c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -473,137 +473,100 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
         return nr_phys_segs;
 }
 
-static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
-                struct scatterlist *sglist)
-{
-        if (!*sg)
-                return sglist;
+struct phys_vec {
+        phys_addr_t paddr;
+        u32 len;
+};
 
-        /*
-         * If the driver previously mapped a shorter list, we could see a
-         * termination bit prematurely unless it fully inits the sg table
-         * on each mapping. We KNOW that there must be more entries here
-         * or the driver would be buggy, so force clear the termination bit
-         * to avoid doing a full sg_init_table() in drivers for each command.
-         */
-        sg_unmark_end(*sg);
-        return sg_next(*sg);
-}
-
-static unsigned blk_bvec_map_sg(struct request_queue *q,
-                struct bio_vec *bvec, struct scatterlist *sglist,
-                struct scatterlist **sg)
+static bool blk_map_iter_next(struct request *req,
+                struct req_iterator *iter, struct phys_vec *vec)
 {
-        unsigned nbytes = bvec->bv_len;
-        unsigned nsegs = 0, total = 0;
-
-        while (nbytes > 0) {
-                unsigned offset = bvec->bv_offset + total;
-                unsigned len = get_max_segment_size(&q->limits,
-                                bvec_phys(bvec) + total, nbytes);
-                struct page *page = bvec->bv_page;
-
-                /*
-                 * Unfortunately a fair number of drivers barf on scatterlists
-                 * that have an offset larger than PAGE_SIZE, despite other
-                 * subsystems dealing with that invariant just fine. For now
-                 * stick to the legacy format where we never present those from
-                 * the block layer, but the code below should be removed once
-                 * these offenders (mostly MMC/SD drivers) are fixed.
-                 */
-                page += (offset >> PAGE_SHIFT);
-                offset &= ~PAGE_MASK;
-
-                *sg = blk_next_sg(sg, sglist);
-                sg_set_page(*sg, page, len, offset);
+        unsigned int max_size;
+        struct bio_vec bv;
 
-                total += len;
-                nbytes -= len;
-                nsegs++;
+        if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+                if (!iter->bio)
+                        return false;
+                vec->paddr = bvec_phys(&req->special_vec);
+                vec->len = req->special_vec.bv_len;
+                iter->bio = NULL;
+                return true;
         }
 
-        return nsegs;
-}
-
-static inline int __blk_bvec_map_sg(struct bio_vec bv,
-                struct scatterlist *sglist, struct scatterlist **sg)
-{
-        *sg = blk_next_sg(sg, sglist);
-        sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
-        return 1;
-}
-
-/* only try to merge bvecs into one sg if they are from two bios */
-static inline bool
-__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
-                struct bio_vec *bvprv, struct scatterlist **sg)
-{
-
-        int nbytes = bvec->bv_len;
-
-        if (!*sg)
+        if (!iter->iter.bi_size)
                 return false;
 
-        if ((*sg)->length + nbytes > queue_max_segment_size(q))
-                return false;
+        bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+        vec->paddr = bvec_phys(&bv);
+        max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
+        bv.bv_len = min(bv.bv_len, max_size);
+        bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
 
-        if (!biovec_phys_mergeable(q, bvprv, bvec))
-                return false;
+        /*
+         * If we are entirely done with this bi_io_vec entry, check if the next
+         * one could be merged into it. This typically happens when moving to
+         * the next bio, but some callers also don't pack bvecs tight.
+         */
+        while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
+                struct bio_vec next;
+
+                if (!iter->iter.bi_size) {
+                        if (!iter->bio->bi_next)
+                                break;
+                        iter->bio = iter->bio->bi_next;
+                        iter->iter = iter->bio->bi_iter;
+                }
 
-        (*sg)->length += nbytes;
+                next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+                if (bv.bv_len + next.bv_len > max_size ||
+                    !biovec_phys_mergeable(req->q, &bv, &next))
+                        break;
+
+                bv.bv_len += next.bv_len;
+                bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
+        }
 
+        vec->len = bv.bv_len;
         return true;
 }
 
-static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
-                struct scatterlist *sglist,
-                struct scatterlist **sg)
+static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
+                struct scatterlist *sglist)
 {
-        struct bio_vec bvec, bvprv = { NULL };
-        struct bvec_iter iter;
-        int nsegs = 0;
-        bool new_bio = false;
-
-        for_each_bio(bio) {
-                bio_for_each_bvec(bvec, bio, iter) {
-                        /*
-                         * Only try to merge bvecs from two bios given we
-                         * have done bio internal merge when adding pages
-                         * to bio
-                         */
-                        if (new_bio &&
-                            __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
-                                goto next_bvec;
-
-                        if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
-                                nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
-                        else
-                                nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
- next_bvec:
-                        new_bio = false;
-                }
-                if (likely(bio->bi_iter.bi_size)) {
-                        bvprv = bvec;
-                        new_bio = true;
-                }
-        }
+        if (!*sg)
+                return sglist;
 
-        return nsegs;
+        /*
+         * If the driver previously mapped a shorter list, we could see a
+         * termination bit prematurely unless it fully inits the sg table
+         * on each mapping. We KNOW that there must be more entries here
+         * or the driver would be buggy, so force clear the termination bit
+         * to avoid doing a full sg_init_table() in drivers for each command.
+         */
+        sg_unmark_end(*sg);
+        return sg_next(*sg);
 }
 
 /*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
+ * Map a request to scatterlist, return number of sg entries setup. Caller
+ * must make sure sg can hold rq->nr_phys_segments entries.
  */
 int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
                 struct scatterlist *sglist, struct scatterlist **last_sg)
 {
+        struct req_iterator iter = {
+                .bio    = rq->bio,
+                .iter   = rq->bio->bi_iter,
+        };
+        struct phys_vec vec;
         int nsegs = 0;
 
-        if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
-                nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
-        else if (rq->bio)
-                nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
+        while (blk_map_iter_next(rq, &iter, &vec)) {
+                *last_sg = blk_next_sg(last_sg, sglist);
+                sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
+                                offset_in_page(vec.paddr));
+                nsegs++;
+        }
 
         if (*last_sg)
                 sg_mark_end(*last_sg);
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9638b25fd521..ad8d6a363f24 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -11,6 +11,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/group_cpus.h>
+#include <linux/device/bus.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -54,3 +55,39 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
 
         return NUMA_NO_NODE;
 }
+
+/**
+ * blk_mq_map_hw_queues - Create CPU to hardware queue mapping
+ * @qmap:       CPU to hardware queue map
+ * @dev:        The device to map queues
+ * @offset:     Queue offset to use for the device
+ *
+ * Create a CPU to hardware queue mapping in @qmap. The struct bus_type
+ * irq_get_affinity callback will be used to retrieve the affinity.
+ */
+void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
+                          struct device *dev, unsigned int offset)
+
+{
+        const struct cpumask *mask;
+        unsigned int queue, cpu;
+
+        if (!dev->bus->irq_get_affinity)
+                goto fallback;
+
+        for (queue = 0; queue < qmap->nr_queues; queue++) {
+                mask = dev->bus->irq_get_affinity(dev, queue + offset);
+                if (!mask)
+                        goto fallback;
+
+                for_each_cpu(cpu, mask)
+                        qmap->mq_map[cpu] = qmap->queue_offset + queue;
+        }
+
+        return;
+
+fallback:
+        WARN_ON_ONCE(qmap->nr_queues > 1);
+        blk_mq_clear_mq_map(qmap);
+}
+EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5463697a8442..adf5f0697b6b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -172,21 +172,13 @@ static int hctx_state_show(void *data, struct seq_file *m)
         return 0;
 }
 
-#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name
-static const char *c
