author	Linus Torvalds <torvalds@linux-foundation.org>	2025-01-20 19:38:46 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-01-20 19:38:46 -0800
commit	1cbfb828e05171ca2dd77b5988d068e6872480fe (patch)
tree	bfb33c9ad8840908058649ba2e261bdb7e5f7ee9 /block
parent	3d3a9c8b89d4f8a3785e06ffd15405c670696f02 (diff)
parent	554b22864cc79e28cd65e3a6e1d0d1dfa8581c68 (diff)
Merge tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - NVMe pull requests via Keith:
      - Target support for PCI-Endpoint transport (Damien)
      - TCP IO queue spreading fixes (Sagi, Chaitanya)
      - Target handling for "limited retry" flags (Guixen)
      - Poll type fix (Yongsoo)
      - Xarray storage error handling (Keisuke)
      - Host memory buffer free size fix on error (Francis)

 - MD pull requests via Song:
      - Reintroduce md-linear (Yu Kuai)
      - md-bitmap refactor and fix (Yu Kuai)
      - Replace kmap_atomic with kmap_local_page (David Reaver)

 - Quite a few queue freeze and debugfs deadlock fixes

   Ming introduced lockdep support for this in the 6.13 kernel, and it
   has (unsurprisingly) uncovered quite a few issues

 - Use const attributes for IO schedulers

 - Remove bio ioprio wrappers

 - Fixes for stacked device atomic write support

 - Refactor queue affinity helpers, in preparation for better
   supporting isolated CPUs

 - Cleanups of loop O_DIRECT handling

 - Cleanup of BLK_MQ_F_* flags

 - Add rotational support for null_blk

 - Various fixes and cleanups

* tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux: (106 commits)
  block: Don't trim an atomic write
  block: Add common atomic writes enable flag
  md/md-linear: Fix a NULL vs IS_ERR() bug in linear_add()
  block: limit disk max sectors to (LLONG_MAX >> 9)
  block: Change blk_stack_atomic_writes_limits() unit_min check
  block: Ensure start sector is aligned for stacking atomic writes
  blk-mq: Move more error handling into blk_mq_submit_bio()
  block: Reorder the request allocation code in blk_mq_submit_bio()
  nvme: fix bogus kzalloc() return check in nvme_init_effects_log()
  md/md-bitmap: move bitmap_{start, end}write to md upper layer
  md/raid5: implement pers->bitmap_sector()
  md: add a new callback pers->bitmap_sector()
  md/md-bitmap: remove the last parameter for bimtap_ops->endwrite()
  md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
  md: Replace deprecated kmap_atomic() with kmap_local_page()
  md: reintroduce md-linear
  partitions: ldm: remove the initial kernel-doc notation
  blk-cgroup: rwstat: fix kernel-doc warnings in header file
  blk-cgroup: fix kernel-doc warnings in header file
  nbd: fix partial sending
  ...
Diffstat (limited to 'block')
-rw-r--r--	block/Makefile	2
-rw-r--r--	block/bfq-iosched.c	2
-rw-r--r--	block/bio.c	111
-rw-r--r--	block/blk-cgroup-rwstat.h	5
-rw-r--r--	block/blk-cgroup.h	10
-rw-r--r--	block/blk-core.c	21
-rw-r--r--	block/blk-integrity.c	4
-rw-r--r--	block/blk-map.c	128
-rw-r--r--	block/blk-merge.c	177
-rw-r--r--	block/blk-mq-cpumap.c	37
-rw-r--r--	block/blk-mq-debugfs.c	27
-rw-r--r--	block/blk-mq-pci.c	46
-rw-r--r--	block/blk-mq-sched.c	3
-rw-r--r--	block/blk-mq-tag.c	41
-rw-r--r--	block/blk-mq-virtio.c	46
-rw-r--r--	block/blk-mq.c	71
-rw-r--r--	block/blk-mq.h	11
-rw-r--r--	block/blk-settings.c	42
-rw-r--r--	block/blk-sysfs.c	140
-rw-r--r--	block/blk-zoned.c	65
-rw-r--r--	block/blk.h	33
-rw-r--r--	block/bsg-lib.c	2
-rw-r--r--	block/elevator.c	35
-rw-r--r--	block/elevator.h	2
-rw-r--r--	block/genhd.c	63
-rw-r--r--	block/kyber-iosched.c	2
-rw-r--r--	block/mq-deadline.c	2
-rw-r--r--	block/partitions/ldm.h	2
28 files changed, 442 insertions, 688 deletions
diff --git a/block/Makefile b/block/Makefile
index ddfd21c1a9ff..33748123710b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -27,8 +27,6 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
-obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
-obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index cad16c163611..167542201603 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7622,7 +7622,7 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
#define BFQ_ATTR(name) \
__ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store)
-static struct elv_fs_entry bfq_attrs[] = {
+static const struct elv_fs_entry bfq_attrs[] = {
BFQ_ATTR(fifo_expire_sync),
BFQ_ATTR(fifo_expire_async),
BFQ_ATTR(back_seek_max),
diff --git a/block/bio.c b/block/bio.c
index d5bdc31d88d3..f0c416e5931d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -946,8 +946,11 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
/*
* Try to merge a page into a segment, while obeying the hardware segment
- * size limit. This is not for normal read/write bios, but for passthrough
- * or Zone Append operations that we can't split.
+ * size limit.
+ *
+ * This is kept around for the integrity metadata, which still tries
+ * to build the initial bio to the hardware limit and doesn't have proper
+ * helpers to split. Hopefully this will go away soon.
*/
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
struct page *page, unsigned len, unsigned offset,
@@ -965,106 +968,6 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
}
/**
- * bio_add_hw_page - attempt to add a page to a bio with hw constraints
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- * @max_sectors: maximum number of sectors that can be added
- * @same_page: return if the segment has been merged inside the same page
- *
- * Add a page to a bio while respecting the hardware max_sectors, max_segment
- * and gap limitations.
- */
-int bio_add_hw_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned int len, unsigned int offset,
- unsigned int max_sectors, bool *same_page)
-{
- unsigned int max_size = max_sectors << SECTOR_SHIFT;
-
- if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return 0;
-
- len = min3(len, max_size, queue_max_segment_size(q));
- if (len > max_size - bio->bi_iter.bi_size)
- return 0;
-
- if (bio->bi_vcnt > 0) {
- struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- if (bvec_try_merge_hw_page(q, bv, page, len, offset,
- same_page)) {
- bio->bi_iter.bi_size += len;
- return len;
- }
-
- if (bio->bi_vcnt >=
- min(bio->bi_max_vecs, queue_max_segments(q)))
- return 0;
-
- /*
- * If the queue doesn't support SG gaps and adding this segment
- * would create a gap, disallow it.
- */
- if (bvec_gap_to_prev(&q->limits, bv, offset))
- return 0;
- }
-
- bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
- bio->bi_vcnt++;
- bio->bi_iter.bi_size += len;
- return len;
-}
-
-/**
- * bio_add_hw_folio - attempt to add a folio to a bio with hw constraints
- * @q: the target queue
- * @bio: destination bio
- * @folio: folio to add
- * @len: vec entry length
- * @offset: vec entry offset in the folio
- * @max_sectors: maximum number of sectors that can be added
- * @same_page: return if the segment has been merged inside the same folio
- *
- * Add a folio to a bio while respecting the hardware max_sectors, max_segment
- * and gap limitations.
- */
-int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
- struct folio *folio, size_t len, size_t offset,
- unsigned int max_sectors, bool *same_page)
-{
- if (len > UINT_MAX || offset > UINT_MAX)
- return 0;
- return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset,
- max_sectors, same_page);
-}
-
-/**
- * bio_add_pc_page - attempt to add page to passthrough bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
- *
- * This should only be used by passthrough bios.
- */
-int bio_add_pc_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned int len, unsigned int offset)
-{
- bool same_page = false;
- return bio_add_hw_page(q, bio, page, len, offset,
- queue_max_hw_sectors(q), &same_page);
-}
-EXPORT_SYMBOL(bio_add_pc_page);
-
-/**
* __bio_add_page - add page(s) to a bio in a new segment
* @bio: destination bio
* @page: start page to add
@@ -1707,6 +1610,10 @@ EXPORT_SYMBOL(bio_split);
*/
void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{
+ /* We should never trim an atomic write */
+ if (WARN_ON_ONCE(bio->bi_opf & REQ_ATOMIC && size))
+ return;
+
if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
offset + size > bio_sectors(bio)))
return;
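
The bio_trim() hunk above adds a guard for atomic writes: an atomic write must reach the device exactly as submitted, so shrinking it would silently void the atomicity guarantee. A small hedged illustration of the resulting behavior; the caller below is hypothetical:

#include <linux/bio.h>

/* Hedged sketch of the new bio_trim() guard (hypothetical caller). */
static void example_trim(struct bio *bio)
{
	if (bio->bi_opf & REQ_ATOMIC) {
		/*
		 * bio_trim(bio, 0, bio_sectors(bio) / 2) would now trigger
		 * WARN_ON_ONCE() and return with the bio unmodified.
		 */
		return;
	}
	bio_trim(bio, 0, bio_sectors(bio) / 2);	/* keep the first half */
}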
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index 022527b0b043..703a16fe1404 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -52,7 +52,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
/**
* blkg_rwstat_add - add a value to a blkg_rwstat
* @rwstat: target blkg_rwstat
- * @op: REQ_OP and flags
+ * @opf: REQ_OP and flags
* @val: value to add
*
* Add @val to @rwstat. The counters are chosen according to @rw. The
@@ -83,8 +83,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
/**
* blkg_rwstat_read - read the current values of a blkg_rwstat
* @rwstat: blkg_rwstat to read
+ * @result: where to put the current values
*
- * Read the current snapshot of @rwstat and return it in the aux counts.
+ * Read the current snapshot of @rwstat and return it in the @result counts.
*/
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
struct blkg_rwstat_sample *result)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index b9e3265c1eb3..2c4663bd993a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -225,7 +225,9 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx);
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
- * @return: true if this bio needs to be submitted with the root blkg context.
+ * @bio: the target &bio
+ *
+ * Return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
@@ -245,7 +247,7 @@ static inline bool bio_issue_as_root_blkg(struct bio *bio)
* @q: request_queue of interest
*
* Lookup blkg for the @blkcg - @q pair.
-
+ *
* Must be called in a RCU critical section.
*/
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
@@ -268,7 +270,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
}
/**
- * blkg_to_pdata - get policy private data
+ * blkg_to_pd - get policy private data
* @blkg: blkg of interest
* @pol: policy of interest
*
@@ -287,7 +289,7 @@ static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
}
/**
- * pdata_to_blkg - get blkg associated with policy private data
+ * pd_to_blkg - get blkg associated with policy private data
* @pd: policy private data of interest
*
* @pd is policy private data. Determine the blkg it's associated with.
diff --git a/block/blk-core.c b/block/blk-core.c
index 666efe8fa202..32fb28a6372c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -629,8 +629,14 @@ static void __submit_bio(struct bio *bio)
blk_mq_submit_bio(bio);
} else if (likely(bio_queue_enter(bio) == 0)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
-
- disk->fops->submit_bio(bio);
+
+ if ((bio->bi_opf & REQ_POLLED) &&
+ !(disk->queue->limits.features & BLK_FEAT_POLL)) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ } else {
+ disk->fops->submit_bio(bio);
+ }
blk_queue_exit(disk->queue);
}
@@ -805,12 +811,6 @@ void submit_bio_noacct(struct bio *bio)
}
}
- if (!(q->limits.features & BLK_FEAT_POLL) &&
- (bio->bi_opf & REQ_POLLED)) {
- bio_clear_polled(bio);
- goto not_supported;
- }
-
switch (bio_op(bio)) {
case REQ_OP_READ:
break;
@@ -935,7 +935,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
return 0;
q = bdev_get_queue(bdev);
- if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
+ if (cookie == BLK_QC_T_NONE)
return 0;
blk_flush_plug(current->plug, false);
@@ -956,7 +956,8 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
} else {
struct gendisk *disk = q->disk;
- if (disk && disk->fops->poll_bio)
+ if ((q->limits.features & BLK_FEAT_POLL) && disk &&
+ disk->fops->poll_bio)
ret = disk->fops->poll_bio(bio, iob, flags);
}
blk_queue_exit(q);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index b180cac61a9d..013469faa5e7 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -218,9 +218,7 @@ static ssize_t flag_store(struct device *dev, const char *page, size_t count,
else
lim.integrity.flags |= flag;
- blk_mq_freeze_queue(q);
- err = queue_limits_commit_update(q, &lim);
- blk_mq_unfreeze_queue(q);
+ err = queue_limits_commit_update_frozen(q, &lim);
if (err)
return err;
return count;
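
The blk-integrity.c hunk converts an open-coded freeze/commit/unfreeze sequence to the new queue_limits_commit_update_frozen() helper. A minimal sketch of the resulting pattern, assuming a driver-local update function; only the two queue_limits_* helpers are the real block-layer API, the function name and flag are hypothetical:

#include <linux/blkdev.h>

/* Hedged sketch: updating queue limits with the frozen-commit helper. */
static int example_enable_flag(struct request_queue *q, unsigned char flag)
{
	struct queue_limits lim = queue_limits_start_update(q);

	lim.integrity.flags |= flag;

	/*
	 * Replaces the blk_mq_freeze_queue() / queue_limits_commit_update() /
	 * blk_mq_unfreeze_queue() triplet: the helper freezes the queue,
	 * commits the limits, and unfreezes in one call.
	 */
	return queue_limits_commit_update_frozen(q, &lim);
}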
diff --git a/block/blk-map.c b/block/blk-map.c
index 894009b2d881..d2f22744b3d1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -189,7 +189,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
}
}
- if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
+ if (bio_add_page(bio, page, bytes, offset) < bytes) {
if (!map_data)
__free_page(page);
break;
@@ -272,86 +272,27 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
- iov_iter_extraction_t extraction_flags = 0;
- unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
- int j;
if (!iov_iter_count(iter))
return -EINVAL;
bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
- if (bio == NULL)
+ if (!bio)
return -ENOMEM;
-
- if (blk_queue_pci_p2pdma(rq->q))
- extraction_flags |= ITER_ALLOW_P2PDMA;
- if (iov_iter_extract_will_pin(iter))
- bio_set_flag(bio, BIO_PAGE_PINNED);
-
- while (iov_iter_count(iter)) {
- struct page *stack_pages[UIO_FASTIOV];
- struct page **pages = stack_pages;
- ssize_t bytes;
- size_t offs;
- int npages;
-
- if (nr_vecs > ARRAY_SIZE(stack_pages))
- pages = NULL;
-
- bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
- nr_vecs, extraction_flags, &offs);
- if (unlikely(bytes <= 0)) {
- ret = bytes ? bytes : -EFAULT;
- goto out_unmap;
- }
-
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
- if (unlikely(offs & queue_dma_alignment(rq->q)))
- j = 0;
- else {
- for (j = 0; j < npages; j++) {
- struct page *page = pages[j];
- unsigned int n = PAGE_SIZE - offs;
- bool same_page = false;
-
- if (n > bytes)
- n = bytes;
-
- if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page))
- break;
-
- if (same_page)
- bio_release_page(bio, page);
- bytes -= n;
- offs = 0;
- }
- }
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < npages)
- bio_release_page(bio, pages[j++]);
- if (pages != stack_pages)
- kvfree(pages);
- /* couldn't stuff something into bio? */
- if (bytes) {
- iov_iter_revert(iter, bytes);
- break;
- }
- }
-
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (ret)
+ goto out_put;
ret = blk_rq_append_bio(rq, bio);
if (ret)
- goto out_unmap;
+ goto out_release;
return 0;
- out_unmap:
+out_release:
bio_release_pages(bio, false);
+out_put:
blk_mq_map_bio_put(bio);
return ret;
}
@@ -422,8 +363,7 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
page = virt_to_page(data);
else
page = vmalloc_to_page(data);
- if (bio_add_pc_page(q, bio, page, bytes,
- offset) < bytes) {
+ if (bio_add_page(bio, page, bytes, offset) < bytes) {
/* we don't support partial mappings */
bio_uninit(bio);
kfree(bio);
@@ -507,7 +447,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
if (!reading)
memcpy(page_address(page), p, bytes);
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ if (bio_add_page(bio, page, bytes, 0) < bytes)
break;
len -= bytes;
@@ -536,24 +476,33 @@ cleanup:
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
- struct bvec_iter iter;
- struct bio_vec bv;
+ const struct queue_limits *lim = &rq->q->limits;
+ unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
unsigned int nr_segs = 0;
+ int ret;
- bio_for_each_bvec(bv, bio, iter)
- nr_segs++;
+ /* check that the data layout matches the hardware restrictions */
+ ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes);
+ if (ret) {
+ /* if we would have to split the bio, copy instead */
+ if (ret > 0)
+ ret = -EREMOTEIO;
+ return ret;
+ }
- if (!rq->bio) {
- blk_rq_bio_prep(rq, bio, nr_segs);
- } else {
+ if (rq->bio) {
if (!ll_back_merge_fn(rq, bio, nr_segs))
return -EINVAL;
rq->biotail->bi_next = bio;
rq->biotail = bio;
- rq->__data_len += (bio)->bi_iter.bi_size;
+ rq->__data_len += bio->bi_iter.bi_size;
bio_crypt_free_ctx(bio);
+ return 0;
}
+ rq->nr_phys_segments = nr_segs;
+ rq->bio = rq->biotail = bio;
+ rq->__data_len = bio->bi_iter.bi_size;
return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
@@ -561,9 +510,7 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
- const struct queue_limits *lim = &rq->q->limits;
- unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
- unsigned int nsegs;
+ unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
struct bio *bio;
int ret;
@@ -576,18 +523,10 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
return -ENOMEM;
bio_iov_bvec_set(bio, iter);
- /* check that the data layout matches the hardware restrictions */
- ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
- if (ret) {
- /* if we would have to split the bio, copy instead */
- if (ret > 0)
- ret = -EREMOTEIO;
+ ret = blk_rq_append_bio(rq, bio);
+ if (ret)
blk_mq_map_bio_put(bio);
- return ret;
- }
-
- blk_rq_bio_prep(rq, bio, nsegs);
- return 0;
+ return ret;
}
/**
@@ -644,8 +583,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
else
ret = bio_map_user_iov(rq, &i, gfp_mask);
- if (ret)
+ if (ret) {
+ if (ret == -EREMOTEIO)
+ ret = -EINVAL;
goto unmap_rq;
+ }
if (!bio)
bio = rq->bio;
} while (iov_iter_count(&i));
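
With the layout check folded into blk_rq_append_bio(), the passthrough mapping helpers above share one contract: a bio whose layout would require a split is rejected with -EREMOTEIO so the caller can fall back to copying. A hedged sketch of that calling convention; the function name is hypothetical:

#include <linux/blk-mq.h>
#include <linux/errno.h>

/* Hedged sketch of the consolidated blk_rq_append_bio() contract. */
static int example_map_bio(struct request *rq, struct bio *bio)
{
	int ret;

	ret = blk_rq_append_bio(rq, bio);
	if (ret == -EREMOTEIO) {
		/*
		 * The data layout violates the queue's hardware limits and
		 * passthrough bios cannot be split; callers such as
		 * blk_rq_map_user_iov() fall back to a copy (or translate
		 * the error to -EINVAL for user mappings, as above).
		 */
	}
	return ret;
}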
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e01383c6e534..15cd231d560c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -473,137 +473,100 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
return nr_phys_segs;
}
-static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
- struct scatterlist *sglist)
-{
- if (!*sg)
- return sglist;
+struct phys_vec {
+ phys_addr_t paddr;
+ u32 len;
+};
- /*
- * If the driver previously mapped a shorter list, we could see a
- * termination bit prematurely unless it fully inits the sg table
- * on each mapping. We KNOW that there must be more entries here
- * or the driver would be buggy, so force clear the termination bit
- * to avoid doing a full sg_init_table() in drivers for each command.
- */
- sg_unmark_end(*sg);
- return sg_next(*sg);
-}
-
-static unsigned blk_bvec_map_sg(struct request_queue *q,
- struct bio_vec *bvec, struct scatterlist *sglist,
- struct scatterlist **sg)
+static bool blk_map_iter_next(struct request *req,
+ struct req_iterator *iter, struct phys_vec *vec)
{
- unsigned nbytes = bvec->bv_len;
- unsigned nsegs = 0, total = 0;
-
- while (nbytes > 0) {
- unsigned offset = bvec->bv_offset + total;
- unsigned len = get_max_segment_size(&q->limits,
- bvec_phys(bvec) + total, nbytes);
- struct page *page = bvec->bv_page;
-
- /*
- * Unfortunately a fair number of drivers barf on scatterlists
- * that have an offset larger than PAGE_SIZE, despite other
- * subsystems dealing with that invariant just fine. For now
- * stick to the legacy format where we never present those from
- * the block layer, but the code below should be removed once
- * these offenders (mostly MMC/SD drivers) are fixed.
- */
- page += (offset >> PAGE_SHIFT);
- offset &= ~PAGE_MASK;
-
- *sg = blk_next_sg(sg, sglist);
- sg_set_page(*sg, page, len, offset);
+ unsigned int max_size;
+ struct bio_vec bv;
- total += len;
- nbytes -= len;
- nsegs++;
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ if (!iter->bio)
+ return false;
+ vec->paddr = bvec_phys(&req->special_vec);
+ vec->len = req->special_vec.bv_len;
+ iter->bio = NULL;
+ return true;
}
- return nsegs;
-}
-
-static inline int __blk_bvec_map_sg(struct bio_vec bv,
- struct scatterlist *sglist, struct scatterlist **sg)
-{
- *sg = blk_next_sg(sg, sglist);
- sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
- return 1;
-}
-
-/* only try to merge bvecs into one sg if they are from two bios */
-static inline bool
-__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
- struct bio_vec *bvprv, struct scatterlist **sg)
-{
-
- int nbytes = bvec->bv_len;
-
- if (!*sg)
+ if (!iter->iter.bi_size)
return false;
- if ((*sg)->length + nbytes > queue_max_segment_size(q))
- return false;
+ bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ vec->paddr = bvec_phys(&bv);
+ max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
+ bv.bv_len = min(bv.bv_len, max_size);
+ bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
- if (!biovec_phys_mergeable(q, bvprv, bvec))
- return false;
+ /*
+ * If we are entirely done with this bi_io_vec entry, check if the next
+ * one could be merged into it. This typically happens when moving to
+ * the next bio, but some callers also don't pack bvecs tight.
+ */
+ while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
+ struct bio_vec next;
+
+ if (!iter->iter.bi_size) {
+ if (!iter->bio->bi_next)
+ break;
+ iter->bio = iter->bio->bi_next;
+ iter->iter = iter->bio->bi_iter;
+ }
- (*sg)->length += nbytes;
+ next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ if (bv.bv_len + next.bv_len > max_size ||
+ !biovec_phys_mergeable(req->q, &bv, &next))
+ break;
+
+ bv.bv_len += next.bv_len;
+ bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
+ }
+ vec->len = bv.bv_len;
return true;
}
-static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
- struct scatterlist *sglist,
- struct scatterlist **sg)
+static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
+ struct scatterlist *sglist)
{
- struct bio_vec bvec, bvprv = { NULL };
- struct bvec_iter iter;
- int nsegs = 0;
- bool new_bio = false;
-
- for_each_bio(bio) {
- bio_for_each_bvec(bvec, bio, iter) {
- /*
- * Only try to merge bvecs from two bios given we
- * have done bio internal merge when adding pages
- * to bio
- */
- if (new_bio &&
- __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
- goto next_bvec;
-
- if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
- nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
- else
- nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
- next_bvec:
- new_bio = false;
- }
- if (likely(bio->bi_iter.bi_size)) {
- bvprv = bvec;
- new_bio = true;
- }
- }
+ if (!*sg)
+ return sglist;
- return nsegs;
+ /*
+ * If the driver previously mapped a shorter list, we could see a
+ * termination bit prematurely unless it fully inits the sg table
+ * on each mapping. We KNOW that there must be more entries here
+ * or the driver would be buggy, so force clear the termination bit
+ * to avoid doing a full sg_init_table() in drivers for each command.
+ */
+ sg_unmark_end(*sg);
+ return sg_next(*sg);
}
/*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
+ * Map a request to scatterlist, return number of sg entries setup. Caller
+ * must make sure sg can hold rq->nr_phys_segments entries.
*/
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sglist, struct scatterlist **last_sg)
{
+ struct req_iterator iter = {
+ .bio = rq->bio,
+ .iter = rq->bio->bi_iter,
+ };
+ struct phys_vec vec;
int nsegs = 0;
- if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
- else if (rq->bio)
- nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
+ while (blk_map_iter_next(rq, &iter, &vec)) {
+ *last_sg = blk_next_sg(last_sg, sglist);
+ sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
+ offset_in_page(vec.paddr));
+ nsegs++;
+ }
if (*last_sg)
sg_mark_end(*last_sg);
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9638b25fd521..ad8d6a363f24 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -11,6 +11,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/group_cpus.h>
+#include <linux/device/bus.h>
#include "blk.h"
#include "blk-mq.h"
@@ -54,3 +55,39 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
return NUMA_NO_NODE;
}
+
+/**
+ * blk_mq_map_hw_queues - Create CPU to hardware queue mapping
+ * @qmap: CPU to hardware queue map
+ * @dev: The device to map queues
+ * @offset: Queue offset to use for the device
+ *
+ * Create a CPU to hardware queue mapping in @qmap. The struct bus_type
+ * irq_get_affinity callback will be used to retrieve the affinity.
+ */
+void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
+ struct device *dev, unsigned int offset)
+
+{
+ const struct cpumask *mask;
+ unsigned int queue, cpu;
+
+ if (!dev->bus->irq_get_affinity)
+ goto fallback;
+
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
+ mask = dev->bus->irq_get_affinity(dev, queue + offset);
+ if (!mask)
+ goto fallback;
+
+ for_each_cpu(cpu, mask)
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+ }
+
+ return;
+
+fallback:
+ WARN_ON_ONCE(qmap->nr_queues > 1);
+ blk_mq_clear_mq_map(qmap);
+}
+EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues);
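
blk_mq_map_hw_queues() is the bus-generic replacement for the removed blk-mq-pci.c and blk-mq-virtio.c helpers (see the Makefile hunk above); any bus implementing the irq_get_affinity callback can back it. A hedged sketch of how a driver's ->map_queues() callback might call it; all mydrv_* names are hypothetical:

#include <linux/blk-mq.h>
#include <linux/pci.h>

/* Hypothetical driver state: only the embedded PCI device matters here. */
struct mydrv_ctrl {
	struct pci_dev *pdev;
};

/* Hedged sketch: a ->map_queues() using the new bus-generic helper. */
static void mydrv_map_queues(struct blk_mq_tag_set *set)
{
	struct mydrv_ctrl *ctrl = set->driver_data;

	/* 0 = no IRQ vector offset before the first IO queue */
	blk_mq_map_hw_queues(&set->map[HCTX_TYPE_DEFAULT],
			     &ctrl->pdev->dev, 0);
}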
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5463697a8442..adf5f0697b6b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -172,21 +172,13 @@ static int hctx_state_show(void *data, struct seq_file *m)
return 0;
}
-#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name
-static const char *c