diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-05-26 11:39:36 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-05-26 11:39:36 -0700 |
| commit | 6f59de9bc0d576eb5a5edfea470527902315e924 (patch) | |
| tree | eeb477da0caa8e9569074f448169fb792fac90d9 /block | |
| parent | 3e406741b19890c3d8a2ed126aa7c23b106ca9e1 (diff) | |
| parent | 533c87e2ed742454957f14d7bef9f48d5a72e72d (diff) | |
| download | linux-6f59de9bc0d576eb5a5edfea470527902315e924.tar.gz linux-6f59de9bc0d576eb5a5edfea470527902315e924.tar.bz2 linux-6f59de9bc0d576eb5a5edfea470527902315e924.zip | |
Merge tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- ublk updates:
- Add support for updating the size of a ublk instance
- Zero-copy improvements
- Auto-registering of buffers for zero-copy
- Series simplifying and improving GET_DATA and request lookup
- Series adding quiesce support
- Lots of selftests additions
- Various cleanups
- NVMe updates via Christoph:
- add per-node DMA pools and use them for PRP/SGL allocations
(Caleb Sander Mateos, Keith Busch)
- nvme-fcloop refcounting fixes (Daniel Wagner)
- support delayed removal of the multipath node and optionally
support the multipath node for private namespaces (Nilay Shroff)
- support shared CQs in the PCI endpoint target code (Wilfred
Mallawa)
- support admin-queue only authentication (Hannes Reinecke)
- use the crc32c library instead of the crypto API (Eric Biggers)
- misc cleanups (Christoph Hellwig, Marcelo Moreira, Hannes
Reinecke, Leon Romanovsky, Gustavo A. R. Silva)
- MD updates via Yu:
- Fix that normal IO can be starved by sync IO, found by mkfs on
newly created large raid5, with some clean up patches for bdev
inflight counters
- Clean up brd, getting rid of atomic kmaps and bvec poking
- Add loop driver specifically for zoned IO testing
- Eliminate blk-rq-qos calls with a static key, if not enabled
- Improve hctx locking for when a plug has IO for multiple queues
pending
- Remove block layer bouncing support, which in turn means we can
remove the per-node bounce stat as well
- Improve blk-throttle support
- Improve delay support for blk-throttle
- Improve brd discard support
- Unify IO scheduler switching. This should also fix a bunch of lockdep
warnings we've been seeing, after enabling lockdep support for queue
freezing/unfreezeing
- Add support for block write streams via FDP (flexible data placement)
on NVMe
- Add a bunch of block helpers, facilitating the removal of a bunch of
duplicated boilerplate code
- Remove obsolete BLK_MQ pci and virtio Kconfig options
- Add atomic/untorn write support to blktrace
- Various little cleanups and fixes
* tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux: (186 commits)
selftests: ublk: add test for UBLK_F_QUIESCE
ublk: add feature UBLK_F_QUIESCE
selftests: ublk: add test case for UBLK_U_CMD_UPDATE_SIZE
traceevent/block: Add REQ_ATOMIC flag to block trace events
ublk: run auto buf unregisgering in same io_ring_ctx with registering
io_uring: add helper io_uring_cmd_ctx_handle()
ublk: remove io argument from ublk_auto_buf_reg_fallback()
ublk: handle ublk_set_auto_buf_reg() failure correctly in ublk_fetch()
selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK
selftests: ublk: support UBLK_F_AUTO_BUF_REG
ublk: support UBLK_AUTO_BUF_REG_FALLBACK
ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
ublk: prepare for supporting to register request buffer automatically
ublk: convert to refcount_t
selftests: ublk: make IO & device removal test more stressful
nvme: rename nvme_mpath_shutdown_disk to nvme_mpath_remove_disk
nvme: introduce multipath_always_on module param
nvme-multipath: introduce delayed removal of the multipath head node
nvme-pci: derive and better document max segments limits
nvme-pci: use struct_size for allocation struct nvme_dev
...
Diffstat (limited to 'block')
| -rw-r--r-- | block/Kconfig | 8 | ||||
| -rw-r--r-- | block/Makefile | 5 | ||||
| -rw-r--r-- | block/bfq-iosched.c | 6 | ||||
| -rw-r--r-- | block/bio-integrity.c | 4 | ||||
| -rw-r--r-- | block/bio.c | 158 | ||||
| -rw-r--r-- | block/blk-core.c | 2 | ||||
| -rw-r--r-- | block/blk-crypto-fallback.c | 1 | ||||
| -rw-r--r-- | block/blk-map.c | 93 | ||||
| -rw-r--r-- | block/blk-merge.c | 137 | ||||
| -rw-r--r-- | block/blk-mq-debugfs.c | 13 | ||||
| -rw-r--r-- | block/blk-mq-dma.c | 116 | ||||
| -rw-r--r-- | block/blk-mq-sched.c | 53 | ||||
| -rw-r--r-- | block/blk-mq.c | 309 | ||||
| -rw-r--r-- | block/blk-mq.h | 7 | ||||
| -rw-r--r-- | block/blk-rq-qos.c | 4 | ||||
| -rw-r--r-- | block/blk-rq-qos.h | 21 | ||||
| -rw-r--r-- | block/blk-settings.c | 5 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 34 | ||||
| -rw-r--r-- | block/blk-throttle.c | 411 | ||||
| -rw-r--r-- | block/blk-throttle.h | 36 | ||||
| -rw-r--r-- | block/blk-wbt.c | 9 | ||||
| -rw-r--r-- | block/blk.h | 50 | ||||
| -rw-r--r-- | block/bounce.c | 267 | ||||
| -rw-r--r-- | block/elevator.c | 329 | ||||
| -rw-r--r-- | block/elevator.h | 6 | ||||
| -rw-r--r-- | block/fops.c | 28 | ||||
| -rw-r--r-- | block/genhd.c | 266 | ||||
| -rw-r--r-- | block/mq-deadline.c | 2 |
28 files changed, 1191 insertions, 1189 deletions
diff --git a/block/Kconfig b/block/Kconfig index df8973bc0539..15027963472d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -211,14 +211,6 @@ config BLK_INLINE_ENCRYPTION_FALLBACK source "block/partitions/Kconfig" -config BLK_MQ_PCI - def_bool PCI - -config BLK_MQ_VIRTIO - bool - depends on VIRTIO - default y - config BLK_PM def_bool PM diff --git a/block/Makefile b/block/Makefile index 3a941dc0d27f..c65f4da93702 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,13 +5,12 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ - blk-merge.o blk-timeout.o \ - blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ + blk-merge.o blk-timeout.o blk-lib.o blk-mq.o \ + blk-mq-tag.o blk-mq-dma.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \ disk-events.o blk-ia-ranges.o early-lookup.o -obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index abd80dc13562..0cb1e9873aab 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7210,8 +7210,8 @@ static void bfq_exit_queue(struct elevator_queue *e) #endif blk_stat_disable_accounting(bfqd->queue); - clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags); - wbt_enable_default(bfqd->queue->disk); + blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue); + set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags); kfree(bfqd); } @@ -7397,7 +7397,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) /* We dispatch from request queue wide instead of hw queue */ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); - set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags); + blk_queue_flag_set(QUEUE_FLAG_DISABLE_WBT_DEF, q); wbt_disable_default(q->disk); blk_stat_enable_accounting(q); diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 43ef6bd06c85..cb94e9be26dc 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -127,10 +127,8 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, if (bip->bip_vcnt > 0) { struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1]; - bool same_page = false; - if (bvec_try_merge_hw_page(q, bv, page, len, offset, - &same_page)) { + if (bvec_try_merge_hw_page(q, bv, page, len, offset)) { bip->bip_iter.bi_size += len; return len; } diff --git a/block/bio.c b/block/bio.c index 4be592d37fb6..3c0a558c90f5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, bio->bi_flags = 0; bio->bi_ioprio = 0; bio->bi_write_hint = 0; + bio->bi_write_stream = 0; bio->bi_status = 0; bio->bi_iter.bi_sector = 0; bio->bi_iter.bi_size = 0; @@ -827,6 +828,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) bio_set_flag(bio, BIO_CLONED); bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; + bio->bi_write_stream = bio_src->bi_write_stream; bio->bi_iter = bio_src->bi_iter; if (bio->bi_bdev) { @@ -918,7 +920,7 @@ static inline bool bio_full(struct bio *bio, unsigned len) } static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, - unsigned int len, unsigned int off, bool *same_page) + unsigned int len, unsigned int off) { size_t bv_end = bv->bv_offset + bv->bv_len; phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1; @@ -931,9 +933,7 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, if (!zone_device_pages_have_same_pgmap(bv->bv_page, page)) return false; - *same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) & - PAGE_MASK)); - if (!*same_page) { + if ((vec_end_addr & PAGE_MASK) != ((page_addr + off) & PAGE_MASK)) { if (IS_ENABLED(CONFIG_KMSAN)) return false; if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE) @@ -953,8 +953,7 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, * helpers to split. Hopefully this will go away soon. */ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, - struct page *page, unsigned len, unsigned offset, - bool *same_page) + struct page *page, unsigned len, unsigned offset) { unsigned long mask = queue_segment_boundary(q); phys_addr_t addr1 = bvec_phys(bv); @@ -964,7 +963,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, return false; if (len > queue_max_segment_size(q) - bv->bv_len) return false; - return bvec_try_merge_page(bv, page, len, offset, same_page); + return bvec_try_merge_page(bv, page, len, offset); } /** @@ -990,6 +989,22 @@ void __bio_add_page(struct bio *bio, struct page *page, EXPORT_SYMBOL_GPL(__bio_add_page); /** + * bio_add_virt_nofail - add data in the direct kernel mapping to a bio + * @bio: destination bio + * @vaddr: data to add + * @len: length of the data to add, may cross pages + * + * Add the data at @vaddr to @bio. The caller must have ensure a segment + * is available for the added data. No merging into an existing segment + * will be performed. + */ +void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len) +{ + __bio_add_page(bio, virt_to_page(vaddr), len, offset_in_page(vaddr)); +} +EXPORT_SYMBOL_GPL(bio_add_virt_nofail); + +/** * bio_add_page - attempt to add page(s) to bio * @bio: destination bio * @page: start page to add @@ -1002,8 +1017,6 @@ EXPORT_SYMBOL_GPL(__bio_add_page); int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - bool same_page = false; - if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return 0; if (bio->bi_iter.bi_size > UINT_MAX - len) @@ -1011,7 +1024,7 @@ int bio_add_page(struct bio *bio, struct page *page, if (bio->bi_vcnt > 0 && bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], - page, len, offset, &same_page)) { + page, len, offset)) { bio->bi_iter.bi_size += len; return len; } @@ -1058,6 +1071,61 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len, } EXPORT_SYMBOL(bio_add_folio); +/** + * bio_add_vmalloc_chunk - add a vmalloc chunk to a bio + * @bio: destination bio + * @vaddr: vmalloc address to add + * @len: total length in bytes of the data to add + * + * Add data starting at @vaddr to @bio and return how many bytes were added. + * This may be less than the amount originally asked. Returns 0 if no data + * could be added to @bio. + * + * This helper calls flush_kernel_vmap_range() for the range added. For reads + * the caller still needs to manually call invalidate_kernel_vmap_range() in + * the completion handler. + */ +unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len) +{ + unsigned int offset = offset_in_page(vaddr); + + len = min(len, PAGE_SIZE - offset); + if (bio_add_page(bio, vmalloc_to_page(vaddr), len, offset) < len) + return 0; + if (op_is_write(bio_op(bio))) + flush_kernel_vmap_range(vaddr, len); + return len; +} +EXPORT_SYMBOL_GPL(bio_add_vmalloc_chunk); + +/** + * bio_add_vmalloc - add a vmalloc region to a bio + * @bio: destination bio + * @vaddr: vmalloc address to add + * @len: total length in bytes of the data to add + * + * Add data starting at @vaddr to @bio. Return %true on success or %false if + * @bio does not have enough space for the payload. + * + * This helper calls flush_kernel_vmap_range() for the range added. For reads + * the caller still needs to manually call invalidate_kernel_vmap_range() in + * the completion handler. + */ +bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len) +{ + do { + unsigned int added = bio_add_vmalloc_chunk(bio, vaddr, len); + + if (!added) + return false; + vaddr += added; + len -= added; + } while (len); + + return true; +} +EXPORT_SYMBOL_GPL(bio_add_vmalloc); + void __bio_release_pages(struct bio *bio, bool mark_dirty) { struct folio_iter fi; @@ -1088,27 +1156,6 @@ void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter) bio_set_flag(bio, BIO_CLONED); } -static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len, - size_t offset) -{ - bool same_page = false; - - if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len)) - return -EIO; - - if (bio->bi_vcnt > 0 && - bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1], - folio_page(folio, 0), len, offset, - &same_page)) { - bio->bi_iter.bi_size += len; - if (same_page && bio_flagged(bio, BIO_PAGE_PINNED)) - unpin_user_folio(folio, 1); - return 0; - } - bio_add_folio_nofail(bio, folio, len, offset); - return 0; -} - static unsigned int get_contig_folio_len(unsigned int *num_pages, struct page **pages, unsigned int i, struct folio *folio, size_t left, @@ -1203,6 +1250,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) for (left = size, i = 0; left > 0; left -= len, i += num_pages) { struct page *page = pages[i]; struct folio *folio = page_folio(page); + unsigned int old_vcnt = bio->bi_vcnt; folio_offset = ((size_t)folio_page_idx(folio, page) << PAGE_SHIFT) + offset; @@ -1215,7 +1263,23 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) len = get_contig_folio_len(&num_pages, pages, i, folio, left, offset); - bio_iov_add_folio(bio, folio, len, folio_offset); + if (!bio_add_folio(bio, folio, len, folio_offset)) { + WARN_ON_ONCE(1); + ret = -EINVAL; + goto out; + } + + if (bio_flagged(bio, BIO_PAGE_PINNED)) { + /* + * We're adding another fragment of a page that already + * was part of the last segment. Undo our pin as the + * page was pinned when an earlier fragment of it was + * added to the bio and __bio_release_pages expects a + * single pin per page. + */ + if (offset && bio->bi_vcnt == old_vcnt) + unpin_user_folio(folio, 1); + } offset = 0; } @@ -1301,6 +1365,36 @@ int submit_bio_wait(struct bio *bio) } EXPORT_SYMBOL(submit_bio_wait); +/** + * bdev_rw_virt - synchronously read into / write from kernel mapping + * @bdev: block device to access + * @sector: sector to access + * @data: data to read/write + * @len: length in byte to read/write + * @op: operation (e.g. REQ_OP_READ/REQ_OP_WRITE) + * + * Performs synchronous I/O to @bdev for @data/@len. @data must be in + * the kernel direct mapping and not a vmalloc address. + */ +int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, + size_t len, enum req_op op) +{ + struct bio_vec bv; + struct bio bio; + int error; + + if (WARN_ON_ONCE(is_vmalloc_addr(data))) + return -EIO; + + bio_init(&bio, bdev, &bv, 1, op); + bio.bi_iter.bi_sector = sector; + bio_add_virt_nofail(&bio, data, len); + error = submit_bio_wait(&bio); + bio_uninit(&bio); + return error; +} +EXPORT_SYMBOL_GPL(bdev_rw_virt); + static void bio_wait_end_io(struct bio *bio) { complete(bio->bi_private); diff --git a/block/blk-core.c b/block/blk-core.c index e8cc270a453f..b862c66018f2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1018,7 +1018,7 @@ again: stamp = READ_ONCE(part->bd_stamp); if (unlikely(time_after(now, stamp)) && likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) && - (end || part_in_flight(part))) + (end || bdev_count_inflight(part))) __part_stat_add(part, io_ticks, now - stamp); if (bdev_is_partition(part)) { diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index f154be0b575a..005c9157ffb3 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -173,6 +173,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) bio_set_flag(bio, BIO_REMAPPED); bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; + bio->bi_write_stream = bio_src->bi_write_stream; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; diff --git a/block/blk-map.c b/block/blk-map.c index d2f22744b3d1..23e5d5ebe59e 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -317,64 +317,26 @@ static void bio_map_kern_endio(struct bio *bio) kfree(bio); } -/** - * bio_map_kern - map kernel address into bio - * @q: the struct request_queue for the bio - * @data: pointer to buffer to map - * @len: length in bytes - * @gfp_mask: allocation flags for bio allocation - * - * Map the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -static struct bio *bio_map_kern(struct request_queue *q, void *data, - unsigned int len, gfp_t gfp_mask) +static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op, + gfp_t gfp_mask) { - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - bool is_vmalloc = is_vmalloc_addr(data); - struct page *page; - int offset, i; + unsigned int nr_vecs = bio_add_max_vecs(data, len); struct bio *bio; - bio = bio_kmalloc(nr_pages, gfp_mask); + bio = bio_kmalloc(nr_vecs, gfp_mask); if (!bio) return ERR_PTR(-ENOMEM); - bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); - - if (is_vmalloc) { - flush_kernel_vmap_range(data, len); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, op); + if (is_vmalloc_addr(data)) { bio->bi_private = data; - } - - offset = offset_in_page(kaddr); - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - if (!is_vmalloc) - page = virt_to_page(data); - else - page = vmalloc_to_page(data); - if (bio_add_page(bio, page, bytes, offset) < bytes) { - /* we don't support partial mappings */ + if (!bio_add_vmalloc(bio, data, len)) { bio_uninit(bio); kfree(bio); return ERR_PTR(-EINVAL); } - - data += bytes; - len -= bytes; - offset = 0; + } else { + bio_add_virt_nofail(bio, data, len); } - bio->bi_end_io = bio_map_kern_endio; return bio; } @@ -402,17 +364,16 @@ static void bio_copy_kern_endio_read(struct bio *bio) /** * bio_copy_kern - copy kernel address into bio - * @q: the struct request_queue for the bio * @data: pointer to buffer to copy * @len: length in bytes + * @op: bio/request operation * @gfp_mask: allocation flags for bio and page allocation - * @reading: data direction is READ * * copy the kernel address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ -static struct bio *bio_copy_kern(struct request_queue *q, void *data, - unsigned int len, gfp_t gfp_mask, int reading) +static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op, + gfp_t gfp_mask) { unsigned long kaddr = (unsigned long)data; unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -431,7 +392,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, bio = bio_kmalloc(nr_pages, gfp_mask); if (!bio) return ERR_PTR(-ENOMEM); - bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, op); while (len) { struct page *page; @@ -444,7 +405,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, if (!page) goto cleanup; - if (!reading) + if (op_is_write(op)) memcpy(page_address(page), p, bytes); if (bio_add_page(bio, page, bytes, 0) < bytes) @@ -454,11 +415,11 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, p += bytes; } - if (reading) { + if (op_is_write(op)) { + bio->bi_end_io = bio_copy_kern_endio; + } else { bio->bi_end_io = bio_copy_kern_endio_read; bio->bi_private = data; - } else { - bio->bi_end_io = bio_copy_kern_endio; } return bio; @@ -556,8 +517,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, if (map_data) copy = true; - else if (blk_queue_may_bounce(q)) - copy = true; else if (iov_iter_alignment(iter) & align) copy = true; else if (iov_iter_is_bvec(iter)) @@ -689,7 +648,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user); /** * blk_rq_map_kern - map kernel data to a request, for passthrough requests - * @q: request queue where request should be inserted * @rq: request to fill * @kbuf: the kernel buffer * @len: length of user data @@ -700,31 +658,26 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * buffer is used. Can be called multiple times to append multiple * buffers. */ -int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, - unsigned int len, gfp_t gfp_mask) +int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len, + gfp_t gfp_mask) { - int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; struct bio *bio; int ret; - if (len > (queue_max_hw_sectors(q) << 9)) + if (len > (queue_max_hw_sectors(rq->q) << SECTOR_SHIFT)) return -EINVAL; if (!len || !kbuf) return -EINVAL; - if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) || - blk_queue_may_bounce(q)) - bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); + if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf)) + bio = bio_copy_kern(kbuf, len, req_op(rq), gfp_mask); else - bio = bio_map_kern(q, kbuf, len, gfp_mask); + bio = bio_map_kern(kbuf, len, req_op(rq), gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); - bio->bi_opf &= ~REQ_OP_MASK; - bio->bi_opf |= req_op(rq); - ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { bio_uninit(bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index fdd4efb54c6c..3af1d284add5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -7,7 +7,6 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/blk-integrity.h> -#include <linux/scatterlist.h> #include <linux/part_stat.h> #include <linux/blk-cgroup.h> @@ -226,27 +225,6 @@ static inline unsigned get_max_io_size(struct bio *bio, } /** - * get_max_segment_size() - maximum number of bytes to add as a single segment - * @lim: Request queue limits. - * @paddr: address of the range to add - * @len: maximum length available to add at @paddr - * - * Returns the maximum number of bytes of the range starting at @paddr that can - * be added to a single segment. - */ -static inline unsigned get_max_segment_size(const struct queue_limits *lim, - phys_addr_t paddr, unsigned int len) -{ - /* - * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1 - * after having calculated the minimum. - */ - return min_t(unsigned long, len, - min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr), - (unsigned long)lim->max_segment_size - 1) + 1); -} - -/** * bvec_split_segs - verify whether or not a bvec should be split in the middle * @lim: [in] queue limits to split based on * @bv: [in] bvec to examine @@ -473,117 +451,6 @@ unsigned int blk_recalc_rq_segments(struct request *rq) return nr_phys_segs; } -struct phys_vec { - phys_addr_t paddr; - u32 len; -}; - -static bool blk_map_iter_next(struct request *req, - struct req_iterator *iter, struct phys_vec *vec) -{ - unsigned int max_size; - struct bio_vec bv; - - if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - if (!iter->bio) - return false; - vec->paddr = bvec_phys(&req->special_vec); - vec->len = req->special_vec.bv_len; - iter->bio = NULL; - return true; - } - - if (!iter->iter.bi_size) - return false; - - bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter); - vec->paddr = bvec_phys(&bv); - max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX); - bv.bv_len = min(bv.bv_len, max_size); - bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len); - - /* - * If we are entirely done with this bi_io_vec entry, check if the next - * one could be merged into it. This typically happens when moving to - * the next bio, but some callers also don't pack bvecs tight. - */ - while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) { - struct bio_vec next; - - if (!iter->iter.bi_size) { - if (!iter->bio->bi_next) - break; - iter->bio = iter->bio->bi_next; - iter->iter = iter->bio->bi_iter; - } - - next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter); - if (bv.bv_len + next.bv_len > max_size || - !biovec_phys_mergeable(req->q, &bv, &next)) - break; - - bv.bv_len += next.bv_len; - bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len); - } - - vec->len = bv.bv_len; - return true; -} - -static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, - struct scatterlist *sglist) -{ - if (!*sg) - return sglist; - - /* - * If the driver previously mapped a shorter list, we could see a - * termination bit prematurely unless it fully inits the sg table - * on each mapping. We KNOW that there must be more entries here - * or the driver would be buggy, so force clear the termination bit - * to avoid doing a full sg_init_table() in drivers for each command. - */ - sg_unmark_end(*sg); - return sg_next(*sg); -} - -/* - * Map a request to scatterlist, return number of sg entries setup. Caller - * must make sure sg can hold rq->nr_phys_segments entries. - */ -int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, - struct scatterlist **last_sg) -{ - struct req_iterator iter = { - .bio = rq->bio, - }; - struct phys_vec vec; - int nsegs = 0; - - /* the internal flush request may not have bio attached */ - if (iter.bio) - iter.iter = iter.bio->bi_iter; - - while (blk_map_iter_next(rq, &iter, &vec)) { - *last_sg = blk_next_sg(last_sg, sglist); - sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len, - offset_in_page(vec.paddr)); - nsegs++; - } - - if (*last_sg) - sg_mark_end(*last_sg); - - /* - * Something must have been wrong if the figured number of - * segment is bigger than number of req's physical segments - */ - WARN_ON(nsegs > blk_rq_nr_phys_segments(rq)); - - return nsegs; -} -EXPORT_SYMBOL(__blk_rq_map_sg); - static inline unsigned int blk_rq_get_max_sectors(struct request *rq, sector_t offset) { @@ -832,6 +699,8 @@ static struct request *attempt_merge(struct request_queue *q, if (req->bio->bi_write_hint != next->bio->bi_write_hint) return NULL; + if (req->bio->bi_write_stream != next->bio->bi_write_stream) + return NULL; if (req->bio->bi_ioprio != next->bio->bi_ioprio) return NULL; if (!blk_atomic_write_mergeable_rqs(req, next)) @@ -953,6 +822,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return false; if (rq->bio->bi_write_hint != bio->bi_write_hint) return false; + if (rq->bio->bi_write_stream != bio->bi_write_stream) + return false; if (rq->bio->bi_ioprio != bio->bi_ioprio) return false; if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3421b5521fe2..29b3540dd180 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -93,6 +93,8 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(RQ_ALLOC_TIME), QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(SQ_SCHED), + QUEUE_FLAG_NAME(DISABLE_WBT_DEF), + QUEUE_FLAG_NAME(NO_ELV_SWITCH), }; #undef QUEUE_FLAG_NAME @@ -624,20 +626,9 @@ void blk_mq_debugfs_register(struct request_queue *q) |
