From 82394db7383d33641f3f565bd79792fb41b1741f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 29 Jun 2020 12:06:37 -0700 Subject: block: add capacity field to zone descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the zoned storage model, the sectors within a zone are typically all writeable. With the introduction of the Zoned Namespace (ZNS) Command Set in the NVM Express organization, the model was extended to have a specific writeable capacity. Extend the zone descriptor data structure with a zone capacity field to indicate to the user how many sectors in a zone are writeable. Introduce backward compatibility in the zone report ioctl by extending the zone report header data structure with a flags field to indicate if the capacity field is available. Reviewed-by: Jens Axboe Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Daniel Wagner Signed-off-by: Matias Bjørling Signed-off-by: Christoph Hellwig --- include/uapi/linux/blkzoned.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 0cdef67135f0..42c3366cc25f 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -73,6 +73,15 @@ enum blk_zone_cond { BLK_ZONE_COND_OFFLINE = 0xF, }; +/** + * enum blk_zone_report_flags - Feature flags of reported zone descriptors. + * + * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field. + */ +enum blk_zone_report_flags { + BLK_ZONE_REP_CAPACITY = (1 << 0), +}; + /** * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl. * @@ -99,7 +108,9 @@ struct blk_zone { __u8 cond; /* Zone condition */ __u8 non_seq; /* Non-sequential write resources active */ __u8 reset; /* Reset write pointer recommended */ - __u8 reserved[36]; + __u8 resv[4]; + __u64 capacity; /* Zone capacity in number of sectors */ + __u8 reserved[24]; }; /** @@ -115,7 +126,7 @@ struct blk_zone { struct blk_zone_report { __u64 sector; __u32 nr_zones; - __u8 reserved[4]; + __u32 flags; struct blk_zone zones[0]; }; -- cgit v1.2.3 From 71010c30945425203da8d069a10fa45a05a00f96 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 29 Jun 2020 12:06:39 -0700 Subject: nvme: implement multiple I/O Command Set support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements support for multiple I/O Command Sets. NVMe TP 4056 introduces a method to enumerate multiple command sets per namespace. If the command set is exposed, this method for enumeration will be used instead of the traditional method that uses the CC.CSS register command set register for command set identification. For namespaces where the Command Set Identifier is not supported or recognized, the specific namespace will not be created. Reviewed-by: Javier González Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Matias Bjørling Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5ce51ab4c50e..81ffe5247505 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -132,6 +132,7 @@ enum { #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) #define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1) +#define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) @@ -162,7 +163,6 @@ enum { enum { NVME_CC_ENABLE = 1 << 0, - NVME_CC_CSS_NVM = 0 << 4, NVME_CC_EN_SHIFT = 0, NVME_CC_CSS_SHIFT = 4, NVME_CC_MPS_SHIFT = 7, @@ -170,6 +170,9 @@ enum { NVME_CC_SHN_SHIFT = 14, NVME_CC_IOSQES_SHIFT = 16, NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT, + NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT, + NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, @@ -179,6 +182,8 @@ enum { NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + NVME_CAP_CSS_NVM = 1 << 0, + NVME_CAP_CSS_CSI = 1 << 6, NVME_CSTS_RDY = 1 << 0, NVME_CSTS_CFS = 1 << 1, NVME_CSTS_NSSRO = 1 << 4, @@ -374,6 +379,8 @@ enum { NVME_ID_CNS_CTRL = 0x01, NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, NVME_ID_CNS_NS_DESC_LIST = 0x03, + NVME_ID_CNS_CS_NS = 0x05, + NVME_ID_CNS_CS_CTRL = 0x06, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, @@ -383,6 +390,10 @@ enum { NVME_ID_CNS_UUID_LIST = 0x17, }; +enum { + NVME_CSI_NVM = 0, +}; + enum { NVME_DIR_IDENTIFY = 0x00, NVME_DIR_STREAMS = 0x01, @@ -435,11 +446,13 @@ struct nvme_ns_id_desc { #define NVME_NIDT_EUI64_LEN 8 #define NVME_NIDT_NGUID_LEN 16 #define NVME_NIDT_UUID_LEN 16 +#define NVME_NIDT_CSI_LEN 1 enum { NVME_NIDT_EUI64 = 0x01, NVME_NIDT_NGUID = 0x02, NVME_NIDT_UUID = 0x03, + NVME_NIDT_CSI = 0x04, }; struct nvme_smart_log { @@ -972,7 +985,9 @@ struct nvme_identify { __u8 cns; __u8 rsvd3; __le16 ctrlid; - __u32 rsvd11[5]; + __u8 rsvd11[3]; + __u8 csi; + __u32 rsvd12[4]; }; #define NVME_IDENTIFY_DATA_SIZE 4096 -- cgit v1.2.3 From be93e87e780253780df9bb6ecc9bc1199b0d94c3 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 29 Jun 2020 12:06:40 -0700 Subject: nvme: support for multiple Command Sets Supported and Effects log pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Commands Supported and Effects log page was extended with a CSI field that enables the host to query the log page for each command set supported. Retrieve this log page for each command set that an attached namespace supports, and save a pointer to that log in the namespace head. Reviewed-by: Matias Bjørling Reviewed-by: Javier González Reviewed-by: Himanshu Madhani Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 81ffe5247505..95cd03e240a1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1101,7 +1101,9 @@ struct nvme_get_log_page_command { }; __le64 lpo; }; - __u32 rsvd14[2]; + __u8 rsvd14[3]; + __u8 csi; + __u32 rsvd15; }; struct nvme_directive_cmd { -- cgit v1.2.3 From 240e6ee272c07a2636dfc7d65f5bbb18377c49e5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 29 Jun 2020 12:06:41 -0700 Subject: nvme: support for zoned namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for NVM Express Zoned Namespaces (ZNS) Command Set defined in NVM Express TP4053. Zoned namespaces are discovered based on their Command Set Identifier reported in the namespaces Namespace Identification Descriptor list. A successfully discovered Zoned Namespace will be registered with the block layer as a host managed zoned block device with Zone Append command support. A namespace that does not support append is not supported by the driver. Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Javier González Reviewed-by: Himanshu Madhani Signed-off-by: Hans Holmberg Signed-off-by: Dmitry Fomichev Signed-off-by: Ajay Joshi Signed-off-by: Aravind Ramesh Signed-off-by: Niklas Cassel Signed-off-by: Matias Bjørling Signed-off-by: Damien Le Moal Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 95cd03e240a1..1643005d21e3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -374,6 +374,30 @@ struct nvme_id_ns { __u8 vs[3712]; }; +struct nvme_zns_lbafe { + __le64 zsze; + __u8 zdes; + __u8 rsvd9[7]; +}; + +struct nvme_id_ns_zns { + __le16 zoc; + __le16 ozcs; + __le32 mar; + __le32 mor; + __le32 rrl; + __le32 frl; + __u8 rsvd20[2796]; + struct nvme_zns_lbafe lbafe[16]; + __u8 rsvd3072[768]; + __u8 vs[256]; +}; + +struct nvme_id_ctrl_zns { + __u8 zasl; + __u8 rsvd1[4095]; +}; + enum { NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_CTRL = 0x01, @@ -392,6 +416,7 @@ enum { enum { NVME_CSI_NVM = 0, + NVME_CSI_ZNS = 2, }; enum { @@ -532,6 +557,27 @@ struct nvme_ana_rsp_hdr { __le16 rsvd10[3]; }; +struct nvme_zone_descriptor { + __u8 zt; + __u8 zs; + __u8 za; + __u8 rsvd3[5]; + __le64 zcap; + __le64 zslba; + __le64 wp; + __u8 rsvd32[32]; +}; + +enum { + NVME_ZONE_TYPE_SEQWRITE_REQ = 0x2, +}; + +struct nvme_zone_report { + __le64 nr_zones; + __u8 resv8[56]; + struct nvme_zone_descriptor entries[]; +}; + enum { NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_TEMPERATURE = 1 << 1, @@ -626,6 +672,9 @@ enum nvme_opcode { nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, nvme_cmd_resv_release = 0x15, + nvme_cmd_zone_mgmt_send = 0x79, + nvme_cmd_zone_mgmt_recv = 0x7a, + nvme_cmd_zone_append = 0x7d, }; #define nvme_opcode_name(opcode) { opcode, #opcode } @@ -764,6 +813,7 @@ struct nvme_rw_command { enum { NVME_RW_LR = 1 << 15, NVME_RW_FUA = 1 << 14, + NVME_RW_APPEND_PIREMAP = 1 << 9, NVME_RW_DSM_FREQ_UNSPEC = 0, NVME_RW_DSM_FREQ_TYPICAL = 1, NVME_RW_DSM_FREQ_RARE = 2, @@ -829,6 +879,53 @@ struct nvme_write_zeroes_cmd { __le16 appmask; }; +enum nvme_zone_mgmt_action { + NVME_ZONE_CLOSE = 0x1, + NVME_ZONE_FINISH = 0x2, + NVME_ZONE_OPEN = 0x3, + NVME_ZONE_RESET = 0x4, + NVME_ZONE_OFFLINE = 0x5, + NVME_ZONE_SET_DESC_EXT = 0x10, +}; + +struct nvme_zone_mgmt_send_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + union nvme_data_ptr dptr; + __le64 slba; + __le32 cdw12; + __u8 zsa; + __u8 select_all; + __u8 rsvd13[2]; + __le32 cdw14[2]; +}; + +struct nvme_zone_mgmt_recv_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le64 rsvd2[2]; + union nvme_data_ptr dptr; + __le64 slba; + __le32 numd; + __u8 zra; + __u8 zrasf; + __u8 pr; + __u8 rsvd13; + __le32 cdw14[2]; +}; + +enum { + NVME_ZRA_ZONE_REPORT = 0, + NVME_ZRASF_ZONE_REPORT_ALL = 0, + NVME_REPORT_ZONE_PARTIAL = 1, +}; + /* Features */ enum { @@ -1300,6 +1397,8 @@ struct nvme_command { struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; struct nvme_write_zeroes_cmd write_zeroes; + struct nvme_zone_mgmt_send_cmd zms; + struct nvme_zone_mgmt_recv_cmd zmr; struct nvme_abort_cmd abort; struct nvme_get_log_page_command get_log_page; struct nvmf_common_command fabrics; @@ -1433,6 +1532,18 @@ enum { NVME_SC_DISCOVERY_RESTART = 0x190, NVME_SC_AUTH_REQUIRED = 0x191, + /* + * I/O Command Set Specific - Zoned commands: + */ + NVME_SC_ZONE_BOUNDARY_ERROR = 0x1b8, + NVME_SC_ZONE_FULL = 0x1b9, + NVME_SC_ZONE_READ_ONLY = 0x1ba, + NVME_SC_ZONE_OFFLINE = 0x1bb, + NVME_SC_ZONE_INVALID_WRITE = 0x1bc, + NVME_SC_ZONE_TOO_MANY_ACTIVE = 0x1bd, + NVME_SC_ZONE_TOO_MANY_OPEN = 0x1be, + NVME_SC_ZONE_INVALID_TRANSITION = 0x1bf, + /* * Media and Data Integrity Errors: */ -- cgit v1.2.3 From e15864f8ea05b24071b07300459ae7e511d0b938 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 14 Jul 2020 23:18:23 +0200 Subject: block: add max_open_zones to blk-sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new max_open_zones definition in the sysfs documentation. This definition will be common for all devices utilizing the zoned block device support in the kernel. Export max open zones according to this new definition for NVMe Zoned Namespace devices, ZAC ATA devices (which are treated as SCSI devices by the kernel), and ZBC SCSI devices. Add the new max_open_zones member to struct request_queue, rather than as a queue limit, since this property cannot be split across stacking drivers. Signed-off-by: Niklas Cassel Reviewed-by: Javier González Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index de7adc59b993..c8beb8bbdb08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -513,6 +513,7 @@ struct request_queue { unsigned int nr_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; + unsigned int max_open_zones; #endif /* CONFIG_BLK_DEV_ZONED */ /* @@ -722,6 +723,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return true; return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } + +static inline void blk_queue_max_open_zones(struct request_queue *q, + unsigned int max_open_zones) +{ + q->max_open_zones = max_open_zones; +} + +static inline unsigned int queue_max_open_zones(const struct request_queue *q) +{ + return q->max_open_zones; +} #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -737,6 +749,10 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, { return 0; } +static inline unsigned int queue_max_open_zones(const struct request_queue *q) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) @@ -1519,6 +1535,15 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) return 0; } +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return queue_max_open_zones(q); + return 0; +} + static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3 From 659bf827ba8f1183b714341d8a1d4b1e446178d9 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 14 Jul 2020 23:18:24 +0200 Subject: block: add max_active_zones to blk-sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new max_active zones definition in the sysfs documentation. This definition will be common for all devices utilizing the zoned block device support in the kernel. Export max_active_zones according to this new definition for NVMe Zoned Namespace devices, ZAC ATA devices (which are treated as SCSI devices by the kernel), and ZBC SCSI devices. Add the new max_active_zones member to struct request_queue, rather than as a queue limit, since this property cannot be split across stacking drivers. For SCSI devices, even though max active zones is not part of the ZBC/ZAC spec, export max_active_zones as 0, signifying "no limit". Signed-off-by: Niklas Cassel Reviewed-by: Javier González Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8beb8bbdb08..285b59cfc064 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -514,6 +514,7 @@ struct request_queue { unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; unsigned int max_open_zones; + unsigned int max_active_zones; #endif /* CONFIG_BLK_DEV_ZONED */ /* @@ -734,6 +735,17 @@ static inline unsigned int queue_max_open_zones(const struct request_queue *q) { return q->max_open_zones; } + +static inline void blk_queue_max_active_zones(struct request_queue *q, + unsigned int max_active_zones) +{ + q->max_active_zones = max_active_zones; +} + +static inline unsigned int queue_max_active_zones(const struct request_queue *q) +{ + return q->max_active_zones; +} #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -753,6 +765,10 @@ static inline unsigned int queue_max_open_zones(const struct request_queue *q) { return 0; } +static inline unsigned int queue_max_active_zones(const struct request_queue *q) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) @@ -1544,6 +1560,15 @@ static inline unsigned int bdev_max_open_zones(struct block_device *bdev) return 0; } +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return queue_max_active_zones(q); + return 0; +} + static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3