author     Linus Torvalds <torvalds@linux-foundation.org>  2016-10-07 14:42:05 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-10-07 14:42:05 -0700
commit     513a4befae06c4469abfb836e8f71977de58c636 (patch)
tree       18cc7d0b01a7fd2352de734e99a4ca5c29ad5fac /drivers
parent     87840a2b7e048018d18d60bdac5c09224de85370 (diff)
parent     997198ba1ed691c09457120576c27dbd953d0557 (diff)
Merge branch 'for-4.9/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
 "This is the main pull request for block layer changes in 4.9.

  As mentioned at the last merge window, I've changed things up and now
  do just one branch for core block layer changes, and driver changes.
  This avoids dependencies between the two branches. Outside of this
  main pull request, there are two topical branches coming as well.

  This pull request contains:

   - A set of fixes, and a conversion to blk-mq, of nbd. From Josef.

   - Set of fixes and updates for lightnvm from Matias, Simon, and
     Arnd. Followup dependency fix from Geert.

   - General fixes from Bart, Baoyou, Guoqing, and Linus W.

   - CFQ async write starvation fix from Glauber.

   - Add support for delayed kick of the requeue list, from Mike.

   - Pull out the scalable bitmap code from blk-mq-tag.c and make it
     generally available under the name of sbitmap. Only blk-mq-tag
     uses it for now, but the blk-mq scheduling bits will use it as
     well. From Omar.

   - bdev thaw error propagation from Pierre.

   - Improve the blk polling statistics, and allow the user to clear
     them. From Stephen.

   - Set of minor cleanups from Christoph in block/blk-mq.

   - Set of cleanups and optimizations from me for block/blk-mq.

   - Various nvme/nvmet/nvmeof fixes from the various folks"

* 'for-4.9/block' of git://git.kernel.dk/linux-block: (54 commits)
  fs/block_dev.c: return the right error in thaw_bdev()
  nvme: Pass pointers, not dma addresses, to nvme_get/set_features()
  nvme/scsi: Remove power management support
  nvmet: Make dsm number of ranges zero based
  nvmet: Use direct IO for writes
  admin-cmd: Added smart-log command support.
  nvme-fabrics: Add host_traddr options field to host infrastructure
  nvme-fabrics: revise host transport option descriptions
  nvme-fabrics: rework nvmf_get_address() for variable options
  nbd: use BLK_MQ_F_BLOCKING
  blkcg: Annotate blkg_hint correctly
  cfq: fix starvation of asynchronous writes
  blk-mq: add flag for drivers wanting blocking ->queue_rq()
  blk-mq: remove non-blocking pass in blk_mq_map_request
  blk-mq: get rid of manual run of queue with __blk_mq_run_hw_queue()
  block: export bio_free_pages to other modules
  lightnvm: propagate device_add() error code
  lightnvm: expose device geometry through sysfs
  lightnvm: control life of nvm_dev in driver
  blk-mq: register device instead of disk
  ...
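For orientation before the diffs: the nbd conversion below follows the
usual 4.9-era blk-mq bring-up, i.e. fill in a blk_mq_tag_set, allocate
it, then build the request_queue from it. A minimal sketch of that
pattern, mirroring the ops and flags used in the nbd hunks (the
sketch_* names are hypothetical, not part of this series):

	#include <linux/blk-mq.h>
	#include <linux/err.h>

	static int sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
	{
		blk_mq_start_request(bd->rq);
		/* hand bd->rq (or its per-command pdu) to the transport here */
		return BLK_MQ_RQ_QUEUE_OK;
	}

	static struct blk_mq_ops sketch_mq_ops = {
		.queue_rq	= sketch_queue_rq,
		.map_queue	= blk_mq_map_queue,	/* still required in this series */
	};

	static int sketch_init_queue(struct blk_mq_tag_set *set,
				     struct request_queue **q)
	{
		int err;

		set->ops		= &sketch_mq_ops;
		set->nr_hw_queues	= 1;
		set->queue_depth	= 128;
		set->numa_node		= NUMA_NO_NODE;
		set->flags		= BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;

		err = blk_mq_alloc_tag_set(set);
		if (err)
			return err;

		*q = blk_mq_init_queue(set);
		if (IS_ERR(*q)) {
			blk_mq_free_tag_set(set);
			return PTR_ERR(*q);
		}
		return 0;
	}

Note that blk_mq_init_queue() reports failure via ERR_PTR, so the
sketch checks IS_ERR() rather than NULL.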
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c    2
-rw-r--r--  drivers/block/nbd.c                411
-rw-r--r--  drivers/block/null_blk.c           128
-rw-r--r--  drivers/lightnvm/Kconfig             2
-rw-r--r--  drivers/lightnvm/Makefile            2
-rw-r--r--  drivers/lightnvm/core.c             55
-rw-r--r--  drivers/lightnvm/lightnvm.h         35
-rw-r--r--  drivers/lightnvm/sysfs.c           198
-rw-r--r--  drivers/md/bcache/btree.c            6
-rw-r--r--  drivers/md/bcache/debug.c            6
-rw-r--r--  drivers/md/bcache/movinggc.c         5
-rw-r--r--  drivers/md/bcache/request.c          9
-rw-r--r--  drivers/md/bcache/writeback.c        5
-rw-r--r--  drivers/md/dm-crypt.c                2
-rw-r--r--  drivers/md/dm-log-writes.c           6
-rw-r--r--  drivers/md/dm-rq.c                   2
-rw-r--r--  drivers/md/raid1.c                   8
-rw-r--r--  drivers/nvme/host/core.c           155
-rw-r--r--  drivers/nvme/host/fabrics.c         25
-rw-r--r--  drivers/nvme/host/fabrics.h         11
-rw-r--r--  drivers/nvme/host/lightnvm.c        33
-rw-r--r--  drivers/nvme/host/nvme.h            30
-rw-r--r--  drivers/nvme/host/scsi.c            80
-rw-r--r--  drivers/nvme/target/admin-cmd.c     88
-rw-r--r--  drivers/nvme/target/io-cmd.c         3
25 files changed, 779 insertions(+), 528 deletions(-)
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 2aca98e8e427..88c46853dbb5 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3686,7 +3686,7 @@ static int mtip_block_open(struct block_device *dev, fmode_t mode)
return -ENODEV;
}
-void mtip_block_release(struct gendisk *disk, fmode_t mode)
+static void mtip_block_release(struct gendisk *disk, fmode_t mode)
{
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a9e398019f38..ccfcfc11399a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -34,33 +34,29 @@
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
+#include <linux/blk-mq.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <linux/nbd.h>
+#define NBD_TIMEDOUT 0
+#define NBD_DISCONNECT_REQUESTED 1
+
struct nbd_device {
u32 flags;
+ unsigned long runtime_flags;
struct socket * sock; /* If == NULL, device is not ready, yet */
int magic;
- spinlock_t queue_lock;
- struct list_head queue_head; /* Requests waiting result */
- struct request *active_req;
- wait_queue_head_t active_wq;
- struct list_head waiting_queue; /* Requests to be sent */
- wait_queue_head_t waiting_wq;
+ struct blk_mq_tag_set tag_set;
struct mutex tx_lock;
struct gendisk *disk;
int blksize;
loff_t bytesize;
- int xmit_timeout;
- bool timedout;
- bool disconnect; /* a disconnect has been requested by user */
- struct timer_list timeout_timer;
/* protects initialization and shutdown of the socket */
spinlock_t sock_lock;
struct task_struct *task_recv;
@@ -71,6 +67,11 @@ struct nbd_device {
#endif
};
+struct nbd_cmd {
+ struct nbd_device *nbd;
+ struct list_head list;
+};
+
#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif
@@ -83,18 +84,6 @@ static unsigned int nbds_max = 16;
static struct nbd_device *nbd_dev;
static int max_part;
-/*
- * Use just one lock (or at most 1 per NIC). Two arguments for this:
- * 1. Each NIC is essentially a synchronization point for all servers
- * accessed through that NIC so there's no need to have more locks
- * than NICs anyway.
- * 2. More locks lead to more "Dirty cache line bouncing" which will slow
- * down each lock to the point where they're actually slower than just
- * a single lock.
- * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
- */
-static DEFINE_SPINLOCK(nbd_lock);
-
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
return disk_to_dev(nbd->disk);
@@ -153,18 +142,16 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
return 0;
}
-static void nbd_end_request(struct nbd_device *nbd, struct request *req)
+static void nbd_end_request(struct nbd_cmd *cmd)
{
+ struct nbd_device *nbd = cmd->nbd;
+ struct request *req = blk_mq_rq_from_pdu(cmd);
int error = req->errors ? -EIO : 0;
- struct request_queue *q = req->q;
- unsigned long flags;
- dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req,
+ dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
error ? "failed" : "done");
- spin_lock_irqsave(q->queue_lock, flags);
- __blk_end_request_all(req, error);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_mq_complete_request(req, error);
}
/*
@@ -172,40 +159,49 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
*/
static void sock_shutdown(struct nbd_device *nbd)
{
- spin_lock_irq(&nbd->sock_lock);
+ struct socket *sock;
+
+ spin_lock(&nbd->sock_lock);
if (!nbd->sock) {
- spin_unlock_irq(&nbd->sock_lock);
+ spin_unlock(&nbd->sock_lock);
return;
}
+ sock = nbd->sock;
dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
- kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
- sockfd_put(nbd->sock);
nbd->sock = NULL;
- spin_unlock_irq(&nbd->sock_lock);
+ spin_unlock(&nbd->sock_lock);
- del_timer(&nbd->timeout_timer);
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sockfd_put(sock);
}
-static void nbd_xmit_timeout(unsigned long arg)
+static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
+ bool reserved)
{
- struct nbd_device *nbd = (struct nbd_device *)arg;
- unsigned long flags;
-
- if (list_empty(&nbd->queue_head))
- return;
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ struct nbd_device *nbd = cmd->nbd;
+ struct socket *sock = NULL;
- spin_lock_irqsave(&nbd->sock_lock, flags);
+ spin_lock(&nbd->sock_lock);
- nbd->timedout = true;
+ set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
- if (nbd->sock)
- kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
+ if (nbd->sock) {
+ sock = nbd->sock;
+ get_file(sock->file);
+ }
- spin_unlock_irqrestore(&nbd->sock_lock, flags);
+ spin_unlock(&nbd->sock_lock);
+ if (sock) {
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sockfd_put(sock);
+ }
+ req->errors++;
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
+ return BLK_EH_HANDLED;
}
/*
@@ -255,9 +251,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
tsk_restore_flags(current, pflags, PF_MEMALLOC);
- if (!send && nbd->xmit_timeout)
- mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
-
return result;
}
@@ -273,8 +266,9 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
}
/* always call with the tx_lock held */
-static int nbd_send_req(struct nbd_device *nbd, struct request *req)
+static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd)
{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
int result, flags;
struct nbd_request request;
unsigned long size = blk_rq_bytes(req);
@@ -298,10 +292,10 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
}
- memcpy(request.handle, &req, sizeof(req));
+ memcpy(request.handle, &req->tag, sizeof(req->tag));
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
- req, nbdcmd_to_ascii(type),
+ cmd, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, 1, &request, sizeof(request),
(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
@@ -323,7 +317,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
if (!rq_iter_last(bvec, iter))
flags = MSG_MORE;
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
- req, bvec.bv_len);
+ cmd, bvec.bv_len);
result = sock_send_bvec(nbd, &bvec, flags);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk),
@@ -336,29 +330,6 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
return 0;
}
-static struct request *nbd_find_request(struct nbd_device *nbd,
- struct request *xreq)
-{
- struct request *req, *tmp;
- int err;
-
- err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
- if (unlikely(err))
- return ERR_PTR(err);
-
- spin_lock(&nbd->queue_lock);
- list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
- if (req != xreq)
- continue;
- list_del_init(&req->queuelist);
- spin_unlock(&nbd->queue_lock);
- return req;
- }
- spin_unlock(&nbd->queue_lock);
-
- return ERR_PTR(-ENOENT);
-}
-
static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
{
int result;
@@ -370,11 +341,14 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
}
/* NULL returned = something went wrong, inform userspace */
-static struct request *nbd_read_stat(struct nbd_device *nbd)
+static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd)
{
int result;
struct nbd_reply reply;
- struct request *req;
+ struct nbd_cmd *cmd;
+ struct request *req = NULL;
+ u16 hwq;
+ int tag;
reply.magic = 0;
result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
@@ -390,25 +364,27 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
return ERR_PTR(-EPROTO);
}
- req = nbd_find_request(nbd, *(struct request **)reply.handle);
- if (IS_ERR(req)) {
- result = PTR_ERR(req);
- if (result != -ENOENT)
- return ERR_PTR(result);
+ memcpy(&tag, reply.handle, sizeof(int));
- dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
- reply.handle);
- return ERR_PTR(-EBADR);
+ hwq = blk_mq_unique_tag_to_hwq(tag);
+ if (hwq < nbd->tag_set.nr_hw_queues)
+ req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
+ blk_mq_unique_tag_to_tag(tag));
+ if (!req || !blk_mq_request_started(req)) {
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
+ tag, req);
+ return ERR_PTR(-ENOENT);
}
+ cmd = blk_mq_rq_to_pdu(req);
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
req->errors++;
- return req;
+ return cmd;
}
- dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
+ dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
if (rq_data_dir(req) != WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
@@ -419,13 +395,13 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
req->errors++;
- return req;
+ return cmd;
}
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
- req, bvec.bv_len);
+ cmd, bvec.bv_len);
}
}
- return req;
+ return cmd;
}
static ssize_t pid_show(struct device *dev,
@@ -444,7 +420,7 @@ static struct device_attribute pid_attr = {
static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
{
- struct request *req;
+ struct nbd_cmd *cmd;
int ret;
BUG_ON(nbd->magic != NBD_MAGIC);
@@ -460,13 +436,13 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
nbd_size_update(nbd, bdev);
while (1) {
- req = nbd_read_stat(nbd);
- if (IS_ERR(req)) {
- ret = PTR_ERR(req);
+ cmd = nbd_read_stat(nbd);
+ if (IS_ERR(cmd)) {
+ ret = PTR_ERR(cmd);
break;
}
- nbd_end_request(nbd, req);
+ nbd_end_request(cmd);
}
nbd_size_clear(nbd, bdev);
@@ -475,44 +451,37 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
return ret;
}
-static void nbd_clear_que(struct nbd_device *nbd)
+static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
- struct request *req;
+ struct nbd_cmd *cmd;
+
+ if (!blk_mq_request_started(req))
+ return;
+ cmd = blk_mq_rq_to_pdu(req);
+ req->errors++;
+ nbd_end_request(cmd);
+}
+
+static void nbd_clear_que(struct nbd_device *nbd)
+{
BUG_ON(nbd->magic != NBD_MAGIC);
/*
* Because we have set nbd->sock to NULL under the tx_lock, all
- * modifications to the list must have completed by now. For
- * the same reason, the active_req must be NULL.
- *
- * As a consequence, we don't need to take the spin lock while
- * purging the list here.
+ * modifications to the list must have completed by now.
*/
BUG_ON(nbd->sock);
- BUG_ON(nbd->active_req);
- while (!list_empty(&nbd->queue_head)) {
- req = list_entry(nbd->queue_head.next, struct request,
- queuelist);
- list_del_init(&req->queuelist);
- req->errors++;
- nbd_end_request(nbd, req);
- }
-
- while (!list_empty(&nbd->waiting_queue)) {
- req = list_entry(nbd->waiting_queue.next, struct request,
- queuelist);
- list_del_init(&req->queuelist);
- req->errors++;
- nbd_end_request(nbd, req);
- }
+ blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}
-static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
+static void nbd_handle_cmd(struct nbd_cmd *cmd)
{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+ struct nbd_device *nbd = cmd->nbd;
+
if (req->cmd_type != REQ_TYPE_FS)
goto error_out;
@@ -526,6 +495,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
req->errors = 0;
mutex_lock(&nbd->tx_lock);
+ nbd->task_send = current;
if (unlikely(!nbd->sock)) {
mutex_unlock(&nbd->tx_lock);
dev_err(disk_to_dev(nbd->disk),
@@ -533,106 +503,30 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
goto error_out;
}
- nbd->active_req = req;
-
- if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
- mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
-
- if (nbd_send_req(nbd, req) != 0) {
+ if (nbd_send_cmd(nbd, cmd) != 0) {
dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
req->errors++;
- nbd_end_request(nbd, req);
- } else {
- spin_lock(&nbd->queue_lock);
- list_add_tail(&req->queuelist, &nbd->queue_head);
- spin_unlock(&nbd->queue_lock);
+ nbd_end_request(cmd);
}
- nbd->active_req = NULL;
+ nbd->task_send = NULL;
mutex_unlock(&nbd->tx_lock);
- wake_up_all(&nbd->active_wq);
return;
error_out:
req->errors++;
- nbd_end_request(nbd, req);
+ nbd_end_request(cmd);
}
-static int nbd_thread_send(void *data)
+static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct nbd_device *nbd = data;
- struct request *req;
-
- nbd->task_send = current;
-
- set_user_nice(current, MIN_NICE);
- while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
- /* wait for something to do */
- wait_event_interruptible(nbd->waiting_wq,
- kthread_should_stop() ||
- !list_empty(&nbd->waiting_queue));
-
- /* extract request */
- if (list_empty(&nbd->waiting_queue))
- continue;
-
- spin_lock_irq(&nbd->queue_lock);
- req = list_entry(nbd->waiting_queue.next, struct request,
- queuelist);
- list_del_init(&req->queuelist);
- spin_unlock_irq(&nbd->queue_lock);
-
- /* handle request */
- nbd_handle_req(nbd, req);
- }
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
- nbd->task_send = NULL;
-
- return 0;
-}
-
-/*
- * We always wait for result of write, for now. It would be nice to make it optional
- * in future
- * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
- * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
- */
-
-static void nbd_request_handler(struct request_queue *q)
- __releases(q->queue_lock) __acquires(q->queue_lock)
-{
- struct request *req;
-
- while ((req = blk_fetch_request(q)) != NULL) {
- struct nbd_device *nbd;
-
- spin_unlock_irq(q->queue_lock);
-
- nbd = req->rq_disk->private_data;
-
- BUG_ON(nbd->magic != NBD_MAGIC);
-
- dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n",
- req, req->cmd_type);
-
- if (unlikely(!nbd->sock)) {
- dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Attempted send on closed socket\n");
- req->errors++;
- nbd_end_request(nbd, req);
- spin_lock_irq(q->queue_lock);
- continue;
- }
-
- spin_lock_irq(&nbd->queue_lock);
- list_add_tail(&req->queuelist, &nbd->waiting_queue);
- spin_unlock_irq(&nbd->queue_lock);
-
- wake_up(&nbd->waiting_wq);
-
- spin_lock_irq(q->queue_lock);
- }
+ blk_mq_start_request(bd->rq);
+ nbd_handle_cmd(cmd);
+ return BLK_MQ_RQ_QUEUE_OK;
}
static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
@@ -657,15 +551,13 @@ out:
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
- nbd->disconnect = false;
- nbd->timedout = false;
+ nbd->runtime_flags = 0;
nbd->blksize = 1024;
nbd->bytesize = 0;
set_capacity(nbd->disk, 0);
nbd->flags = 0;
- nbd->xmit_timeout = 0;
+ nbd->tag_set.timeout = 0;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
- del_timer_sync(&nbd->timeout_timer);
}
static void nbd_bdev_reset(struct block_device *bdev)
@@ -700,33 +592,37 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
{
switch (cmd) {
case NBD_DISCONNECT: {
- struct request sreq;
+ struct request *sreq;
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
if (!nbd->sock)
return -EINVAL;
+ sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
+ if (!sreq)
+ return -ENOMEM;
+
mutex_unlock(&nbd->tx_lock);
fsync_bdev(bdev);
mutex_lock(&nbd->tx_lock);
- blk_rq_init(NULL, &sreq);
- sreq.cmd_type = REQ_TYPE_DRV_PRIV;
+ sreq->cmd_type = REQ_TYPE_DRV_PRIV;
/* Check again after getting mutex back. */
- if (!nbd->sock)
+ if (!nbd->sock) {
+ blk_mq_free_request(sreq);
return -EINVAL;
+ }
- nbd->disconnect = true;
+ set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags);
- nbd_send_req(nbd, &sreq);
+ nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq));
+ blk_mq_free_request(sreq);
return 0;
}
case NBD_CLEAR_SOCK:
sock_shutdown(nbd);
nbd_clear_que(nbd);
- BUG_ON(!list_empty(&nbd->queue_head));
- BUG_ON(!list_empty(&nbd->waiting_queue));
kill_bdev(bdev);
return 0;
@@ -758,13 +654,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
return nbd_size_set(nbd, bdev, nbd->blksize, arg);
case NBD_SET_TIMEOUT:
- nbd->xmit_timeout = arg * HZ;
- if (arg)
- mod_timer(&nbd->timeout_timer,
- jiffies + nbd->xmit_timeout);
- else
- del_timer_sync(&nbd->timeout_timer);
-
+ nbd->tag_set.timeout = arg * HZ;
return 0;
case NBD_SET_FLAGS:
@@ -772,7 +662,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
return 0;
case NBD_DO_IT: {
- struct task_struct *thread;
int error;
if (nbd->task_recv)
@@ -786,18 +675,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd_parse_flags(nbd, bdev);
- thread = kthread_run(nbd_thread_send, nbd, "%s",
- nbd_name(nbd));
- if (IS_ERR(thread)) {
- mutex_lock(&nbd->tx_lock);
- nbd->task_recv = NULL;
- return PTR_ERR(thread);
- }
-
nbd_dev_dbg_init(nbd);
error = nbd_thread_recv(nbd, bdev);
nbd_dev_dbg_close(nbd);
- kthread_stop(thread);
mutex_lock(&nbd->tx_lock);
nbd->task_recv = NULL;
@@ -807,9 +687,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
kill_bdev(bdev);
nbd_bdev_reset(bdev);
- if (nbd->disconnect) /* user requested, ignore socket errors */
+ /* user requested, ignore socket errors */
+ if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
error = 0;
- if (nbd->timedout)
+ if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
error = -ETIMEDOUT;
nbd_reset(nbd);
@@ -825,10 +706,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
return 0;
case NBD_PRINT_DEBUG:
- dev_info(disk_to_dev(nbd->disk),
- "next = %p, prev = %p, head = %p\n",
- nbd->queue_head.next, nbd->queue_head.prev,
- &nbd->queue_head);
+ /*
+ * For compatibility only, we no longer keep a list of
+ * outstanding requests.
+ */
return 0;
}
return -ENOTTY;
@@ -935,7 +816,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
- debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
+ debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
@@ -987,6 +868,24 @@ static void nbd_dbg_close(void)
#endif
+static int nbd_init_request(void *data, struct request *rq,
+ unsigned int hctx_idx, unsigned int request_idx,
+ unsigned int numa_node)
+{
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+ cmd->nbd = data;
+ INIT_LIST_HEAD(&cmd->list);
+ return 0;
+}
+
+static struct blk_mq_ops nbd_mq_ops = {
+ .queue_rq = nbd_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_request = nbd_init_request,
+ .timeout = nbd_xmit_timeout,
+};
+
/*
* And here should be modules and kernel interface
* (Just smiley confuses emacs :-)
@@ -1035,16 +934,34 @@ static int __init nbd_init(void)
if (!disk)
goto out;
nbd_dev[i].disk = disk;
+
+ nbd_dev[i].tag_set.ops = &nbd_mq_ops;
+ nbd_dev[i].tag_set.nr_hw_queues = 1;
+ nbd_dev[i].tag_set.queue_depth = 128;
+ nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
+ nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
+ nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+ BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
+ nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
+
+ err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
+ if (err) {
+ put_disk(disk);
+ goto out;
+ }
+
/*
* The new linux 2.5 block layer implementation requires
* every gendisk to have its very own request_queue struct.
* These structs are big so we dynamically allocate them.
*/
- disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
+ disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
if (!disk->queue) {
+ blk_mq_free_tag_set(&nbd_dev[i].tag_set);
put_disk(disk);
goto out;
}
+
/*
* Tell the block layer that we are not a rotational device
*/
@@ -1069,16 +986,8 @@ static int __init nbd_init(void)
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = nbd_dev[i].disk;
nbd_dev[i].magic = NBD_MAGIC;
- INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
- spin_lock_init(&nbd_dev[i].queue_lock);
spin_lock_init(&nbd_dev[i].sock_lock);
- INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
- init_timer(&nbd_dev[i].timeout_timer);
- nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
- nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
- init_waitqueue_head(&nbd_dev[i].active_wq);
- init_waitqueue_head(&nbd_dev[i].waiting_wq);
disk->major = NBD_MAJOR;
disk->first_minor = i << part_shift;
disk->fops = &nbd_fops;
@@ -1091,6 +1000,7 @@ static int __init nbd_init(void)
return 0;
out:
while (i--) {
+ blk_mq_free_tag_set(&nbd_dev[i].tag_set);
blk_cleanup_queue(nbd_dev[i].disk->queue);
put_disk(nbd_dev[i].disk);
}
@@ -1110,6 +1020,7 @@ static void __exit nbd_cleanup(void)
if (disk) {
del_gendisk(disk);
blk_cleanup_queue(disk->queue);
+ blk_mq_free_tag_set(&nbd_dev[i].tag_set);
put_disk(disk);
}
}
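The heart of the nbd conversion above is how a server reply is matched
back to its request: the old code put the struct request pointer on
the wire and walked queue_head to find it again, while the blk-mq
version sends the tag and resolves it through the tag set.
nbd_send_cmd() copies the raw req->tag into the 8-byte handle, which
lines up with the unique-tag decode in nbd_read_stat() only because
the tag set uses a single hardware queue; a hedged sketch of the
symmetric round trip (sketch_* names are hypothetical):

	#include <linux/blk-mq.h>
	#include <linux/string.h>

	static void sketch_encode_handle(struct request *req, char handle[8])
	{
		u32 tag = blk_mq_unique_tag(req);	/* hw queue index + tag */

		memset(handle, 0, 8);
		memcpy(handle, &tag, sizeof(tag));
	}

	static struct request *sketch_decode_handle(struct blk_mq_tag_set *set,
						    const char handle[8])
	{
		struct request *req = NULL;
		u32 tag;
		u16 hwq;

		memcpy(&tag, handle, sizeof(tag));
		hwq = blk_mq_unique_tag_to_hwq(tag);
		if (hwq < set->nr_hw_queues)
			req = blk_mq_tag_to_rq(set->tags[hwq],
					       blk_mq_unique_tag_to_tag(tag));
		/* reject stale or garbage handles, as nbd_read_stat() does */
		if (!req || !blk_mq_request_started(req))
			return NULL;
		return req;
	}

The resolved request is then finished with blk_mq_complete_request(),
which is what nbd_end_request() now does in place of the old
__blk_end_request_all() under queue_lock.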
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 75a7f88d6717..91e1de898daf 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -34,6 +34,7 @@ struct nullb {
unsigned int index;
struct request_queue *q;
struct gendisk *disk;
+ struct nvm_dev *ndev;
struct blk_mq_tag_set tag_set;
struct hrtimer timer;
unsigned int queue_depth;
@@ -414,23 +415,6 @@ static void cleanup_queues(struct nullb *nullb)
kfree(nullb->queues);
}
-static void null_del_dev(struct nullb *nullb)
-{
- list_del_init(&nullb->list);
-
- if (use_lightnvm)
- nvm_unregister(nullb->disk_name);
- else
- del_gendisk(nullb->disk);
- blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
- if (!use_lightnvm)
- put_disk(nullb->disk);
- cleanup_queues(nullb);
- kfree(nullb);
-}
-
#ifdef CONFIG_NVM
static void null_lnvm_end_io(struct request *rq, int error)
@@ -564,10 +548,58 @@ static struct nvm_dev_ops null_lnvm_dev_ops = {
/* Simulate nvme protocol restriction */
.max_phys_sect = 64,
};
+
+static int null_nvm_register(struct nullb *nullb)
+{
+ struct nvm_dev *dev;
+ int rv;
+
+ dev = nvm_alloc_dev(0);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->q = nullb->q;
+ memcpy(dev->name, nullb->disk_name, DISK_NAME_LEN);
+ dev->ops = &null_lnvm_dev_ops;
+
+ rv = nvm_register(dev);
+ if (rv) {
+ kfree(dev);
+ return rv;
+ }
+ nullb->ndev = dev;
+ return 0;
+}
+
+static void null_nvm_unregister(struct nullb *nullb)
+{
+ nvm_unregister(nullb->ndev);
+}
#else
-static struct nvm_dev_ops null_lnvm_dev_ops;
+static int null_nvm_register(struct nullb *nullb)
+{
+ return -EINVAL;
+}
+static void null_nvm_unregister(struct nullb *nullb) {}
#endif /* CONFIG_NVM */
+static void null_del_dev(struct nullb *nullb)
+{
+ list_del_init(&nullb->list);
+
+ if (use_lightnvm)
+ null_nvm_unregister(nullb);
+ else
+ del_gendisk(nullb->disk);
+ blk_cleanup_queue(nullb->q);
+ if (queue_mode == NULL_Q_MQ)
+ blk_mq_free_tag_set(&nullb->tag_set);
+ if (!use_lightnvm)
+ put_disk(nullb->disk);
+ cleanup_queues(nullb);
+ kfree(nullb);
+}
+
static int null_open(struct block_device *