VMCI: queue pairs implementation.

VMCI queue pairs allow for bi-directional ordered communication between host and guests. Signed-off-by: George Zhang <georgezhang@vmware.com> Acked-by: Andy king <acking@vmware.com> Acked-by: Dmitry Torokhov <dtor@vmware.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: George Zhang <georgezhang@vmware.com> 2013-01-08 15:54:54 -0800
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2013-01-08 16:15:56 -0800
commit: 06164d2b72aa752ce4633184b3e0d97601017135 (patch)
tree: 51e2ec49ace40729b043978fae3a1e8a5a5411c2
parent: b484b26cc7be6ccf3676deb5e03aed2609ee9a40 (diff)
download: linux-06164d2b72aa752ce4633184b3e0d97601017135.tar.gz
linux-06164d2b72aa752ce4633184b3e0d97601017135.tar.bz2
linux-06164d2b72aa752ce4633184b3e0d97601017135.zip
2 files changed, 3611 insertions, 0 deletions
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
new file mode 100644
index 000000000000..1123111ba1bf
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -0,0 +1,3420 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/socket.h>
+#include <linux/wait.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * In the following, we will distinguish between two kinds of VMX processes -
+ * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
+ * VMCI page files in the VMX and supporting VM to VM communication and the
+ * newer ones that use the guest memory directly. We will in the following
+ * refer to the older VMX versions as old-style VMX'en, and the newer ones as
+ * new-style VMX'en.
+ *
+ * The state transition datagram is as follows (the VMCIQPB_ prefix has been
+ * removed for readability) - see below for more details on the transtions:
+ *
+ *            --------------  NEW  -------------
+ *            |                                |
+ *           \_/                              \_/
+ *     CREATED_NO_MEM <-----------------> CREATED_MEM
+ *            |    |                           |
+ *            |    o-----------------------o   |
+ *            |                            |   |
+ *           \_/                          \_/ \_/
+ *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
+ *            |                            |   |
+ *            |     o----------------------o   |
+ *            |     |                          |
+ *           \_/   \_/                        \_/
+ *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
+ *            |                                |
+ *            |                                |
+ *            -------------> gone <-------------
+ *
+ * In more detail. When a VMCI queue pair is first created, it will be in the
+ * VMCIQPB_NEW state. It will then move into one of the following states:
+ *
+ * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
+ *
+ *     - the created was performed by a host endpoint, in which case there is
+ *       no backing memory yet.
+ *
+ *     - the create was initiated by an old-style VMX, that uses
+ *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
+ *       a later point in time. This state can be distinguished from the one
+ *       above by the context ID of the creator. A host side is not allowed to
+ *       attach until the page store has been set.
+ *
+ * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
+ *     is created by a VMX using the queue pair device backend that
+ *     sets the UVAs of the queue pair immediately and stores the
+ *     information for later attachers. At this point, it is ready for
+ *     the host side to attach to it.
+ *
+ * Once the queue pair is in one of the created states (with the exception of
+ * the case mentioned for older VMX'en above), it is possible to attach to the
+ * queue pair. Again we have two new states possible:
+ *
+ * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
+ *   paths:
+ *
+ *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
+ *       pair, and attaches to a queue pair previously created by the host side.
+ *
+ *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
+ *       already created by a guest.
+ *
+ *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
+ *       vmci_qp_broker_set_page_store (see below).
+ *
+ * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
+ *     VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
+ *     bring the queue pair into this state. Once vmci_qp_broker_set_page_store
+ *     is called to register the user memory, the VMCIQPB_ATTACH_MEM state
+ *     will be entered.
+ *
+ * From the attached queue pair, the queue pair can enter the shutdown states
+ * when either side of the queue pair detaches. If the guest side detaches
+ * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
+ * the content of the queue pair will no longer be available. If the host
+ * side detaches first, the queue pair will either enter the
+ * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
+ * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
+ * (e.g., the host detaches while a guest is stunned).
+ *
+ * New-style VMX'en will also unmap guest memory, if the guest is
+ * quiesced, e.g., during a snapshot operation. In that case, the guest
+ * memory will no longer be available, and the queue pair will transition from
+ * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
+ * in which case the queue pair will transition from the *_NO_MEM state at that
+ * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
+ * since the peer may have either attached or detached in the meantime. The
+ * values are laid out such that ++ on a state will move from a *_NO_MEM to a
+ * *_MEM state, and vice versa.
+ */
+
+/*
+ * VMCIMemcpy{To,From}QueueFunc() prototypes.  Functions of these
+ * types are passed around to enqueue and dequeue routines.  Note that
+ * often the functions passed are simply wrappers around memcpy
+ * itself.
+ *
+ * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
+ * there's an unused last parameter for the hosted side.  In
+ * ESX, that parameter holds a buffer type.
+ */
+typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
+				      u64 queue_offset, const void *src,
+				      size_t src_offset, size_t size);
+typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
+					const struct vmci_queue *queue,
+					u64 queue_offset, size_t size);
+
+/* The Kernel specific component of the struct vmci_queue structure. */
+struct vmci_queue_kern_if {
+	struct page **page;
+	struct page **header_page;
+	void *va;
+	struct mutex __mutex;	/* Protects the queue. */
+	struct mutex *mutex;	/* Shared by producer and consumer queues. */
+	bool host;
+	size_t num_pages;
+	bool mapped;
+};
+
+/*
+ * This structure is opaque to the clients.
+ */
+struct vmci_qp {
+	struct vmci_handle handle;
+	struct vmci_queue *produce_q;
+	struct vmci_queue *consume_q;
+	u64 produce_q_size;
+	u64 consume_q_size;
+	u32 peer;
+	u32 flags;
+	u32 priv_flags;
+	bool guest_endpoint;
+	unsigned int blocked;
+	unsigned int generation;
+	wait_queue_head_t event;
+};
+
+enum qp_broker_state {
+	VMCIQPB_NEW,
+	VMCIQPB_CREATED_NO_MEM,
+	VMCIQPB_CREATED_MEM,
+	VMCIQPB_ATTACHED_NO_MEM,
+	VMCIQPB_ATTACHED_MEM,
+	VMCIQPB_SHUTDOWN_NO_MEM,
+	VMCIQPB_SHUTDOWN_MEM,
+	VMCIQPB_GONE
+};
+
+#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
+				     _qpb->state == VMCIQPB_ATTACHED_MEM || \
+				     _qpb->state == VMCIQPB_SHUTDOWN_MEM)
+
+/*
+ * In the queue pair broker, we always use the guest point of view for
+ * the produce and consume queue values and references, e.g., the
+ * produce queue size stored is the guests produce queue size. The
+ * host endpoint will need to swap these around. The only exception is
+ * the local queue pairs on the host, in which case the host endpoint
+ * that creates the queue pair will have the right orientation, and
+ * the attaching host endpoint will need to swap.
+ */
+struct qp_entry {
+	struct list_head list_item;
+	struct vmci_handle handle;
+	u32 peer;
+	u32 flags;
+	u64 produce_size;
+	u64 consume_size;
+	u32 ref_count;
+};
+
+struct qp_broker_entry {
+	struct vmci_resource resource;
+	struct qp_entry qp;
+	u32 create_id;
+	u32 attach_id;
+	enum qp_broker_state state;
+	bool require_trusted_attach;
+	bool created_by_trusted;
+	bool vmci_page_files;	/* Created by VMX using VMCI page files */
+	struct vmci_queue *produce_q;
+	struct vmci_queue *consume_q;
+	struct vmci_queue_header saved_produce_q;
+	struct vmci_queue_header saved_consume_q;
+	vmci_event_release_cb wakeup_cb;
+	void *client_data;
+	void *local_mem;	/* Kernel memory for local queue pair */
+};
+
+struct qp_guest_endpoint {
+	struct vmci_resource resource;
+	struct qp_entry qp;
+	u64 num_ppns;
+	void *produce_q;
+	void *consume_q;
+	struct PPNSet ppn_set;
+};
+
+struct qp_list {
+	struct list_head head;
+	struct mutex mutex;	/* Protect queue list. */
+};
+
+static struct qp_list qp_broker_list = {
+	.head = LIST_HEAD_INIT(qp_broker_list.head),
+	.mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
+};
+
+static struct qp_list qp_guest_endpoints = {
+	.head = LIST_HEAD_INIT(qp_guest_endpoints.head),
+	.mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
+};
+
+#define INVALID_VMCI_GUEST_MEM_ID  0
+#define QPE_NUM_PAGES(_QPE) ((u32)					 \
+			     (dm_div_up(_QPE.produce_size, PAGE_SIZE) +	 \
+			      dm_div_up(_QPE.consume_size, PAGE_SIZE) + 2))
+
+
+/*
+ * Frees kernel VA space for a given queue and its queue header, and
+ * frees physical data pages.
+ */
+static void qp_free_queue(void *q, u64 size)
+{
+	struct vmci_queue *queue = q;
+
+	if (queue) {
+		u64 i = dm_div_up(size, PAGE_SIZE);
+
+		if (queue->kernel_if->mapped) {
+			vunmap(queue->kernel_if->va);
+			queue->kernel_if->va = NULL;
+		}
+
+		while (i)
+			__free_page(queue->kernel_if->page[--i]);
+
+		vfree(queue->q_header);
+	}
+}
+
+/*
+ * Allocates kernel VA space of specified size, plus space for the
+ * queue structure/kernel interface and the queue header.  Allocates
+ * physical pages for the queue data pages.
+ *
+ * PAGE m:      struct vmci_queue_header (struct vmci_queue->q_header)
+ * PAGE m+1:    struct vmci_queue
+ * PAGE m+1+q:  struct vmci_queue_kern_if (struct vmci_queue->kernel_if)
+ * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[])
+ */
+static void *qp_alloc_queue(u64 size, u32 flags)
+{
+	u64 i;
+	struct vmci_queue *queue;
+	struct vmci_queue_header *q_header;
+	const u64 num_data_pages = dm_div_up(size, PAGE_SIZE);
+	const uint queue_size =
+	    PAGE_SIZE +
+	    sizeof(*queue) + sizeof(*(queue->kernel_if)) +
+	    num_data_pages * sizeof(*(queue->kernel_if->page));
+
+	q_header = vmalloc(queue_size);
+	if (!q_header)
+		return NULL;
+
+	queue = (void *)q_header + PAGE_SIZE;
+	queue->q_header = q_header;
+	queue->saved_header = NULL;
+	queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
+	queue->kernel_if->header_page = NULL;	/* Unused in guest. */
+	queue->kernel_if->page = (struct page **)(queue->kernel_if + 1);
+	queue->kernel_if->host = false;
+	queue->kernel_if->va = NULL;
+	queue->kernel_if->mapped = false;
+
+	for (i = 0; i < num_data_pages; i++) {
+		queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0);
+		if (!queue->kernel_if->page[i])
+			goto fail;
+	}
+
+	if (vmci_qp_pinned(flags)) {
+		queue->kernel_if->va =
+		    vmap(queue->kernel_if->page, num_data_pages, VM_MAP,
+			 PAGE_KERNEL);
+		if (!queue->kernel_if->va)
+			goto fail;
+
+		queue->kernel_if->mapped = true;
+	}
+
+	return (void *)queue;
+
+ fail:
+	qp_free_queue(queue, i * PAGE_SIZE);
+	return NULL;
+}
+
+/*
+ * Copies from a given buffer or iovector to a VMCI Queue.  Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int __qp_memcpy_to_queue(struct vmci_queue *queue,
+				u64 queue_offset,
+				const void *src,
+				size_t size,
+				bool is_iovec)
+{
+	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+	size_t bytes_copied = 0;
+
+	while (bytes_copied < size) {
+		u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
+		size_t page_offset =
+		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+		void *va;
+		size_t to_copy;
+
+		if (!kernel_if->mapped)
+			va = kmap(kernel_if->page[page_index]);
+		else
+			va = (void *)((u8 *)kernel_if->va +
+				      (page_index * PAGE_SIZE));
+
+		if (size - bytes_copied > PAGE_SIZE - page_offset)
+			/* Enough payload to fill up from this page. */
+			to_copy = PAGE_SIZE - page_offset;
+		else
+			to_copy = size - bytes_copied;
+
+		if (is_iovec) {
+			struct iovec *iov = (struct iovec *)src;
+			int err;
+
+			/* The iovec will track bytes_copied internally. */
+			err = memcpy_fromiovec((u8 *)va + page_offset,
+					       iov, to_copy);
+			if (err != 0) {
+				kunmap(kernel_if->page[page_index]);
+				return VMCI_ERROR_INVALID_ARGS;
+			}
+		} else {
+			memcpy((u8 *)va + page_offset,
+			       (u8 *)src + bytes_copied, to_copy);
+		}
+
+		bytes_copied += to_copy;
+		if (!kernel_if->mapped)
+			kunmap(kernel_if->page[page_index]);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Copies to a given buffer or iovector from a VMCI Queue.  Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int __qp_memcpy_from_queue(void *dest,
+				  const struct vmci_queue *queue,
+				  u64 queue_offset,
+				  size_t size,
+				  bool is_iovec)
+{
+	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+	size_t bytes_copied = 0;
+
+	while (bytes_copied < size) {
+		u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
+		size_t page_offset =
+		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+		void *va;
+		size_t to_copy;
+
+		if (!kernel_if->mapped)
+			va = kmap(kernel_if->page[page_index]);
+		else
+			va = (void *)((u8 *)kernel_if->va +
+				      (page_index * PAGE_SIZE));
+
+		if (size - bytes_copied > PAGE_SIZE - page_offset)
+			/* Enough payload to fill up this page. */
+			to_copy = PAGE_SIZE - page_offset;
+		else
+			to_copy = size - bytes_copied;
+
+		if (is_iovec) {
+			struct iovec *iov = (struct iovec *)dest;
+			int err;
+
+			/* The iovec will track bytes_copied internally. */
+			err = memcpy_toiovec(iov, (u8 *)va + page_offset,
+					     to_copy);
+			if (err != 0) {
+				kunmap(kernel_if->page[page_index]);
+				return VMCI_ERROR_INVALID_ARGS;
+			}
+		} else {
+			memcpy((u8 *)dest + bytes_copied,
+			       (u8 *)va + page_offset, to_copy);
+		}
+
+		bytes_copied += to_copy;
+		if (!kernel_if->mapped)
+			kunmap(kernel_if->page[page_index]);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Allocates two list of PPNs --- one for the pages in the produce queue,
+ * and the other for the pages in the consume queue. Intializes the list
+ * of PPNs with the page frame numbers of the KVA for the two queues (and
+ * the queue headers).
+ */
+static int qp_alloc_ppn_set(void *prod_q,
+			    u64 num_produce_pages,
+			    void *cons_q,
+			    u64 num_consume_pages, struct PPNSet *ppn_set)
+{
+	u32 *produce_ppns;
+	u32 *consume_ppns;
+	struct vmci_queue *produce_q = prod_q;
+	struct vmci_queue *consume_q = cons_q;
+	u64 i;
+
+	if (!produce_q || !num_produce_pages || !consume_q ||
+	    !num_consume_pages || !ppn_set)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (ppn_set->initialized)
+		return VMCI_ERROR_ALREADY_EXISTS;
+
+	produce_ppns =
+	    kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
+	if (!produce_ppns)
+		return VMCI_ERROR_NO_MEM;
+
+	consume_ppns =
+	    kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
+	if (!consume_ppns) {
+		kfree(produce_ppns);
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header));
+	for (i = 1; i < num_produce_pages; i++) {
+		unsigned long pfn;
+
+		produce_ppns[i] =
+		    page_to_pfn(produce_q->kernel_if->page[i - 1]);
+		pfn = produce_ppns[i];
+
+		/* Fail allocation if PFN isn't supported by hypervisor. */
+		if (sizeof(pfn) > sizeof(*produce_ppns)
+		    && pfn != produce_ppns[i])
+			goto ppn_error;
+	}
+
+	consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header));
+	for (i = 1; i < num_consume_pages; i++) {
+		unsigned long pfn;
+
+		consume_ppns[i] =
+		    page_to_pfn(consume_q->kernel_if->page[i - 1]);
+		pfn = consume_ppns[i];
+
+		/* Fail allocation if PFN isn't supported by hypervisor. */
+		if (sizeof(pfn) > sizeof(*consume_ppns)
+		    && pfn != consume_ppns[i])
+			goto ppn_error;
+	}
+
+	ppn_set->num_produce_pages = num_produce_pages;
+	ppn_set->num_consume_pages = num_consume_pages;
+	ppn_set->produce_ppns = produce_ppns;
+	ppn_set->consume_ppns = consume_ppns;
+	ppn_set->initialized = true;
+	return VMCI_SUCCESS;
+
+ ppn_error:
+	kfree(produce_ppns);
+	kfree(consume_ppns);
+	return VMCI_ERROR_INVALID_ARGS;
+}
+
+/*
+ * Frees the two list of PPNs for a queue pair.
+ */
+static void qp_free_ppn_set(struct PPNSet *ppn_set)
+{
+	if (ppn_set->initialized) {
+		/* Do not call these functions on NULL inputs. */
+		kfree(ppn_set->produce_ppns);
+		kfree(ppn_set->consume_ppns);
+	}
+	memset(ppn_set, 0, sizeof(*ppn_set));
+}
+
+/*
+ * Populates the list of PPNs in the hypercall structure with the PPNS
+ * of the produce queue and the consume queue.
+ */
+static int qp_populate_ppn_set(u8 *call_buf, const struct PPNSet *ppn_set)
+{
+	memcpy(call_buf, ppn_set->produce_ppns,
+	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
+	memcpy(call_buf +
+	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
+	       ppn_set->consume_ppns,
+	       ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+
+	return VMCI_SUCCESS;
+}
+
+static int qp_memcpy_to_queue(struct vmci_queue *queue,
+			      u64 queue_offset,
+			      const void *src, size_t src_offset, size_t size)
+{
+	return __qp_memcpy_to_queue(queue, queue_offset,
+				    (u8 *)src + src_offset, size, false);
+}
+
+static int qp_memcpy_from_queue(void *dest,
+				size_t dest_offset,
+				const struct vmci_queue *queue,
+				u64 queue_offset, size_t size)
+{
+	return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
+				      queue, queue_offset, size, false);
+}
+
+/*
+ * Copies from a given iovec from a VMCI Queue.
+ */
+static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
+				  u64 queue_offset,
+				  const void *src,
+				  size_t src_offset, size_t size)
+{
+
+	/*
+	 * We ignore src_offset because src is really a struct iovec * and will
+	 * maintain offset internally.
+	 */
+	return __qp_memcpy_to_queue(queue, queue_offset, src, size, true);
+}
+
+/*
+ * Copies to a given iovec from a VMCI Queue.
+ */
+static int qp_memcpy_from_queue_iov(void *dest,
+				    size_t dest_offset,
+				    const struct vmci_queue *queue,
+				    u64 queue_offset, size_t size)
+{
+	/*
+	 * We ignore dest_offset because dest is really a struct iovec * and
+	 * will maintain offset internally.
+	 */
+	return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
+}
+
+/*
+ * Allocates kernel VA space of specified size plus space for the queue
+ * and kernel interface.  This is different from the guest queue allocator,
+ * because we do not allocate our own queue header/data pages here but
+ * share those of the guest.
+ */
+static struct vmci_queue *qp_host_alloc_queue(u64 size)
+{
+	struct vmci_queue *queue;
+	const size_t num_pages = dm_div_up(size, PAGE_SIZE) + 1;
+	const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
+	const size_t queue_page_size =
+	    num_pages * sizeof(*queue->kernel_if->page);
+
+	queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
+	if (queue) {
+		queue->q_header = NULL;
+		queue->saved_header = NULL;
+		queue->kernel_if =
+		    (struct vmci_queue_kern_if *)((u8 *)queue +
+						  sizeof(*queue));
+		queue->kernel_if->host = true;
+		queue->kernel_if->mutex = NULL;
+		queue->kernel_if->num_pages = num_pages;
+		queue->kernel_if->header_page =
+		    (struct page **)((u8 *)queue + queue_size);
+		queue->kernel_if->page = &queue->kernel_if->header_page[1];
+		queue->kernel_if->va = NULL;
+		queue->kernel_if->mapped = false;
+	}
+
+	return queue;
+}
+
+/*
+ * Frees kernel memory for a given queue (header plus translation
+ * structure).
+ */
+static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
+{
+	kfree(queue);
+}
+
+/*
+ * Initialize the mutex for the pair of queues.  This mutex is used to
+ * protect the q_header and the buffer from changing out from under any
+ * users of either queue.  Of course, it's only any good if the mutexes
+ * are actually acquired.  Queue structure must lie on non-paged memory
+ * or we cannot guarantee access to the mutex.
+ */
+static void qp_init_queue_mutex(struct vmci_queue *produce_q,
+				struct vmci_queue *consume_q)
+{
+	/*
+	 * Only the host queue has shared state - the guest queues do not
+	 * need to synchronize access using a queue mutex.
+	 */
+
+	if (produce_q->kernel_if->host) {
+		produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+		consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+		mutex_init(produce_q->kernel_if->mutex);
+	}
+}
+
+/*
+ * Cleans up the mutex for the pair of queues.
+ */
+static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
+				   struct vmci_queue *consume_q)
+{
+	if (produce_q->kernel_if->host) {
+		produce_q->kernel_if->mutex = NULL;
+		consume_q->kernel_if->mutex = NULL;
+	}
+}
+
+/*
+ * Acquire the mutex for the queue.  Note that the produce_q and
+ * the consume_q share a mutex.  So, only one of the two need to
+ * be passed in to this routine.  Either will work just fine.
+ */
+static void qp_acquire_queue_mutex(struct vmci_queue *queue)
+{
+	if (queue->kernel_if->host)
+		mutex_lock(queue->kernel_if->mutex);
+}
+
+/*
+ * Release the mutex for the queue.  Note that the produce_q and
+ * the consume_q share a mutex.  So, only one of the two need to
+ * be passed in to this routine.  Either will work just fine.
+ */
+static void qp_release_queue_mutex(struct vmci_queue *queue)
+{
+	if (queue->kernel_if->host)
+		mutex_unlock(queue->kernel_if->mutex);
+}
+
+/*
+ * Helper function to release pages in the PageStoreAttachInfo
+ * previously obtained using get_user_pages.
+ */
+static void qp_release_pages(struct page **pages,
+			     u64 num_pages, bool dirty)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+
+		page_cache_release(pages[i]);
+		pages[i] = NULL;
+	}
+}
+
+/*
+ * Lock the user pages referenced by the {produce,consume}Buffer
+ * struct into memory and populate the {produce,consume}Pages
+ * arrays in the attach structure with them.
+ */
+static int qp_host_get_user_memory(u64 produce_uva,
+				   u64 consume_uva,
+				   struct vmci_queue *produce_q,
+				   struct vmci_queue *consume_q)
+{
+	int retval;
+	int err = VMCI_SUCCESS;
+
+	down_write(&current->mm->mmap_sem);
+	retval = get_user_pages(current,
+				current->mm,
+				(uintptr_t) produce_uva,
+				produce_q->kernel_if->num_pages,
+				1, 0, produce_q->kernel_if->header_page, NULL);
+	if (retval < produce_q->kernel_if->num_pages) {
+		pr_warn("get_user_pages(produce) failed (retval=%d)", retval);
+		qp_release_pages(produce_q->kernel_if->header_page, retval,
+				 false);
+		err = VMCI_ERROR_NO_MEM;
+		goto out;
+	}
+
+	retval = get_user_pages(current,
+				current->mm,
+				(uintptr_t) consume_uva,
+				consume_q->kernel_if->num_pages,
+				1, 0, consume_q->kernel_if->header_page, NULL);
+	if (retval < consume_q->kernel_if->num_pages) {
+		pr_warn("get_user_pages(consume) failed (retval=%d)", retval);
+		qp_release_pages(consume_q->kernel_if->header_page, retval,
+				 false);
+		qp_release_pages(produce_q->kernel_if->header_page,
+				 produce_q->kernel_if->num_pages, false);
+		err = VMCI_ERROR_NO_MEM;
+	}
+
+ out:
+	up_write(&current->mm->mmap_sem);
+
+	return err;
+}
+
+/*
+ * Registers the specification of the user pages used for backing a queue
+ * pair. Enough information to map in pages is stored in the OS specific
+ * part of the struct vmci_queue structure.
+ */
+static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
+					struct vmci_queue *produce_q,
+					struct vmci_queue *consume_q)
+{
+	u64 produce_uva;
+	u64 consume_uva;
+
+	/*
+	 * The new style and the old style mapping only differs in
+	 * that we either get a single or two UVAs, so we split the
+	 * single UVA range at the appropriate spot.
+	 */
+	produce_uva = page_store->pages;
+	consume_uva = page_store->pages +
+	    produce_q->kernel_if->num_pages * PAGE_SIZE;
+	return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
+				       consume_q);
+}
+
+/*
+ * Releases and removes the references to user pages stored in the attach
+ * struct.  Pages are released from the page cache and may become
+ * swappable again.
+ */
+static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
+					   struct vmci_queue *consume_q)
+{
+	qp_release_pages(produce_q->kernel_if->header_page,
+			 produce_q->kernel_if->num_pages, true);
+	memset(produce_q->kernel_if->header_page, 0,
+	       sizeof(*produce_q->kernel_if->header_page) *
+	       produce_q->kernel_if->num_pages);
+	qp_release_pages(consume_q->kernel_if->header_page,
+			 consume_q->kernel_if->num_pages, true);
+	memset(consume_q->kernel_if->header_page, 0,
+	       sizeof(*consume_q->kernel_if->header_page) *
+	       consume_q->kernel_if->num_pages);
+}
+
+/*
+ * Once qp_host_register_user_memory has been performed on a
+ * queue, the queue pair headers can be mapped into the
+ * kernel. Once mapped, they must be unmapped with
+ * qp_host_unmap_queues prior to calling
+ * qp_host_unregister_user_memory.
+ * Pages are pinned.
+ */
+static int qp_host_map_queues(struct vmci_queue *produce_q,
+			      struct vmci_queue *consume_q)
+{
+	int result;
+
+	if (!produce_q->q_header || !consume_q->q_header) {
+		struct page *headers[2];
+
+		if (produce_q->q_header != consume_q->q_header)
+			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+
+		if (produce_q->kernel_if->header_page == NULL ||
+		    *produce_q->kernel_if->header_page == NULL)
+			return VMCI_ERROR_UNAVAILABLE;
+
+		headers[0] = *produce_q->kernel_if->header_page;
+		headers[1] = *consume_q->kernel_if->header_page;
+
+		produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
+		if (produce_q->q_header != NULL) {
+			consume_q->q_header =
+			    (struct vmci_queue_header *)((u8 *)
+							 produce_q->q_header +
+							 PAGE_SIZE);
+			result = VMCI_SUCCESS;
+		} else {
+			pr_warn("vmap failed\n");
+			result = VMCI_ERROR_NO_MEM;
+		}
+	} else {
+		result = VMCI_SUCCESS;
+	}
+
+	return result;
+}
+
+/*
+ * Unmaps previously mapped queue pair headers from the kernel.
+ * Pages are unpinned.
+ */
+static int qp_host_unmap_queues(u32 gid,
+				struct vmci_queue *produce_q,
+				struct vmci_queue *consume_q)
+{
+	if (produce_q->q_header) {
+		if (produce_q->q_header < consume_q->q_header)
+			vunmap(produce_q->q_header);
+		else
+			vunmap(consume_q->q_header);
+
+		produce_q->q_header = NULL;
+		consume_q->q_header = NULL;
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle. Assumes
+ * that the list is locked.
+ */
+static struct qp_entry *qp_list_find(struct qp_list *qp_list,
+				     struct vmci_handle handle)
+{
+	struct qp_entry *entry;
+
+	if (vmci_handle_is_invalid(handle))
+		return NULL;
+
+	list_for_each_entry(entry, &qp_list->head, list_item) {
+		if (vmci_handle_is_equal(entry->handle, handle))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_guest_endpoint *
+qp_guest_handle_to_entry(struct vmci_handle handle)
+{
+	struct qp_guest_endpoint *entry;
+	struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
+
+	entry = qp ? container_of(
+		qp, struct qp_guest_endpoint, qp) : NULL;
+	return entry;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_broker_entry *
+qp_broker_handle_to_entry(struct vmci_handle handle)
+{
+	struct qp_broker_entry *entry;
+	struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
+
+	entry = qp ? container_of(
+		qp, struct qp_broker_entry, qp) : NULL;
+	return entry;
+}
+
+/*
+ * Dispatches a queue pair event message directly into the local event
+ * queue.
+ */
+static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+	u32 context_id = vmci_get_context_id();
+	struct vmci_event_qp ev;
+
+	ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_CONTEXT_RESOURCE_ID);
+	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+	ev.msg.event_data.event =
+	    attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+	ev.payload.peer_id = context_id;
+	ev.payload.handle = handle;
+
+	return vmci_event_dispatch(&ev.msg.hdr);
+}
+
+/*
+ * Allocates and initializes a qp_guest_endpoint structure.
+ * Allocates a queue_pair rid (and handle) iff the given entry has
+ * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
+ * are reserved handles.  Assumes that the QP list mutex is held
+ * by the caller.
+ */
+static struct qp_guest_endpoint *
+qp_guest_endpoint_create(struct vmci_handle handle,
+			 u32 peer,
+			 u32 flags,
+			 u64 produce_size,
+			 u64 consume_size,
+			 void *produce_q,
+			 void *consume_q)
+{
+	int result;
+	struct qp_guest_endpoint *entry;
+	/* One page each for the queue headers. */
+	const u64 num_ppns = dm_div_up(produce_size, PAGE_SIZE) +
+	    dm_div_up(consume_size, PAGE_SIZE) + 2;
+
+	if (vmci_handle_is_invalid(handle)) {
+		u32 context_id = vmci_get_context_id();
+
+		handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry) {
+		entry->qp.peer = peer;
+		entry->qp.flags = flags;
+		entry->qp.produce_size = produce_size;
+		entry->qp.consume_size = consume_size;
+		entry->qp.ref_count = 0;
+		entry->num_ppns = num_ppns;
+		entry->produce_q = produce_q;
+		entry->consume_q = consume_q;
+		INIT_LIST_HEAD(&entry->qp.list_item);
+
+		/* Add resource obj */
+		result = vmci_resource_add(&entry->resource,
+					   VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+					   handle);
+		entry->qp.handle = vmci_resource_handle(&entry->resource);
+		if ((result != VMCI_SUCCESS) ||
+		    qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
+			pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+				handle.context, handle.resource, result);
+			kfree(entry);
+			entry = NULL;
+		}
+	}
+	return entry;
+}
+
+/*
+ * Frees a qp_guest_endpoint structure.
+ */
+static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
+{
+	qp_free_ppn_set(&entry->ppn_set);
+	qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+	qp_free_queue(entry->produce_q, entry->qp.produce_size);
+	qp_free_queue(entry->consume_q, entry->qp.consume_size);
+	/* Unlink from resource hash table and free callback */
+	vmci_resource_remove(&entry->resource);
+
+	kfree(entry);
+}
+
+/*
+ * Helper to make a queue_pairAlloc hypercall when the driver is
+ * supporting a guest device.
+ */
+static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+	struct vmci_qp_alloc_msg *alloc_msg;
+	size_t msg_size;
+	int result;
+
+	if (!entry || entry->num_ppns <= 2)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	msg_size = sizeof(*alloc_msg) +
+	    (size_t) entry->num_ppns * sizeof(u32);
+	alloc_msg = kmalloc(msg_size, GFP_KERNEL);
+	if (!alloc_msg)
+		return VMCI_ERROR_NO_MEM;
+
+	alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_QUEUEPAIR_ALLOC);
+	alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+	alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	alloc_msg->handle = entry->qp.handle;
+	alloc_msg->peer = entry->qp.peer;
+	alloc_msg->flags = entry->qp.flags;
+	alloc_msg->produce_size = entry->qp.produce_size;
+	alloc_msg->consume_size = entry->qp.consume_size;
+	alloc_msg->num_ppns = entry->num_ppns;
+
+	result = qp_populate_ppn
author	George Zhang <georgezhang@vmware.com>	2013-01-08 15:54:54 -0800
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-01-08 16:15:56 -0800
commit	06164d2b72aa752ce4633184b3e0d97601017135 (patch)
tree	51e2ec49ace40729b043978fae3a1e8a5a5411c2
parent	b484b26cc7be6ccf3676deb5e03aed2609ee9a40 (diff)
download	linux-06164d2b72aa752ce4633184b3e0d97601017135.tar.gz linux-06164d2b72aa752ce4633184b3e0d97601017135.tar.bz2 linux-06164d2b72aa752ce4633184b3e0d97601017135.zip