summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorge Zhang <georgezhang@vmware.com>2013-01-08 15:54:54 -0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-01-08 16:15:56 -0800
commit06164d2b72aa752ce4633184b3e0d97601017135 (patch)
tree51e2ec49ace40729b043978fae3a1e8a5a5411c2
parentb484b26cc7be6ccf3676deb5e03aed2609ee9a40 (diff)
downloadlinux-06164d2b72aa752ce4633184b3e0d97601017135.tar.gz
linux-06164d2b72aa752ce4633184b3e0d97601017135.tar.bz2
linux-06164d2b72aa752ce4633184b3e0d97601017135.zip
VMCI: queue pairs implementation.
VMCI queue pairs allow for bi-directional ordered communication between host and guests. Signed-off-by: George Zhang <georgezhang@vmware.com> Acked-by: Andy king <acking@vmware.com> Acked-by: Dmitry Torokhov <dtor@vmware.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/misc/vmw_vmci/vmci_queue_pair.c3420
-rw-r--r--drivers/misc/vmw_vmci/vmci_queue_pair.h191
2 files changed, 3611 insertions, 0 deletions
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
new file mode 100644
index 000000000000..1123111ba1bf
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -0,0 +1,3420 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/socket.h>
+#include <linux/wait.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * In the following, we will distinguish between two kinds of VMX processes -
+ * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
+ * VMCI page files in the VMX and supporting VM to VM communication and the
+ * newer ones that use the guest memory directly. We will in the following
+ * refer to the older VMX versions as old-style VMX'en, and the newer ones as
+ * new-style VMX'en.
+ *
+ * The state transition datagram is as follows (the VMCIQPB_ prefix has been
+ * removed for readability) - see below for more details on the transtions:
+ *
+ * -------------- NEW -------------
+ * | |
+ * \_/ \_/
+ * CREATED_NO_MEM <-----------------> CREATED_MEM
+ * | | |
+ * | o-----------------------o |
+ * | | |
+ * \_/ \_/ \_/
+ * ATTACHED_NO_MEM <----------------> ATTACHED_MEM
+ * | | |
+ * | o----------------------o |
+ * | | |
+ * \_/ \_/ \_/
+ * SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
+ * | |
+ * | |
+ * -------------> gone <-------------
+ *
+ * In more detail. When a VMCI queue pair is first created, it will be in the
+ * VMCIQPB_NEW state. It will then move into one of the following states:
+ *
+ * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
+ *
+ * - the created was performed by a host endpoint, in which case there is
+ * no backing memory yet.
+ *
+ * - the create was initiated by an old-style VMX, that uses
+ * vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
+ * a later point in time. This state can be distinguished from the one
+ * above by the context ID of the creator. A host side is not allowed to
+ * attach until the page store has been set.
+ *
+ * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
+ * is created by a VMX using the queue pair device backend that
+ * sets the UVAs of the queue pair immediately and stores the
+ * information for later attachers. At this point, it is ready for
+ * the host side to attach to it.
+ *
+ * Once the queue pair is in one of the created states (with the exception of
+ * the case mentioned for older VMX'en above), it is possible to attach to the
+ * queue pair. Again we have two new states possible:
+ *
+ * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
+ * paths:
+ *
+ * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
+ * pair, and attaches to a queue pair previously created by the host side.
+ *
+ * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
+ * already created by a guest.
+ *
+ * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
+ * vmci_qp_broker_set_page_store (see below).
+ *
+ * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
+ * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
+ * bring the queue pair into this state. Once vmci_qp_broker_set_page_store
+ * is called to register the user memory, the VMCIQPB_ATTACH_MEM state
+ * will be entered.
+ *
+ * From the attached queue pair, the queue pair can enter the shutdown states
+ * when either side of the queue pair detaches. If the guest side detaches
+ * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
+ * the content of the queue pair will no longer be available. If the host
+ * side detaches first, the queue pair will either enter the
+ * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
+ * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
+ * (e.g., the host detaches while a guest is stunned).
+ *
+ * New-style VMX'en will also unmap guest memory, if the guest is
+ * quiesced, e.g., during a snapshot operation. In that case, the guest
+ * memory will no longer be available, and the queue pair will transition from
+ * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
+ * in which case the queue pair will transition from the *_NO_MEM state at that
+ * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
+ * since the peer may have either attached or detached in the meantime. The
+ * values are laid out such that ++ on a state will move from a *_NO_MEM to a
+ * *_MEM state, and vice versa.
+ */
+
+/*
+ * VMCIMemcpy{To,From}QueueFunc() prototypes. Functions of these
+ * types are passed around to enqueue and dequeue routines. Note that
+ * often the functions passed are simply wrappers around memcpy
+ * itself.
+ *
+ * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
+ * there's an unused last parameter for the hosted side. In
+ * ESX, that parameter holds a buffer type.
+ */
+typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
+ u64 queue_offset, const void *src,
+ size_t src_offset, size_t size);
+typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue,
+ u64 queue_offset, size_t size);
+
+/* The Kernel specific component of the struct vmci_queue structure. */
+struct vmci_queue_kern_if {
+ struct page **page;
+ struct page **header_page;
+ void *va;
+ struct mutex __mutex; /* Protects the queue. */
+ struct mutex *mutex; /* Shared by producer and consumer queues. */
+ bool host;
+ size_t num_pages;
+ bool mapped;
+};
+
+/*
+ * This structure is opaque to the clients.
+ */
+struct vmci_qp {
+ struct vmci_handle handle;
+ struct vmci_queue *produce_q;
+ struct vmci_queue *consume_q;
+ u64 produce_q_size;
+ u64 consume_q_size;
+ u32 peer;
+ u32 flags;
+ u32 priv_flags;
+ bool guest_endpoint;
+ unsigned int blocked;
+ unsigned int generation;
+ wait_queue_head_t event;
+};
+
+enum qp_broker_state {
+ VMCIQPB_NEW,
+ VMCIQPB_CREATED_NO_MEM,
+ VMCIQPB_CREATED_MEM,
+ VMCIQPB_ATTACHED_NO_MEM,
+ VMCIQPB_ATTACHED_MEM,
+ VMCIQPB_SHUTDOWN_NO_MEM,
+ VMCIQPB_SHUTDOWN_MEM,
+ VMCIQPB_GONE
+};
+
+#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
+ _qpb->state == VMCIQPB_ATTACHED_MEM || \
+ _qpb->state == VMCIQPB_SHUTDOWN_MEM)
+
+/*
+ * In the queue pair broker, we always use the guest point of view for
+ * the produce and consume queue values and references, e.g., the
+ * produce queue size stored is the guests produce queue size. The
+ * host endpoint will need to swap these around. The only exception is
+ * the local queue pairs on the host, in which case the host endpoint
+ * that creates the queue pair will have the right orientation, and
+ * the attaching host endpoint will need to swap.
+ */
+struct qp_entry {
+ struct list_head list_item;
+ struct vmci_handle handle;
+ u32 peer;
+ u32 flags;
+ u64 produce_size;
+ u64 consume_size;
+ u32 ref_count;
+};
+
+struct qp_broker_entry {
+ struct vmci_resource resource;
+ struct qp_entry qp;
+ u32 create_id;
+ u32 attach_id;
+ enum qp_broker_state state;
+ bool require_trusted_attach;
+ bool created_by_trusted;
+ bool vmci_page_files; /* Created by VMX using VMCI page files */
+ struct vmci_queue *produce_q;
+ struct vmci_queue *consume_q;
+ struct vmci_queue_header saved_produce_q;
+ struct vmci_queue_header saved_consume_q;
+ vmci_event_release_cb wakeup_cb;
+ void *client_data;
+ void *local_mem; /* Kernel memory for local queue pair */
+};
+
+struct qp_guest_endpoint {
+ struct vmci_resource resource;
+ struct qp_entry qp;
+ u64 num_ppns;
+ void *produce_q;
+ void *consume_q;
+ struct PPNSet ppn_set;
+};
+
+struct qp_list {
+ struct list_head head;
+ struct mutex mutex; /* Protect queue list. */
+};
+
+static struct qp_list qp_broker_list = {
+ .head = LIST_HEAD_INIT(qp_broker_list.head),
+ .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
+};
+
+static struct qp_list qp_guest_endpoints = {
+ .head = LIST_HEAD_INIT(qp_guest_endpoints.head),
+ .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
+};
+
+#define INVALID_VMCI_GUEST_MEM_ID 0
+#define QPE_NUM_PAGES(_QPE) ((u32) \
+ (dm_div_up(_QPE.produce_size, PAGE_SIZE) + \
+ dm_div_up(_QPE.consume_size, PAGE_SIZE) + 2))
+
+
+/*
+ * Frees kernel VA space for a given queue and its queue header, and
+ * frees physical data pages.
+ */
+static void qp_free_queue(void *q, u64 size)
+{
+ struct vmci_queue *queue = q;
+
+ if (queue) {
+ u64 i = dm_div_up(size, PAGE_SIZE);
+
+ if (queue->kernel_if->mapped) {
+ vunmap(queue->kernel_if->va);
+ queue->kernel_if->va = NULL;
+ }
+
+ while (i)
+ __free_page(queue->kernel_if->page[--i]);
+
+ vfree(queue->q_header);
+ }
+}
+
+/*
+ * Allocates kernel VA space of specified size, plus space for the
+ * queue structure/kernel interface and the queue header. Allocates
+ * physical pages for the queue data pages.
+ *
+ * PAGE m: struct vmci_queue_header (struct vmci_queue->q_header)
+ * PAGE m+1: struct vmci_queue
+ * PAGE m+1+q: struct vmci_queue_kern_if (struct vmci_queue->kernel_if)
+ * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[])
+ */
+static void *qp_alloc_queue(u64 size, u32 flags)
+{
+ u64 i;
+ struct vmci_queue *queue;
+ struct vmci_queue_header *q_header;
+ const u64 num_data_pages = dm_div_up(size, PAGE_SIZE);
+ const uint queue_size =
+ PAGE_SIZE +
+ sizeof(*queue) + sizeof(*(queue->kernel_if)) +
+ num_data_pages * sizeof(*(queue->kernel_if->page));
+
+ q_header = vmalloc(queue_size);
+ if (!q_header)
+ return NULL;
+
+ queue = (void *)q_header + PAGE_SIZE;
+ queue->q_header = q_header;
+ queue->saved_header = NULL;
+ queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
+ queue->kernel_if->header_page = NULL; /* Unused in guest. */
+ queue->kernel_if->page = (struct page **)(queue->kernel_if + 1);
+ queue->kernel_if->host = false;
+ queue->kernel_if->va = NULL;
+ queue->kernel_if->mapped = false;
+
+ for (i = 0; i < num_data_pages; i++) {
+ queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0);
+ if (!queue->kernel_if->page[i])
+ goto fail;
+ }
+
+ if (vmci_qp_pinned(flags)) {
+ queue->kernel_if->va =
+ vmap(queue->kernel_if->page, num_data_pages, VM_MAP,
+ PAGE_KERNEL);
+ if (!queue->kernel_if->va)
+ goto fail;
+
+ queue->kernel_if->mapped = true;
+ }
+
+ return (void *)queue;
+
+ fail:
+ qp_free_queue(queue, i * PAGE_SIZE);
+ return NULL;
+}
+
+/*
+ * Copies from a given buffer or iovector to a VMCI Queue. Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int __qp_memcpy_to_queue(struct vmci_queue *queue,
+ u64 queue_offset,
+ const void *src,
+ size_t size,
+ bool is_iovec)
+{
+ struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
+ size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ if (!kernel_if->mapped)
+ va = kmap(kernel_if->page[page_index]);
+ else
+ va = (void *)((u8 *)kernel_if->va +
+ (page_index * PAGE_SIZE));
+
+ if (size - bytes_copied > PAGE_SIZE - page_offset)
+ /* Enough payload to fill up from this page. */
+ to_copy = PAGE_SIZE - page_offset;
+ else
+ to_copy = size - bytes_copied;
+
+ if (is_iovec) {
+ struct iovec *iov = (struct iovec *)src;
+ int err;
+
+ /* The iovec will track bytes_copied internally. */
+ err = memcpy_fromiovec((u8 *)va + page_offset,
+ iov, to_copy);
+ if (err != 0) {
+ kunmap(kernel_if->page[page_index]);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+ } else {
+ memcpy((u8 *)va + page_offset,
+ (u8 *)src + bytes_copied, to_copy);
+ }
+
+ bytes_copied += to_copy;
+ if (!kernel_if->mapped)
+ kunmap(kernel_if->page[page_index]);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Copies to a given buffer or iovector from a VMCI Queue. Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int __qp_memcpy_from_queue(void *dest,
+ const struct vmci_queue *queue,
+ u64 queue_offset,
+ size_t size,
+ bool is_iovec)
+{
+ struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
+ size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ if (!kernel_if->mapped)
+ va = kmap(kernel_if->page[page_index]);
+ else
+ va = (void *)((u8 *)kernel_if->va +
+ (page_index * PAGE_SIZE));
+
+ if (size - bytes_copied > PAGE_SIZE - page_offset)
+ /* Enough payload to fill up this page. */
+ to_copy = PAGE_SIZE - page_offset;
+ else
+ to_copy = size - bytes_copied;
+
+ if (is_iovec) {
+ struct iovec *iov = (struct iovec *)dest;
+ int err;
+
+ /* The iovec will track bytes_copied internally. */
+ err = memcpy_toiovec(iov, (u8 *)va + page_offset,
+ to_copy);
+ if (err != 0) {
+ kunmap(kernel_if->page[page_index]);
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+ } else {
+ memcpy((u8 *)dest + bytes_copied,
+ (u8 *)va + page_offset, to_copy);
+ }
+
+ bytes_copied += to_copy;
+ if (!kernel_if->mapped)
+ kunmap(kernel_if->page[page_index]);
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Allocates two list of PPNs --- one for the pages in the produce queue,
+ * and the other for the pages in the consume queue. Intializes the list
+ * of PPNs with the page frame numbers of the KVA for the two queues (and
+ * the queue headers).
+ */
+static int qp_alloc_ppn_set(void *prod_q,
+ u64 num_produce_pages,
+ void *cons_q,
+ u64 num_consume_pages, struct PPNSet *ppn_set)
+{
+ u32 *produce_ppns;
+ u32 *consume_ppns;
+ struct vmci_queue *produce_q = prod_q;
+ struct vmci_queue *consume_q = cons_q;
+ u64 i;
+
+ if (!produce_q || !num_produce_pages || !consume_q ||
+ !num_consume_pages || !ppn_set)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (ppn_set->initialized)
+ return VMCI_ERROR_ALREADY_EXISTS;
+
+ produce_ppns =
+ kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
+ if (!produce_ppns)
+ return VMCI_ERROR_NO_MEM;
+
+ consume_ppns =
+ kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
+ if (!consume_ppns) {
+ kfree(produce_ppns);
+ return VMCI_ERROR_NO_MEM;
+ }
+
+ produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header));
+ for (i = 1; i < num_produce_pages; i++) {
+ unsigned long pfn;
+
+ produce_ppns[i] =
+ page_to_pfn(produce_q->kernel_if->page[i - 1]);
+ pfn = produce_ppns[i];
+
+ /* Fail allocation if PFN isn't supported by hypervisor. */
+ if (sizeof(pfn) > sizeof(*produce_ppns)
+ && pfn != produce_ppns[i])
+ goto ppn_error;
+ }
+
+ consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header));
+ for (i = 1; i < num_consume_pages; i++) {
+ unsigned long pfn;
+
+ consume_ppns[i] =
+ page_to_pfn(consume_q->kernel_if->page[i - 1]);
+ pfn = consume_ppns[i];
+
+ /* Fail allocation if PFN isn't supported by hypervisor. */
+ if (sizeof(pfn) > sizeof(*consume_ppns)
+ && pfn != consume_ppns[i])
+ goto ppn_error;
+ }
+
+ ppn_set->num_produce_pages = num_produce_pages;
+ ppn_set->num_consume_pages = num_consume_pages;
+ ppn_set->produce_ppns = produce_ppns;
+ ppn_set->consume_ppns = consume_ppns;
+ ppn_set->initialized = true;
+ return VMCI_SUCCESS;
+
+ ppn_error:
+ kfree(produce_ppns);
+ kfree(consume_ppns);
+ return VMCI_ERROR_INVALID_ARGS;
+}
+
+/*
+ * Frees the two list of PPNs for a queue pair.
+ */
+static void qp_free_ppn_set(struct PPNSet *ppn_set)
+{
+ if (ppn_set->initialized) {
+ /* Do not call these functions on NULL inputs. */
+ kfree(ppn_set->produce_ppns);
+ kfree(ppn_set->consume_ppns);
+ }
+ memset(ppn_set, 0, sizeof(*ppn_set));
+}
+
+/*
+ * Populates the list of PPNs in the hypercall structure with the PPNS
+ * of the produce queue and the consume queue.
+ */
+static int qp_populate_ppn_set(u8 *call_buf, const struct PPNSet *ppn_set)
+{
+ memcpy(call_buf, ppn_set->produce_ppns,
+ ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
+ memcpy(call_buf +
+ ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
+ ppn_set->consume_ppns,
+ ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+
+ return VMCI_SUCCESS;
+}
+
+static int qp_memcpy_to_queue(struct vmci_queue *queue,
+ u64 queue_offset,
+ const void *src, size_t src_offset, size_t size)
+{
+ return __qp_memcpy_to_queue(queue, queue_offset,
+ (u8 *)src + src_offset, size, false);
+}
+
+static int qp_memcpy_from_queue(void *dest,
+ size_t dest_offset,
+ const struct vmci_queue *queue,
+ u64 queue_offset, size_t size)
+{
+ return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
+ queue, queue_offset, size, false);
+}
+
+/*
+ * Copies from a given iovec from a VMCI Queue.
+ */
+static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
+ u64 queue_offset,
+ const void *src,
+ size_t src_offset, size_t size)
+{
+
+ /*
+ * We ignore src_offset because src is really a struct iovec * and will
+ * maintain offset internally.
+ */
+ return __qp_memcpy_to_queue(queue, queue_offset, src, size, true);
+}
+
+/*
+ * Copies to a given iovec from a VMCI Queue.
+ */
+static int qp_memcpy_from_queue_iov(void *dest,
+ size_t dest_offset,
+ const struct vmci_queue *queue,
+ u64 queue_offset, size_t size)
+{
+ /*
+ * We ignore dest_offset because dest is really a struct iovec * and
+ * will maintain offset internally.
+ */
+ return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
+}
+
+/*
+ * Allocates kernel VA space of specified size plus space for the queue
+ * and kernel interface. This is different from the guest queue allocator,
+ * because we do not allocate our own queue header/data pages here but
+ * share those of the guest.
+ */
+static struct vmci_queue *qp_host_alloc_queue(u64 size)
+{
+ struct vmci_queue *queue;
+ const size_t num_pages = dm_div_up(size, PAGE_SIZE) + 1;
+ const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
+ const size_t queue_page_size =
+ num_pages * sizeof(*queue->kernel_if->page);
+
+ queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
+ if (queue) {
+ queue->q_header = NULL;
+ queue->saved_header = NULL;
+ queue->kernel_if =
+ (struct vmci_queue_kern_if *)((u8 *)queue +
+ sizeof(*queue));
+ queue->kernel_if->host = true;
+ queue->kernel_if->mutex = NULL;
+ queue->kernel_if->num_pages = num_pages;
+ queue->kernel_if->header_page =
+ (struct page **)((u8 *)queue + queue_size);
+ queue->kernel_if->page = &queue->kernel_if->header_page[1];
+ queue->kernel_if->va = NULL;
+ queue->kernel_if->mapped = false;
+ }
+
+ return queue;
+}
+
+/*
+ * Frees kernel memory for a given queue (header plus translation
+ * structure).
+ */
+static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
+{
+ kfree(queue);
+}
+
+/*
+ * Initialize the mutex for the pair of queues. This mutex is used to
+ * protect the q_header and the buffer from changing out from under any
+ * users of either queue. Of course, it's only any good if the mutexes
+ * are actually acquired. Queue structure must lie on non-paged memory
+ * or we cannot guarantee access to the mutex.
+ */
+static void qp_init_queue_mutex(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ /*
+ * Only the host queue has shared state - the guest queues do not
+ * need to synchronize access using a queue mutex.
+ */
+
+ if (produce_q->kernel_if->host) {
+ produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+ consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+ mutex_init(produce_q->kernel_if->mutex);
+ }
+}
+
+/*
+ * Cleans up the mutex for the pair of queues.
+ */
+static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ if (produce_q->kernel_if->host) {
+ produce_q->kernel_if->mutex = NULL;
+ consume_q->kernel_if->mutex = NULL;
+ }
+}
+
+/*
+ * Acquire the mutex for the queue. Note that the produce_q and
+ * the consume_q share a mutex. So, only one of the two need to
+ * be passed in to this routine. Either will work just fine.
+ */
+static void qp_acquire_queue_mutex(struct vmci_queue *queue)
+{
+ if (queue->kernel_if->host)
+ mutex_lock(queue->kernel_if->mutex);
+}
+
+/*
+ * Release the mutex for the queue. Note that the produce_q and
+ * the consume_q share a mutex. So, only one of the two need to
+ * be passed in to this routine. Either will work just fine.
+ */
+static void qp_release_queue_mutex(struct vmci_queue *queue)
+{
+ if (queue->kernel_if->host)
+ mutex_unlock(queue->kernel_if->mutex);
+}
+
+/*
+ * Helper function to release pages in the PageStoreAttachInfo
+ * previously obtained using get_user_pages.
+ */
+static void qp_release_pages(struct page **pages,
+ u64 num_pages, bool dirty)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++) {
+ if (dirty)
+ set_page_dirty(pages[i]);
+
+ page_cache_release(pages[i]);
+ pages[i] = NULL;
+ }
+}
+
+/*
+ * Lock the user pages referenced by the {produce,consume}Buffer
+ * struct into memory and populate the {produce,consume}Pages
+ * arrays in the attach structure with them.
+ */
+static int qp_host_get_user_memory(u64 produce_uva,
+ u64 consume_uva,
+ struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ int retval;
+ int err = VMCI_SUCCESS;
+
+ down_write(&current->mm->mmap_sem);
+ retval = get_user_pages(current,
+ current->mm,
+ (uintptr_t) produce_uva,
+ produce_q->kernel_if->num_pages,
+ 1, 0, produce_q->kernel_if->header_page, NULL);
+ if (retval < produce_q->kernel_if->num_pages) {
+ pr_warn("get_user_pages(produce) failed (retval=%d)", retval);
+ qp_release_pages(produce_q->kernel_if->header_page, retval,
+ false);
+ err = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ retval = get_user_pages(current,
+ current->mm,
+ (uintptr_t) consume_uva,
+ consume_q->kernel_if->num_pages,
+ 1, 0, consume_q->kernel_if->header_page, NULL);
+ if (retval < consume_q->kernel_if->num_pages) {
+ pr_warn("get_user_pages(consume) failed (retval=%d)", retval);
+ qp_release_pages(consume_q->kernel_if->header_page, retval,
+ false);
+ qp_release_pages(produce_q->kernel_if->header_page,
+ produce_q->kernel_if->num_pages, false);
+ err = VMCI_ERROR_NO_MEM;
+ }
+
+ out:
+ up_write(&current->mm->mmap_sem);
+
+ return err;
+}
+
+/*
+ * Registers the specification of the user pages used for backing a queue
+ * pair. Enough information to map in pages is stored in the OS specific
+ * part of the struct vmci_queue structure.
+ */
+static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
+ struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ u64 produce_uva;
+ u64 consume_uva;
+
+ /*
+ * The new style and the old style mapping only differs in
+ * that we either get a single or two UVAs, so we split the
+ * single UVA range at the appropriate spot.
+ */
+ produce_uva = page_store->pages;
+ consume_uva = page_store->pages +
+ produce_q->kernel_if->num_pages * PAGE_SIZE;
+ return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
+ consume_q);
+}
+
+/*
+ * Releases and removes the references to user pages stored in the attach
+ * struct. Pages are released from the page cache and may become
+ * swappable again.
+ */
+static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ qp_release_pages(produce_q->kernel_if->header_page,
+ produce_q->kernel_if->num_pages, true);
+ memset(produce_q->kernel_if->header_page, 0,
+ sizeof(*produce_q->kernel_if->header_page) *
+ produce_q->kernel_if->num_pages);
+ qp_release_pages(consume_q->kernel_if->header_page,
+ consume_q->kernel_if->num_pages, true);
+ memset(consume_q->kernel_if->header_page, 0,
+ sizeof(*consume_q->kernel_if->header_page) *
+ consume_q->kernel_if->num_pages);
+}
+
+/*
+ * Once qp_host_register_user_memory has been performed on a
+ * queue, the queue pair headers can be mapped into the
+ * kernel. Once mapped, they must be unmapped with
+ * qp_host_unmap_queues prior to calling
+ * qp_host_unregister_user_memory.
+ * Pages are pinned.
+ */
+static int qp_host_map_queues(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ int result;
+
+ if (!produce_q->q_header || !consume_q->q_header) {
+ struct page *headers[2];
+
+ if (produce_q->q_header != consume_q->q_header)
+ return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+
+ if (produce_q->kernel_if->header_page == NULL ||
+ *produce_q->kernel_if->header_page == NULL)
+ return VMCI_ERROR_UNAVAILABLE;
+
+ headers[0] = *produce_q->kernel_if->header_page;
+ headers[1] = *consume_q->kernel_if->header_page;
+
+ produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
+ if (produce_q->q_header != NULL) {
+ consume_q->q_header =
+ (struct vmci_queue_header *)((u8 *)
+ produce_q->q_header +
+ PAGE_SIZE);
+ result = VMCI_SUCCESS;
+ } else {
+ pr_warn("vmap failed\n");
+ result = VMCI_ERROR_NO_MEM;
+ }
+ } else {
+ result = VMCI_SUCCESS;
+ }
+
+ return result;
+}
+
+/*
+ * Unmaps previously mapped queue pair headers from the kernel.
+ * Pages are unpinned.
+ */
+static int qp_host_unmap_queues(u32 gid,
+ struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ if (produce_q->q_header) {
+ if (produce_q->q_header < consume_q->q_header)
+ vunmap(produce_q->q_header);
+ else
+ vunmap(consume_q->q_header);
+
+ produce_q->q_header = NULL;
+ consume_q->q_header = NULL;
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle. Assumes
+ * that the list is locked.
+ */
+static struct qp_entry *qp_list_find(struct qp_list *qp_list,
+ struct vmci_handle handle)
+{
+ struct qp_entry *entry;
+
+ if (vmci_handle_is_invalid(handle))
+ return NULL;
+
+ list_for_each_entry(entry, &qp_list->head, list_item) {
+ if (vmci_handle_is_equal(entry->handle, handle))
+ return entry;
+ }
+
+ return NULL;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_guest_endpoint *
+qp_guest_handle_to_entry(struct vmci_handle handle)
+{
+ struct qp_guest_endpoint *entry;
+ struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
+
+ entry = qp ? container_of(
+ qp, struct qp_guest_endpoint, qp) : NULL;
+ return entry;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_broker_entry *
+qp_broker_handle_to_entry(struct vmci_handle handle)
+{
+ struct qp_broker_entry *entry;
+ struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
+
+ entry = qp ? container_of(
+ qp, struct qp_broker_entry, qp) : NULL;
+ return entry;
+}
+
+/*
+ * Dispatches a queue pair event message directly into the local event
+ * queue.
+ */
+static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+ u32 context_id = vmci_get_context_id();
+ struct vmci_event_qp ev;
+
+ ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+ ev.msg.event_data.event =
+ attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+ ev.payload.peer_id = context_id;
+ ev.payload.handle = handle;
+
+ return vmci_event_dispatch(&ev.msg.hdr);
+}
+
+/*
+ * Allocates and initializes a qp_guest_endpoint structure.
+ * Allocates a queue_pair rid (and handle) iff the given entry has
+ * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX
+ * are reserved handles. Assumes that the QP list mutex is held
+ * by the caller.
+ */
+static struct qp_guest_endpoint *
+qp_guest_endpoint_create(struct vmci_handle handle,
+ u32 peer,
+ u32 flags,
+ u64 produce_size,
+ u64 consume_size,
+ void *produce_q,
+ void *consume_q)
+{
+ int result;
+ struct qp_guest_endpoint *entry;
+ /* One page each for the queue headers. */
+ const u64 num_ppns = dm_div_up(produce_size, PAGE_SIZE) +
+ dm_div_up(consume_size, PAGE_SIZE) + 2;
+
+ if (vmci_handle_is_invalid(handle)) {
+ u32 context_id = vmci_get_context_id();
+
+ handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry) {
+ entry->qp.peer = peer;
+ entry->qp.flags = flags;
+ entry->qp.produce_size = produce_size;
+ entry->qp.consume_size = consume_size;
+ entry->qp.ref_count = 0;
+ entry->num_ppns = num_ppns;
+ entry->produce_q = produce_q;
+ entry->consume_q = consume_q;
+ INIT_LIST_HEAD(&entry->qp.list_item);
+
+ /* Add resource obj */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+ handle);
+ entry->qp.handle = vmci_resource_handle(&entry->resource);
+ if ((result != VMCI_SUCCESS) ||
+ qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
+ pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+ handle.context, handle.resource, result);
+ kfree(entry);
+ entry = NULL;
+ }
+ }
+ return entry;
+}
+
+/*
+ * Frees a qp_guest_endpoint structure.
+ */
+static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
+{
+ qp_free_ppn_set(&entry->ppn_set);
+ qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+ qp_free_queue(entry->produce_q, entry->qp.produce_size);
+ qp_free_queue(entry->consume_q, entry->qp.consume_size);
+ /* Unlink from resource hash table and free callback */
+ vmci_resource_remove(&entry->resource);
+
+ kfree(entry);
+}
+
+/*
+ * Helper to make a queue_pairAlloc hypercall when the driver is
+ * supporting a guest device.
+ */
+static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+ struct vmci_qp_alloc_msg *alloc_msg;
+ size_t msg_size;
+ int result;
+
+ if (!entry || entry->num_ppns <= 2)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ msg_size = sizeof(*alloc_msg) +
+ (size_t) entry->num_ppns * sizeof(u32);
+ alloc_msg = kmalloc(msg_size, GFP_KERNEL);
+ if (!alloc_msg)
+ return VMCI_ERROR_NO_MEM;
+
+ alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_ALLOC);
+ alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+ alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ alloc_msg->handle = entry->qp.handle;
+ alloc_msg->peer = entry->qp.peer;
+ alloc_msg->flags = entry->qp.flags;
+ alloc_msg->produce_size = entry->qp.produce_size;
+ alloc_msg->consume_size = entry->qp.consume_size;
+ alloc_msg->num_ppns = entry->num_ppns;
+
+ result = qp_populate_ppn