author | George Zhang <georgezhang@vmware.com> | 2013-01-08 15:54:54 -0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-01-08 16:15:56 -0800 |
commit | 06164d2b72aa752ce4633184b3e0d97601017135 (patch) | |
tree | 51e2ec49ace40729b043978fae3a1e8a5a5411c2 | |
parent | b484b26cc7be6ccf3676deb5e03aed2609ee9a40 (diff) | |
VMCI: queue pairs implementation.
VMCI queue pairs allow for bi-directional ordered communication between host and guests.
Signed-off-by: George Zhang <georgezhang@vmware.com>
Acked-by: Andy king <acking@vmware.com>
Acked-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/misc/vmw_vmci/vmci_queue_pair.c | 3420 |
-rw-r--r-- | drivers/misc/vmw_vmci/vmci_queue_pair.h | 191 |
2 files changed, 3611 insertions, 0 deletions
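For orientation, a minimal sketch of how an in-kernel client could use the queue pairs this file implements; the vmci_qpair_*() names, signatures and flag values are assumptions taken from the driver's public header (linux/vmw_vmci_api.h), not from this patch.

/*
 * Sketch only: allocate a queue pair to a peer context, enqueue a small
 * message, and detach. Error handling is minimal.
 */
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>

static int example_open_qpair(u32 peer_cid)
{
	struct vmci_qp *qpair;
	struct vmci_handle handle = VMCI_INVALID_HANDLE;
	const char msg[] = "hello";
	ssize_t written;
	int result;

	/* 4 KiB in each direction; no special flags or privileges. */
	result = vmci_qpair_alloc(&qpair, &handle, PAGE_SIZE, PAGE_SIZE,
				  peer_cid, 0, VMCI_NO_PRIVILEGE_FLAGS);
	if (result < VMCI_SUCCESS)
		return result;

	/* Ordered, copying enqueue into the produce queue. */
	written = vmci_qpair_enqueue(qpair, msg, sizeof(msg), 0);

	vmci_qpair_detach(&qpair);
	return written < 0 ? (int)written : VMCI_SUCCESS;
}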
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c new file mode 100644 index 000000000000..1123111ba1bf --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -0,0 +1,3420 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include <linux/device-mapper.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/socket.h> +#include <linux/wait.h> + +#include "vmci_handle_array.h" +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_route.h" + +/* + * In the following, we will distinguish between two kinds of VMX processes - + * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized + * VMCI page files in the VMX and supporting VM to VM communication and the + * newer ones that use the guest memory directly. We will in the following + * refer to the older VMX versions as old-style VMX'en, and the newer ones as + * new-style VMX'en. + * + * The state transition datagram is as follows (the VMCIQPB_ prefix has been + * removed for readability) - see below for more details on the transtions: + * + * -------------- NEW ------------- + * | | + * \_/ \_/ + * CREATED_NO_MEM <-----------------> CREATED_MEM + * | | | + * | o-----------------------o | + * | | | + * \_/ \_/ \_/ + * ATTACHED_NO_MEM <----------------> ATTACHED_MEM + * | | | + * | o----------------------o | + * | | | + * \_/ \_/ \_/ + * SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM + * | | + * | | + * -------------> gone <------------- + * + * In more detail. When a VMCI queue pair is first created, it will be in the + * VMCIQPB_NEW state. It will then move into one of the following states: + * + * - VMCIQPB_CREATED_NO_MEM: this state indicates that either: + * + * - the created was performed by a host endpoint, in which case there is + * no backing memory yet. + * + * - the create was initiated by an old-style VMX, that uses + * vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at + * a later point in time. This state can be distinguished from the one + * above by the context ID of the creator. A host side is not allowed to + * attach until the page store has been set. + * + * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair + * is created by a VMX using the queue pair device backend that + * sets the UVAs of the queue pair immediately and stores the + * information for later attachers. At this point, it is ready for + * the host side to attach to it. + * + * Once the queue pair is in one of the created states (with the exception of + * the case mentioned for older VMX'en above), it is possible to attach to the + * queue pair. 
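To make the first transition above concrete, a hedged sketch (not code from this patch) of how the initial broker state could be chosen; the VMCIQPB_* values are the ones declared further down in this file, and "mem_supplied" stands for "the creator provided backing guest memory at create time" (a new-style VMX).

static enum qp_broker_state example_initial_state(bool created_by_host,
						  bool mem_supplied)
{
	if (!created_by_host && mem_supplied)
		return VMCIQPB_CREATED_MEM;	/* host may attach right away */

	/* Host create, or old-style VMX that sets the page store later. */
	return VMCIQPB_CREATED_NO_MEM;
}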
Again we have two new states possible: + * + * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following + * paths: + * + * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue + * pair, and attaches to a queue pair previously created by the host side. + * + * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair + * already created by a guest. + * + * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls + * vmci_qp_broker_set_page_store (see below). + * + * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the + * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will + * bring the queue pair into this state. Once vmci_qp_broker_set_page_store + * is called to register the user memory, the VMCIQPB_ATTACH_MEM state + * will be entered. + * + * From the attached queue pair, the queue pair can enter the shutdown states + * when either side of the queue pair detaches. If the guest side detaches + * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where + * the content of the queue pair will no longer be available. If the host + * side detaches first, the queue pair will either enter the + * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or + * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped + * (e.g., the host detaches while a guest is stunned). + * + * New-style VMX'en will also unmap guest memory, if the guest is + * quiesced, e.g., during a snapshot operation. In that case, the guest + * memory will no longer be available, and the queue pair will transition from + * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more, + * in which case the queue pair will transition from the *_NO_MEM state at that + * point back to the *_MEM state. Note that the *_NO_MEM state may have changed, + * since the peer may have either attached or detached in the meantime. The + * values are laid out such that ++ on a state will move from a *_NO_MEM to a + * *_MEM state, and vice versa. + */ + +/* + * VMCIMemcpy{To,From}QueueFunc() prototypes. Functions of these + * types are passed around to enqueue and dequeue routines. Note that + * often the functions passed are simply wrappers around memcpy + * itself. + * + * Note: In order for the memcpy typedefs to be compatible with the VMKernel, + * there's an unused last parameter for the hosted side. In + * ESX, that parameter holds a buffer type. + */ +typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue, + u64 queue_offset, const void *src, + size_t src_offset, size_t size); +typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset, + const struct vmci_queue *queue, + u64 queue_offset, size_t size); + +/* The Kernel specific component of the struct vmci_queue structure. */ +struct vmci_queue_kern_if { + struct page **page; + struct page **header_page; + void *va; + struct mutex __mutex; /* Protects the queue. */ + struct mutex *mutex; /* Shared by producer and consumer queues. */ + bool host; + size_t num_pages; + bool mapped; +}; + +/* + * This structure is opaque to the clients. 
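A short sketch of why the function-pointer typedefs above are useful: the same transfer logic can serve both plain kernel buffers and iovecs. The helper name below is made up and wrap-around handling is elided; it is not code from this patch.

static int example_copy_in(struct vmci_queue *produce_q,
			   u64 tail,
			   const void *buf,
			   size_t buf_size,
			   vmci_memcpy_to_queue_func memcpy_to_queue)
{
	/* Caller has verified that buf_size bytes fit without wrapping. */
	return memcpy_to_queue(produce_q, tail, buf, 0, buf_size);
}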
+ */ +struct vmci_qp { + struct vmci_handle handle; + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + u64 produce_q_size; + u64 consume_q_size; + u32 peer; + u32 flags; + u32 priv_flags; + bool guest_endpoint; + unsigned int blocked; + unsigned int generation; + wait_queue_head_t event; +}; + +enum qp_broker_state { + VMCIQPB_NEW, + VMCIQPB_CREATED_NO_MEM, + VMCIQPB_CREATED_MEM, + VMCIQPB_ATTACHED_NO_MEM, + VMCIQPB_ATTACHED_MEM, + VMCIQPB_SHUTDOWN_NO_MEM, + VMCIQPB_SHUTDOWN_MEM, + VMCIQPB_GONE +}; + +#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \ + _qpb->state == VMCIQPB_ATTACHED_MEM || \ + _qpb->state == VMCIQPB_SHUTDOWN_MEM) + +/* + * In the queue pair broker, we always use the guest point of view for + * the produce and consume queue values and references, e.g., the + * produce queue size stored is the guests produce queue size. The + * host endpoint will need to swap these around. The only exception is + * the local queue pairs on the host, in which case the host endpoint + * that creates the queue pair will have the right orientation, and + * the attaching host endpoint will need to swap. + */ +struct qp_entry { + struct list_head list_item; + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u32 ref_count; +}; + +struct qp_broker_entry { + struct vmci_resource resource; + struct qp_entry qp; + u32 create_id; + u32 attach_id; + enum qp_broker_state state; + bool require_trusted_attach; + bool created_by_trusted; + bool vmci_page_files; /* Created by VMX using VMCI page files */ + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + struct vmci_queue_header saved_produce_q; + struct vmci_queue_header saved_consume_q; + vmci_event_release_cb wakeup_cb; + void *client_data; + void *local_mem; /* Kernel memory for local queue pair */ +}; + +struct qp_guest_endpoint { + struct vmci_resource resource; + struct qp_entry qp; + u64 num_ppns; + void *produce_q; + void *consume_q; + struct PPNSet ppn_set; +}; + +struct qp_list { + struct list_head head; + struct mutex mutex; /* Protect queue list. */ +}; + +static struct qp_list qp_broker_list = { + .head = LIST_HEAD_INIT(qp_broker_list.head), + .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex), +}; + +static struct qp_list qp_guest_endpoints = { + .head = LIST_HEAD_INIT(qp_guest_endpoints.head), + .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex), +}; + +#define INVALID_VMCI_GUEST_MEM_ID 0 +#define QPE_NUM_PAGES(_QPE) ((u32) \ + (dm_div_up(_QPE.produce_size, PAGE_SIZE) + \ + dm_div_up(_QPE.consume_size, PAGE_SIZE) + 2)) + + +/* + * Frees kernel VA space for a given queue and its queue header, and + * frees physical data pages. + */ +static void qp_free_queue(void *q, u64 size) +{ + struct vmci_queue *queue = q; + + if (queue) { + u64 i = dm_div_up(size, PAGE_SIZE); + + if (queue->kernel_if->mapped) { + vunmap(queue->kernel_if->va); + queue->kernel_if->va = NULL; + } + + while (i) + __free_page(queue->kernel_if->page[--i]); + + vfree(queue->q_header); + } +} + +/* + * Allocates kernel VA space of specified size, plus space for the + * queue structure/kernel interface and the queue header. Allocates + * physical pages for the queue data pages. 
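The file comment notes that the enum values are laid out so that ++ on a state moves from *_NO_MEM to *_MEM and -- moves it back. A sketch of that toggle, assuming the entry is already in one of the CREATED/ATTACHED/SHUTDOWN states (not code from this patch):

static void example_set_mem_state(struct qp_broker_entry *entry,
				  bool mem_available)
{
	if (mem_available && !QPBROKERSTATE_HAS_MEM(entry))
		entry->state++;		/* *_NO_MEM -> *_MEM */
	else if (!mem_available && QPBROKERSTATE_HAS_MEM(entry))
		entry->state--;		/* *_MEM -> *_NO_MEM */
}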
+ * + * PAGE m: struct vmci_queue_header (struct vmci_queue->q_header) + * PAGE m+1: struct vmci_queue + * PAGE m+1+q: struct vmci_queue_kern_if (struct vmci_queue->kernel_if) + * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[]) + */ +static void *qp_alloc_queue(u64 size, u32 flags) +{ + u64 i; + struct vmci_queue *queue; + struct vmci_queue_header *q_header; + const u64 num_data_pages = dm_div_up(size, PAGE_SIZE); + const uint queue_size = + PAGE_SIZE + + sizeof(*queue) + sizeof(*(queue->kernel_if)) + + num_data_pages * sizeof(*(queue->kernel_if->page)); + + q_header = vmalloc(queue_size); + if (!q_header) + return NULL; + + queue = (void *)q_header + PAGE_SIZE; + queue->q_header = q_header; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->header_page = NULL; /* Unused in guest. */ + queue->kernel_if->page = (struct page **)(queue->kernel_if + 1); + queue->kernel_if->host = false; + queue->kernel_if->va = NULL; + queue->kernel_if->mapped = false; + + for (i = 0; i < num_data_pages; i++) { + queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0); + if (!queue->kernel_if->page[i]) + goto fail; + } + + if (vmci_qp_pinned(flags)) { + queue->kernel_if->va = + vmap(queue->kernel_if->page, num_data_pages, VM_MAP, + PAGE_KERNEL); + if (!queue->kernel_if->va) + goto fail; + + queue->kernel_if->mapped = true; + } + + return (void *)queue; + + fail: + qp_free_queue(queue, i * PAGE_SIZE); + return NULL; +} + +/* + * Copies from a given buffer or iovector to a VMCI Queue. Uses + * kmap()/kunmap() to dynamically map/unmap required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. + */ +static int __qp_memcpy_to_queue(struct vmci_queue *queue, + u64 queue_offset, + const void *src, + size_t size, + bool is_iovec) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE; + size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + if (!kernel_if->mapped) + va = kmap(kernel_if->page[page_index]); + else + va = (void *)((u8 *)kernel_if->va + + (page_index * PAGE_SIZE)); + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up from this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + if (is_iovec) { + struct iovec *iov = (struct iovec *)src; + int err; + + /* The iovec will track bytes_copied internally. */ + err = memcpy_fromiovec((u8 *)va + page_offset, + iov, to_copy); + if (err != 0) { + kunmap(kernel_if->page[page_index]); + return VMCI_ERROR_INVALID_ARGS; + } + } else { + memcpy((u8 *)va + page_offset, + (u8 *)src + bytes_copied, to_copy); + } + + bytes_copied += to_copy; + if (!kernel_if->mapped) + kunmap(kernel_if->page[page_index]); + } + + return VMCI_SUCCESS; +} + +/* + * Copies to a given buffer or iovector from a VMCI Queue. Uses + * kmap()/kunmap() to dynamically map/unmap required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. 
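A worked example of the split performed by the copy loops above: with 4 KiB pages, a 3000-byte copy starting at queue_offset 3000 lands as 1096 bytes into page 0 at page_offset 3000 and then 1904 bytes into page 1 at page_offset 0; when the queue is not permanently mapped, each page is kmap()ed only for the duration of its own chunk.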
+ */ +static int __qp_memcpy_from_queue(void *dest, + const struct vmci_queue *queue, + u64 queue_offset, + size_t size, + bool is_iovec) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE; + size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + if (!kernel_if->mapped) + va = kmap(kernel_if->page[page_index]); + else + va = (void *)((u8 *)kernel_if->va + + (page_index * PAGE_SIZE)); + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + if (is_iovec) { + struct iovec *iov = (struct iovec *)dest; + int err; + + /* The iovec will track bytes_copied internally. */ + err = memcpy_toiovec(iov, (u8 *)va + page_offset, + to_copy); + if (err != 0) { + kunmap(kernel_if->page[page_index]); + return VMCI_ERROR_INVALID_ARGS; + } + } else { + memcpy((u8 *)dest + bytes_copied, + (u8 *)va + page_offset, to_copy); + } + + bytes_copied += to_copy; + if (!kernel_if->mapped) + kunmap(kernel_if->page[page_index]); + } + + return VMCI_SUCCESS; +} + +/* + * Allocates two list of PPNs --- one for the pages in the produce queue, + * and the other for the pages in the consume queue. Intializes the list + * of PPNs with the page frame numbers of the KVA for the two queues (and + * the queue headers). + */ +static int qp_alloc_ppn_set(void *prod_q, + u64 num_produce_pages, + void *cons_q, + u64 num_consume_pages, struct PPNSet *ppn_set) +{ + u32 *produce_ppns; + u32 *consume_ppns; + struct vmci_queue *produce_q = prod_q; + struct vmci_queue *consume_q = cons_q; + u64 i; + + if (!produce_q || !num_produce_pages || !consume_q || + !num_consume_pages || !ppn_set) + return VMCI_ERROR_INVALID_ARGS; + + if (ppn_set->initialized) + return VMCI_ERROR_ALREADY_EXISTS; + + produce_ppns = + kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL); + if (!produce_ppns) + return VMCI_ERROR_NO_MEM; + + consume_ppns = + kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL); + if (!consume_ppns) { + kfree(produce_ppns); + return VMCI_ERROR_NO_MEM; + } + + produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header)); + for (i = 1; i < num_produce_pages; i++) { + unsigned long pfn; + + produce_ppns[i] = + page_to_pfn(produce_q->kernel_if->page[i - 1]); + pfn = produce_ppns[i]; + + /* Fail allocation if PFN isn't supported by hypervisor. */ + if (sizeof(pfn) > sizeof(*produce_ppns) + && pfn != produce_ppns[i]) + goto ppn_error; + } + + consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header)); + for (i = 1; i < num_consume_pages; i++) { + unsigned long pfn; + + consume_ppns[i] = + page_to_pfn(consume_q->kernel_if->page[i - 1]); + pfn = consume_ppns[i]; + + /* Fail allocation if PFN isn't supported by hypervisor. */ + if (sizeof(pfn) > sizeof(*consume_ppns) + && pfn != consume_ppns[i]) + goto ppn_error; + } + + ppn_set->num_produce_pages = num_produce_pages; + ppn_set->num_consume_pages = num_consume_pages; + ppn_set->produce_ppns = produce_ppns; + ppn_set->consume_ppns = consume_ppns; + ppn_set->initialized = true; + return VMCI_SUCCESS; + + ppn_error: + kfree(produce_ppns); + kfree(consume_ppns); + return VMCI_ERROR_INVALID_ARGS; +} + +/* + * Frees the two list of PPNs for a queue pair. 
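For example, with 4 KiB pages, a queue pair with a 16 KiB produce queue and a 16 KiB consume queue yields a 5-entry produce list and a 5-entry consume list from qp_alloc_ppn_set(): entry 0 holds the PFN of the queue header page and entries 1 through 4 the PFNs of the data pages, for a total of 4 + 4 + 2 = 10 PPNs, matching QPE_NUM_PAGES().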
+ */ +static void qp_free_ppn_set(struct PPNSet *ppn_set) +{ + if (ppn_set->initialized) { + /* Do not call these functions on NULL inputs. */ + kfree(ppn_set->produce_ppns); + kfree(ppn_set->consume_ppns); + } + memset(ppn_set, 0, sizeof(*ppn_set)); +} + +/* + * Populates the list of PPNs in the hypercall structure with the PPNS + * of the produce queue and the consume queue. + */ +static int qp_populate_ppn_set(u8 *call_buf, const struct PPNSet *ppn_set) +{ + memcpy(call_buf, ppn_set->produce_ppns, + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); + memcpy(call_buf + + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns), + ppn_set->consume_ppns, + ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); + + return VMCI_SUCCESS; +} + +static int qp_memcpy_to_queue(struct vmci_queue *queue, + u64 queue_offset, + const void *src, size_t src_offset, size_t size) +{ + return __qp_memcpy_to_queue(queue, queue_offset, + (u8 *)src + src_offset, size, false); +} + +static int qp_memcpy_from_queue(void *dest, + size_t dest_offset, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) +{ + return __qp_memcpy_from_queue((u8 *)dest + dest_offset, + queue, queue_offset, size, false); +} + +/* + * Copies from a given iovec from a VMCI Queue. + */ +static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, + u64 queue_offset, + const void *src, + size_t src_offset, size_t size) +{ + + /* + * We ignore src_offset because src is really a struct iovec * and will + * maintain offset internally. + */ + return __qp_memcpy_to_queue(queue, queue_offset, src, size, true); +} + +/* + * Copies to a given iovec from a VMCI Queue. + */ +static int qp_memcpy_from_queue_iov(void *dest, + size_t dest_offset, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) +{ + /* + * We ignore dest_offset because dest is really a struct iovec * and + * will maintain offset internally. + */ + return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true); +} + +/* + * Allocates kernel VA space of specified size plus space for the queue + * and kernel interface. This is different from the guest queue allocator, + * because we do not allocate our own queue header/data pages here but + * share those of the guest. + */ +static struct vmci_queue *qp_host_alloc_queue(u64 size) +{ + struct vmci_queue *queue; + const size_t num_pages = dm_div_up(size, PAGE_SIZE) + 1; + const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); + const size_t queue_page_size = + num_pages * sizeof(*queue->kernel_if->page); + + queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); + if (queue) { + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = + (struct vmci_queue_kern_if *)((u8 *)queue + + sizeof(*queue)); + queue->kernel_if->host = true; + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->header_page = + (struct page **)((u8 *)queue + queue_size); + queue->kernel_if->page = &queue->kernel_if->header_page[1]; + queue->kernel_if->va = NULL; + queue->kernel_if->mapped = false; + } + + return queue; +} + +/* + * Frees kernel memory for a given queue (header plus translation + * structure). + */ +static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size) +{ + kfree(queue); +} + +/* + * Initialize the mutex for the pair of queues. This mutex is used to + * protect the q_header and the buffer from changing out from under any + * users of either queue. 
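A sketch (an assumption, not from this patch) of how a caller would size a buffer whose tail qp_populate_ppn_set() fills: a fixed message header followed by the produce PPNs and then the consume PPNs, each 32 bits wide.

static size_t example_ppn_payload_size(const struct PPNSet *ppn_set,
				       size_t header_size)
{
	return header_size +
	       (ppn_set->num_produce_pages + ppn_set->num_consume_pages) *
	       sizeof(u32);
}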
Of course, it's only any good if the mutexes + * are actually acquired. Queue structure must lie on non-paged memory + * or we cannot guarantee access to the mutex. + */ +static void qp_init_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + /* + * Only the host queue has shared state - the guest queues do not + * need to synchronize access using a queue mutex. + */ + + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + mutex_init(produce_q->kernel_if->mutex); + } +} + +/* + * Cleans up the mutex for the pair of queues. + */ +static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = NULL; + consume_q->kernel_if->mutex = NULL; + } +} + +/* + * Acquire the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_acquire_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_lock(queue->kernel_if->mutex); +} + +/* + * Release the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_release_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_unlock(queue->kernel_if->mutex); +} + +/* + * Helper function to release pages in the PageStoreAttachInfo + * previously obtained using get_user_pages. + */ +static void qp_release_pages(struct page **pages, + u64 num_pages, bool dirty) +{ + int i; + + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty(pages[i]); + + page_cache_release(pages[i]); + pages[i] = NULL; + } +} + +/* + * Lock the user pages referenced by the {produce,consume}Buffer + * struct into memory and populate the {produce,consume}Pages + * arrays in the attach structure with them. + */ +static int qp_host_get_user_memory(u64 produce_uva, + u64 consume_uva, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int retval; + int err = VMCI_SUCCESS; + + down_write(¤t->mm->mmap_sem); + retval = get_user_pages(current, + current->mm, + (uintptr_t) produce_uva, + produce_q->kernel_if->num_pages, + 1, 0, produce_q->kernel_if->header_page, NULL); + if (retval < produce_q->kernel_if->num_pages) { + pr_warn("get_user_pages(produce) failed (retval=%d)", retval); + qp_release_pages(produce_q->kernel_if->header_page, retval, + false); + err = VMCI_ERROR_NO_MEM; + goto out; + } + + retval = get_user_pages(current, + current->mm, + (uintptr_t) consume_uva, + consume_q->kernel_if->num_pages, + 1, 0, consume_q->kernel_if->header_page, NULL); + if (retval < consume_q->kernel_if->num_pages) { + pr_warn("get_user_pages(consume) failed (retval=%d)", retval); + qp_release_pages(consume_q->kernel_if->header_page, retval, + false); + qp_release_pages(produce_q->kernel_if->header_page, + produce_q->kernel_if->num_pages, false); + err = VMCI_ERROR_NO_MEM; + } + + out: + up_write(¤t->mm->mmap_sem); + + return err; +} + +/* + * Registers the specification of the user pages used for backing a queue + * pair. Enough information to map in pages is stored in the OS specific + * part of the struct vmci_queue structure. 
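Because the host-side produce and consume queues share a single mutex, either queue can be used to take it; a sketch of the resulting locking pattern (the critical section body is elided, and the helper name is made up):

static void example_update_both_headers(struct vmci_queue *produce_q,
					struct vmci_queue *consume_q)
{
	qp_acquire_queue_mutex(produce_q);	/* also serializes consume_q */

	/* ... read or update produce_q->q_header and consume_q->q_header ... */

	qp_release_queue_mutex(produce_q);
}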
+ */ +static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + u64 produce_uva; + u64 consume_uva; + + /* + * The new style and the old style mapping only differs in + * that we either get a single or two UVAs, so we split the + * single UVA range at the appropriate spot. + */ + produce_uva = page_store->pages; + consume_uva = page_store->pages + + produce_q->kernel_if->num_pages * PAGE_SIZE; + return qp_host_get_user_memory(produce_uva, consume_uva, produce_q, + consume_q); +} + +/* + * Releases and removes the references to user pages stored in the attach + * struct. Pages are released from the page cache and may become + * swappable again. + */ +static void qp_host_unregister_user_memory(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + qp_release_pages(produce_q->kernel_if->header_page, + produce_q->kernel_if->num_pages, true); + memset(produce_q->kernel_if->header_page, 0, + sizeof(*produce_q->kernel_if->header_page) * + produce_q->kernel_if->num_pages); + qp_release_pages(consume_q->kernel_if->header_page, + consume_q->kernel_if->num_pages, true); + memset(consume_q->kernel_if->header_page, 0, + sizeof(*consume_q->kernel_if->header_page) * + consume_q->kernel_if->num_pages); +} + +/* + * Once qp_host_register_user_memory has been performed on a + * queue, the queue pair headers can be mapped into the + * kernel. Once mapped, they must be unmapped with + * qp_host_unmap_queues prior to calling + * qp_host_unregister_user_memory. + * Pages are pinned. + */ +static int qp_host_map_queues(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int result; + + if (!produce_q->q_header || !consume_q->q_header) { + struct page *headers[2]; + + if (produce_q->q_header != consume_q->q_header) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (produce_q->kernel_if->header_page == NULL || + *produce_q->kernel_if->header_page == NULL) + return VMCI_ERROR_UNAVAILABLE; + + headers[0] = *produce_q->kernel_if->header_page; + headers[1] = *consume_q->kernel_if->header_page; + + produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL); + if (produce_q->q_header != NULL) { + consume_q->q_header = + (struct vmci_queue_header *)((u8 *) + produce_q->q_header + + PAGE_SIZE); + result = VMCI_SUCCESS; + } else { + pr_warn("vmap failed\n"); + result = VMCI_ERROR_NO_MEM; + } + } else { + result = VMCI_SUCCESS; + } + + return result; +} + +/* + * Unmaps previously mapped queue pair headers from the kernel. + * Pages are unpinned. + */ +static int qp_host_unmap_queues(u32 gid, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->q_header) { + if (produce_q->q_header < consume_q->q_header) + vunmap(produce_q->q_header); + else + vunmap(consume_q->q_header); + + produce_q->q_header = NULL; + consume_q->q_header = NULL; + } + + return VMCI_SUCCESS; +} + +/* + * Finds the entry in the list corresponding to a given handle. Assumes + * that the list is locked. + */ +static struct qp_entry *qp_list_find(struct qp_list *qp_list, + struct vmci_handle handle) +{ + struct qp_entry *entry; + + if (vmci_handle_is_invalid(handle)) + return NULL; + + list_for_each_entry(entry, &qp_list->head, list_item) { + if (vmci_handle_is_equal(entry->handle, handle)) + return entry; + } + + return NULL; +} + +/* + * Finds the entry in the list corresponding to a given handle. 
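A sketch of the ordering the comments above require for host-side access to guest memory - register, map, use, unmap, unregister. Error paths are simplified and this is not code from the patch.

static int example_host_access(struct vmci_qp_page_store *page_store,
			       struct vmci_queue *produce_q,
			       struct vmci_queue *consume_q)
{
	int result;

	result = qp_host_register_user_memory(page_store, produce_q, consume_q);
	if (result < VMCI_SUCCESS)
		return result;

	result = qp_host_map_queues(produce_q, consume_q);
	if (result < VMCI_SUCCESS) {
		qp_host_unregister_user_memory(produce_q, consume_q);
		return result;
	}

	/* ... use produce_q->q_header and consume_q->q_header ... */

	qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID, produce_q, consume_q);
	qp_host_unregister_user_memory(produce_q, consume_q);
	return VMCI_SUCCESS;
}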
+ */ +static struct qp_guest_endpoint * +qp_guest_handle_to_entry(struct vmci_handle handle) +{ + struct qp_guest_endpoint *entry; + struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle); + + entry = qp ? container_of( + qp, struct qp_guest_endpoint, qp) : NULL; + return entry; +} + +/* + * Finds the entry in the list corresponding to a given handle. + */ +static struct qp_broker_entry * +qp_broker_handle_to_entry(struct vmci_handle handle) +{ + struct qp_broker_entry *entry; + struct qp_entry *qp = qp_list_find(&qp_broker_list, handle); + + entry = qp ? container_of( + qp, struct qp_broker_entry, qp) : NULL; + return entry; +} + +/* + * Dispatches a queue pair event message directly into the local event + * queue. + */ +static int qp_notify_peer_local(bool attach, struct vmci_handle handle) +{ + u32 context_id = vmci_get_context_id(); + struct vmci_event_qp ev; + + ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = + attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; + ev.payload.peer_id = context_id; + ev.payload.handle = handle; + + return vmci_event_dispatch(&ev.msg.hdr); +} + +/* + * Allocates and initializes a qp_guest_endpoint structure. + * Allocates a queue_pair rid (and handle) iff the given entry has + * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX + * are reserved handles. Assumes that the QP list mutex is held + * by the caller. + */ +static struct qp_guest_endpoint * +qp_guest_endpoint_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u64 produce_size, + u64 consume_size, + void *produce_q, + void *consume_q) +{ + int result; + struct qp_guest_endpoint *entry; + /* One page each for the queue headers. */ + const u64 num_ppns = dm_div_up(produce_size, PAGE_SIZE) + + dm_div_up(consume_size, PAGE_SIZE) + 2; + + if (vmci_handle_is_invalid(handle)) { + u32 context_id = vmci_get_context_id(); + + handle = vmci_make_handle(context_id, VMCI_INVALID_ID); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry) { + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = produce_size; + entry->qp.consume_size = consume_size; + entry->qp.ref_count = 0; + entry->num_ppns = num_ppns; + entry->produce_q = produce_q; + entry->consume_q = consume_q; + INIT_LIST_HEAD(&entry->qp.list_item); + + /* Add resource obj */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_QPAIR_GUEST, + handle); + entry->qp.handle = vmci_resource_handle(&entry->resource); + if ((result != VMCI_SUCCESS) || + qp_list_find(&qp_guest_endpoints, entry->qp.handle)) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", + handle.context, handle.resource, result); + kfree(entry); + entry = NULL; + } + } + return entry; +} + +/* + * Frees a qp_guest_endpoint structure. + */ +static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) +{ + qp_free_ppn_set(&entry->ppn_set); + qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); + qp_free_queue(entry->produce_q, entry->qp.produce_size); + qp_free_queue(entry->consume_q, entry->qp.consume_size); + /* Unlink from resource hash table and free callback */ + vmci_resource_remove(&entry->resource); + + kfree(entry); +} + +/* + * Helper to make a queue_pairAlloc hypercall when the driver is + * supporting a guest device. 
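Since qp_list_find() assumes the list mutex is held, a lookup through qp_guest_handle_to_entry() would typically be bracketed by the qp_guest_endpoints mutex; a minimal sketch (not from this patch):

static struct qp_guest_endpoint *
example_find_guest_endpoint(struct vmci_handle handle)
{
	struct qp_guest_endpoint *entry;

	mutex_lock(&qp_guest_endpoints.mutex);
	entry = qp_guest_handle_to_entry(handle);
	mutex_unlock(&qp_guest_endpoints.mutex);

	return entry;
}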
+ */
+static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+	struct vmci_qp_alloc_msg *alloc_msg;
+	size_t msg_size;
+	int result;
+
+	if (!entry || entry->num_ppns <= 2)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	msg_size = sizeof(*alloc_msg) +
+				(size_t) entry->num_ppns * sizeof(u32);
+	alloc_msg = kmalloc(msg_size, GFP_KERNEL);
+	if (!alloc_msg)
+		return VMCI_ERROR_NO_MEM;
+
+	alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_QUEUEPAIR_ALLOC);
+	alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+	alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	alloc_msg->handle = entry->qp.handle;
+	alloc_msg->peer = entry->qp.peer;
+	alloc_msg->flags = entry->qp.flags;
+	alloc_msg->produce_size = entry->qp.produce_size;
+	alloc_msg->consume_size = entry->qp.consume_size;
+	alloc_msg->num_ppns = entry->num_ppns;
+
+	result = qp_populate_ppn |
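To put numbers on the sizing above: for the 16 KiB / 16 KiB queue pair from the earlier example, entry->num_ppns is 10, so msg_size is sizeof(struct vmci_qp_alloc_msg) plus 40 bytes of PPNs, and hdr.payload_size is that total minus VMCI_DG_HEADERSIZE, since the datagram payload presumably excludes the datagram header itself.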