/* Copyright (C) 2009 Red Hat, Inc.
* Copyright (C) 2006 Rusty Russell IBM Corporation
*
* Author: Michael S. Tsirkin <mst@redhat.com>
*
* Inspiration, some code, and most witty comments come from
* Documentation/virtual/lguest/lguest.c, by Rusty Russell
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Generic code for virtio server in host kernel.
*/
#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include "vhost.h"
enum {
VHOST_MEMORY_MAX_NREGIONS = 64,
VHOST_MEMORY_F_LOG = 0x1,
};
#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
struct vhost_poll *poll;
poll = container_of(pt, struct vhost_poll, table);
poll->wqh = wqh;
add_wait_queue(wqh, &poll->wait);
}
static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
void *key)
{
struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
if (!((unsigned long)key & poll->mask))
return 0;
vhost_poll_queue(poll);
return 0;
}
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
INIT_LIST_HEAD(&work->node);
work->fn = fn;
init_waitqueue_head(&work->done);
work->flushing = 0;
work->queue_seq = work->done_seq = 0;
}
EXPORT_SYMBOL_GPL(vhost_work_init);
/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
unsigned long mask, struct vhost_dev *dev)
{
init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
init_poll_funcptr(&poll->table, vhost_poll_func);
poll->mask = mask;
poll->dev = dev;
poll->wqh = NULL;
vhost_work_init(&poll->work, fn);
}
EXPORT_SYMBOL_GPL(vhost_poll_init);
/* Start polling a file. We add ourselves to file's wait queue. The caller must
* keep a reference to a file until after vhost_poll_stop is called. */
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
unsigned long mask;
int ret = 0;
if (poll->wqh)
return 0;
mask = file->f_op->poll(file, &poll->table);
if (mask)
vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
if (mask & POLLERR) {
if (poll->wqh)
remove_wait_queue(poll->wqh, &poll->wait);
ret = -EINVAL;
}
return ret;
}
EXPORT_SYMBOL_GPL(vhost_poll_start);
/* Stop polling a file. After this function returns, it becomes safe to drop the
* file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
if (poll->wqh) {
remove_wait_queue(poll->wqh, &poll->wait);
poll->wqh = NULL;
}
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);
static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
unsigned seq)
{
int left;
spin_lock_irq(&dev->work_lock);
left = seq - work->done_seq;
spin_unlock_irq(&dev->work_lock);
return left <= 0;
}
void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned seq;
int flushing;
spin_lock_irq(&dev->work_lock);
seq = work->queue_seq;
work->flushing++;
spin_unlock_irq(&dev->work_lock);
wait_event(work->done, vhost_work_seq_done(dev, work, seq));
spin_lock_irq(&dev->work_lock);
flushing = --work->flushing;
spin_unlock_irq(&dev->work_lock);
BUG_ON(flushing < 0);
}
EXPORT_SYMBOL_GPL(vhost_work_flush);
/* Flush any work that has been scheduled. When calling this, don't hold any
* locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
vhost_work_flush(poll->dev, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_flush);
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned long flags;
spin_lock_irqsave(&dev->work_lock, flags);
if (list_empty(&work->node)) {
list_add_tail(&work->node, &dev->work_list);
work->queue_seq++;
spin_unlock_irqrestore(&dev->work_lock, flags);
wake_up_process(dev->worker);
} else {
spin_unlock_irqrestore(&dev->work_lock, flags);
}
}
EXPORT_SYMBOL_GPL(vhost_work_queue);
void vhost_poll_queue(struct vhost_poll *poll)
{
vhost_work_queue(poll->dev, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);
static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
vq->num = 1;
vq->desc = NULL;
vq->avail = NULL;
vq->used = NULL;
vq->last_avail_idx = 0;
vq->avail_idx = 0;
vq->last_used_idx = 0;
vq->signalled_used = 0;
vq->signalled_used_valid = false;
vq->used_flags = 0;
vq->log_used = false;
vq->log_addr = -1ull;
vq->private_data = NULL;
vq->acked_features = 0;
vq->log_base = NULL;
vq->error_ctx = NULL;
vq->error = NULL;
vq->kick = NULL;
vq->call_ctx = NULL;
vq->call = NULL;
vq->log_ctx = NULL;
vq->memory = NULL;
}
static int vhost_worker(void *data)
{
struct vhost_dev *dev = data;
struct vhost_work *work = NULL;
unsigned uninitialized_var(seq);
mm_segment_t oldfs = get_fs();
set_fs(USER_DS);
use_mm(dev->mm);
for (;;) {
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
spin_lock_irq(&dev->work_lock);
if (work) {
work->done_seq = seq;
if (work->flushing)
wake_up_all(&work->done);
}
if (kthread_should_stop()) {
spin_unlock_irq(&dev->work_lock);
__set_current_state(TASK_RUNNING);
break;
}
if (!list_empty(&dev->work_list)) {
work = list_first_entry(&dev->work_list,
struct vhost_work, node);
list_del_init(&work->node);
seq = work->queue_seq;
} else
work = NULL;
spin_unlock_irq(&dev->work_lock);
if (work) {
__set_current_state(TASK_RUNNING);
work->fn(work);
if (need_resched())
schedule();
} else
schedule();
}
unuse_mm(dev->mm);
set_fs(oldfs);
return 0;
}
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
kfree(vq->indirect);
vq->indirect = NULL
|