summaryrefslogtreecommitdiff
path: root/drivers/lguest
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-18 09:24:01 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-18 09:24:01 -0800
commit53861af9a17022898619a2ae4ead0dfc601b7c13 (patch)
treedc11088d9e86fa1d8d8479974864153a8f976897 /drivers/lguest
parent5c2770079fb9b8c5bfb7113d9e76de66e77a0e24 (diff)
parent5b40a7daf51812b35cf05d1601a779a7043f8414 (diff)
downloadlinux-53861af9a17022898619a2ae4ead0dfc601b7c13.tar.gz
linux-53861af9a17022898619a2ae4ead0dfc601b7c13.tar.bz2
linux-53861af9a17022898619a2ae4ead0dfc601b7c13.zip
Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio updates from Rusty Russell: "OK, this has the big virtio 1.0 implementation, as specified by OASIS. On top of tht is the major rework of lguest, to use PCI and virtio 1.0, to double-check the implementation. Then comes the inevitable fixes and cleanups from that work" * tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (80 commits) virtio: don't set VIRTIO_CONFIG_S_DRIVER_OK twice. virtio_net: unconditionally define struct virtio_net_hdr_v1. tools/lguest: don't use legacy definitions for net device in example launcher. virtio: Don't expose legacy net features when VIRTIO_NET_NO_LEGACY defined. tools/lguest: use common error macros in the example launcher. tools/lguest: give virtqueues names for better error messages tools/lguest: more documentation and checking of virtio 1.0 compliance. lguest: don't look in console features to find emerg_wr. tools/lguest: don't start devices until DRIVER_OK status set. tools/lguest: handle indirect partway through chain. tools/lguest: insert driver references from the 1.0 spec (4.1 Virtio Over PCI) tools/lguest: insert device references from the 1.0 spec (4.1 Virtio Over PCI) tools/lguest: rename virtio_pci_cfg_cap field to match spec. tools/lguest: fix features_accepted logic in example launcher. tools/lguest: handle device reset correctly in example launcher. virtual: Documentation: simplify and generalize paravirt_ops.txt lguest: remove NOTIFY call and eventfd facility. lguest: remove NOTIFY facility from demonstration launcher. lguest: use the PCI console device's emerg_wr for early boot messages. lguest: always put console in PCI slot #1. ...
Diffstat (limited to 'drivers/lguest')
-rw-r--r--drivers/lguest/Makefile3
-rw-r--r--drivers/lguest/core.c29
-rw-r--r--drivers/lguest/hypercalls.c7
-rw-r--r--drivers/lguest/lg.h26
-rw-r--r--drivers/lguest/lguest_device.c540
-rw-r--r--drivers/lguest/lguest_user.c221
-rw-r--r--drivers/lguest/page_tables.c75
-rw-r--r--drivers/lguest/x86/core.c198
8 files changed, 252 insertions, 847 deletions
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index c4197503900e..16f52ee73994 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -1,6 +1,3 @@
-# Guest requires the device configuration and probing code.
-obj-$(CONFIG_LGUEST_GUEST) += lguest_device.o
-
# Host requires the other files, which can be a module.
obj-$(CONFIG_LGUEST) += lg.o
lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 6590558d1d31..7dc93aa004c8 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -208,6 +208,14 @@ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
*/
int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
{
+ /* If the launcher asked for a register with LHREQ_GETREG */
+ if (cpu->reg_read) {
+ if (put_user(*cpu->reg_read, user))
+ return -EFAULT;
+ cpu->reg_read = NULL;
+ return sizeof(*cpu->reg_read);
+ }
+
/* We stop running once the Guest is dead. */
while (!cpu->lg->dead) {
unsigned int irq;
@@ -217,21 +225,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
if (cpu->hcall)
do_hypercalls(cpu);
- /*
- * It's possible the Guest did a NOTIFY hypercall to the
- * Launcher.
- */
- if (cpu->pending_notify) {
- /*
- * Does it just needs to write to a registered
- * eventfd (ie. the appropriate virtqueue thread)?
- */
- if (!send_notify_to_eventfd(cpu)) {
- /* OK, we tell the main Launcher. */
- if (put_user(cpu->pending_notify, user))
- return -EFAULT;
- return sizeof(cpu->pending_notify);
- }
+ /* Do we have to tell the Launcher about a trap? */
+ if (cpu->pending.trap) {
+ if (copy_to_user(user, &cpu->pending,
+ sizeof(cpu->pending)))
+ return -EFAULT;
+ return sizeof(cpu->pending);
}
/*
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 83511eb0923d..1219af493c0f 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -117,9 +117,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
/* Similarly, this sets the halted flag for run_guest(). */
cpu->halted = 1;
break;
- case LHCALL_NOTIFY:
- cpu->pending_notify = args->arg1;
- break;
default:
/* It should be an architecture-specific hypercall. */
if (lguest_arch_do_hcall(cpu, args))
@@ -189,7 +186,7 @@ static void do_async_hcalls(struct lg_cpu *cpu)
* Stop doing hypercalls if they want to notify the Launcher:
* it needs to service this first.
*/
- if (cpu->pending_notify)
+ if (cpu->pending.trap)
break;
}
}
@@ -280,7 +277,7 @@ void do_hypercalls(struct lg_cpu *cpu)
* NOTIFY to the Launcher, we want to return now. Otherwise we do
* the hypercall.
*/
- if (!cpu->pending_notify) {
+ if (!cpu->pending.trap) {
do_hcall(cpu, cpu->hcall);
/*
* Tricky point: we reset the hcall pointer to mark the
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 2eef40be4c04..307e8b39e7d1 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -50,7 +50,10 @@ struct lg_cpu {
/* Bitmap of what has changed: see CHANGED_* above. */
int changed;
- unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
+ /* Pending operation. */
+ struct lguest_pending pending;
+
+ unsigned long *reg_read; /* register from LHREQ_GETREG */
/* At end of a page shared mapped over lguest_pages in guest. */
unsigned long regs_page;
@@ -78,24 +81,18 @@ struct lg_cpu {
struct lg_cpu_arch arch;
};
-struct lg_eventfd {
- unsigned long addr;
- struct eventfd_ctx *event;
-};
-
-struct lg_eventfd_map {
- unsigned int num;
- struct lg_eventfd map[];
-};
-
/* The private info the thread maintains about the guest. */
struct lguest {
struct lguest_data __user *lguest_data;
struct lg_cpu cpus[NR_CPUS];
unsigned int nr_cpus;
+ /* Valid guest memory pages must be < this. */
u32 pfn_limit;
+ /* Device memory is >= pfn_limit and < device_limit. */
+ u32 device_limit;
+
/*
* This provides the offset to the base of guest-physical memory in the
* Launcher.
@@ -110,8 +107,6 @@ struct lguest {
unsigned int stack_pages;
u32 tsc_khz;
- struct lg_eventfd_map *eventfds;
-
/* Dead? */
const char *dead;
};
@@ -197,8 +192,10 @@ void guest_pagetable_flush_user(struct lg_cpu *cpu);
void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
unsigned long vaddr, pte_t val);
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
-bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode);
+bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode,
+ unsigned long *iomem);
void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
+bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr);
unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
void page_table_guest_data_init(struct lg_cpu *cpu);
@@ -210,6 +207,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu);
int lguest_arch_init_hypercalls(struct lg_cpu *cpu);
int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args);
void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start);
+unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any);
/* <arch>/switcher.S: */
extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
deleted file mode 100644
index 89088d6538fd..000000000000
--- a/drivers/lguest/lguest_device.c
+++ /dev/null
@@ -1,540 +0,0 @@
-/*P:050
- * Lguest guests use a very simple method to describe devices. It's a
- * series of device descriptors contained just above the top of normal Guest
- * memory.
- *
- * We use the standard "virtio" device infrastructure, which provides us with a
- * console, a network and a block driver. Each one expects some configuration
- * information and a "virtqueue" or two to send and receive data.
-:*/
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/lguest_launcher.h>
-#include <linux/virtio.h>
-#include <linux/virtio_config.h>
-#include <linux/interrupt.h>
-#include <linux/virtio_ring.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <asm/paravirt.h>
-#include <asm/lguest_hcall.h>
-
-/* The pointer to our (page) of device descriptions. */
-static void *lguest_devices;
-
-/*
- * For Guests, device memory can be used as normal memory, so we cast away the
- * __iomem to quieten sparse.
- */
-static inline void *lguest_map(unsigned long phys_addr, unsigned long pages)
-{
- return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages);
-}
-
-static inline void lguest_unmap(void *addr)
-{
- iounmap((__force void __iomem *)addr);
-}
-
-/*D:100
- * Each lguest device is just a virtio device plus a pointer to its entry
- * in the lguest_devices page.
- */
-struct lguest_device {
- struct virtio_device vdev;
-
- /* The entry in the lguest_devices page for this device. */
- struct lguest_device_desc *desc;
-};
-
-/*
- * Since the virtio infrastructure hands us a pointer to the virtio_device all
- * the time, it helps to have a curt macro to get a pointer to the struct
- * lguest_device it's enclosed in.
- */
-#define to_lgdev(vd) container_of(vd, struct lguest_device, vdev)
-
-/*D:130
- * Device configurations
- *
- * The configuration information for a device consists of one or more
- * virtqueues, a feature bitmap, and some configuration bytes. The
- * configuration bytes don't really matter to us: the Launcher sets them up, and
- * the driver will look at them during setup.
- *
- * A convenient routine to return the device's virtqueue config array:
- * immediately after the descriptor.
- */
-static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc)
-{
- return (void *)(desc + 1);
-}
-
-/* The features come immediately after the virtqueues. */
-static u8 *lg_features(const struct lguest_device_desc *desc)
-{
- return (void *)(lg_vq(desc) + desc->num_vq);
-}
-
-/* The config space comes after the two feature bitmasks. */
-static u8 *lg_config(const struct lguest_device_desc *desc)
-{
- return lg_features(desc) + desc->feature_len * 2;
-}
-
-/* The total size of the config page used by this device (incl. desc) */
-static unsigned desc_size(const struct lguest_device_desc *desc)
-{
- return sizeof(*desc)
- + desc->num_vq * sizeof(struct lguest_vqconfig)
- + desc->feature_len * 2
- + desc->config_len;
-}
-
-/* This gets the device's feature bits. */
-static u64 lg_get_features(struct virtio_device *vdev)
-{
- unsigned int i;
- u32 features = 0;
- struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
- u8 *in_features = lg_features(desc);
-
- /* We do this the slow but generic way. */
- for (i = 0; i < min(desc->feature_len * 8, 32); i++)
- if (in_features[i / 8] & (1 << (i % 8)))
- features |= (1 << i);
-
- return features;
-}
-
-/*
- * To notify on reset or feature finalization, we (ab)use the NOTIFY
- * hypercall, with the descriptor address of the device.
- */
-static void status_notify(struct virtio_device *vdev)
-{
- unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
-
- hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
-}
-
-/*
- * The virtio core takes the features the Host offers, and copies the ones
- * supported by the driver into the vdev->features array. Once that's all
- * sorted out, this routine is called so we can tell the Host which features we
- * understand and accept.
- */
-static int lg_finalize_features(struct virtio_device *vdev)
-{
- unsigned int i, bits;
- struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
- /* Second half of bitmap is features we accept. */
- u8 *out_features = lg_features(desc) + desc->feature_len;
-
- /* Give virtio_ring a chance to accept features. */
- vring_transport_features(vdev);
-
- /* Make sure we don't have any features > 32 bits! */
- BUG_ON((u32)vdev->features != vdev->features);
-
- /*
- * Since lguest is currently x86-only, we're little-endian. That
- * means we could just memcpy. But it's not time critical, and in
- * case someone copies this code, we do it the slow, obvious way.
- */
- memset(out_features, 0, desc->feature_len);
- bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
- for (i = 0; i < bits; i++) {
- if (__virtio_test_bit(vdev, i))
- out_features[i / 8] |= (1 << (i % 8));
- }
-
- /* Tell Host we've finished with this device's feature negotiation */
- status_notify(vdev);
-
- return 0;
-}
-
-/* Once they've found a field, getting a copy of it is easy. */
-static void lg_get(struct virtio_device *vdev, unsigned int offset,
- void *buf, unsigned len)
-{
- struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
-
- /* Check they didn't ask for more than the length of the config! */
- BUG_ON(offset + len > desc->config_len);
- memcpy(buf, lg_config(desc) + offset, len);
-}
-
-/* Setting the contents is also trivial. */
-static void lg_set(struct virtio_device *vdev, unsigned int offset,
- const void *buf, unsigned len)
-{
- struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
-
- /* Check they didn't ask for more than the length of the config! */
- BUG_ON(offset + len > desc->config_len);
- memcpy(lg_config(desc) + offset, buf, len);
-}
-
-/*
- * The operations to get and set the status word just access the status field
- * of the device descriptor.
- */
-static u8 lg_get_status(struct virtio_device *vdev)
-{
- return to_lgdev(vdev)->desc->status;
-}
-
-static void lg_set_status(struct virtio_device *vdev, u8 status)
-{
- BUG_ON(!status);
- to_lgdev(vdev)->desc->status = status;
-
- /* Tell Host immediately if we failed. */
- if (status & VIRTIO_CONFIG_S_FAILED)
- status_notify(vdev);
-}
-
-static void lg_reset(struct virtio_device *vdev)
-{
- /* 0 status means "reset" */
- to_lgdev(vdev)->desc->status = 0;
- status_notify(vdev);
-}
-
-/*
- * Virtqueues
- *
- * The other piece of infrastructure virtio needs is a "virtqueue": a way of
- * the Guest device registering buffers for the other side to read from or
- * write into (ie. send and receive buffers). Each device can have multiple
- * virtqueues: for example the console driver uses one queue for sending and
- * another for receiving.
- *
- * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue
- * already exists in virtio_ring.c. We just need to connect it up.
- *
- * We start with the information we need to keep about each virtqueue.
- */
-
-/*D:140 This is the information we remember about each virtqueue. */
-struct lguest_vq_info {
- /* A copy of the information contained in the device config. */
- struct lguest_vqconfig config;
-
- /* The address where we mapped the virtio ring, so we can unmap it. */
- void *pages;
-};
-
-/*
- * When the virtio_ring code wants to prod the Host, it calls us here and we
- * make a hypercall. We hand the physical address of the virtqueue so the Host
- * knows which virtqueue we're talking about.
- */
-static bool lg_notify(struct virtqueue *vq)
-{
- /*
- * We store our virtqueue information in the "priv" pointer of the
- * virtqueue structure.
- */
- struct lguest_vq_info *lvq = vq->priv;
-
- hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0, 0);
- return true;
-}
-
-/* An extern declaration inside a C file is bad form. Don't do it. */
-extern int lguest_setup_irq(unsigned int irq);
-
-/*
- * This routine finds the Nth virtqueue described in the configuration of
- * this device and sets it up.
- *
- * This is kind of an ugly duckling. It'd be nicer to have a standard
- * representation of a virtqueue in the configuration space, but it seems that
- * everyone wants to do it differently. The KVM coders want the Guest to
- * allocate its own pages and tell the Host where they are, but for lguest it's
- * simpler for the Host to simply tell us where the pages are.
- */
-static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
- unsigned index,
- void (*callback)(struct virtqueue *vq),
- const char *name)
-{
- struct lguest_device *ldev = to_lgdev(vdev);
- struct lguest_vq_info *lvq;
- struct virtqueue *vq;
- int err;
-
- if (!name)
- return NULL;
-
- /* We must have this many virtqueues. */
- if (index >= ldev->desc->num_vq)
- return ERR_PTR(-ENOENT);
-
- lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
- if (!lvq)
- return ERR_PTR(-ENOMEM);
-
- /*
- * Make a copy of the "struct lguest_vqconfig" entry, which sits after
- * the descriptor. We need a copy because the config space might not
- * be aligned correctly.
- */
- memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config));
-
- printk("Mapping virtqueue %i addr %lx\n", index,
- (unsigned long)lvq->config.pfn << PAGE_SHIFT);
- /* Figure out how many pages the ring will take, and map that memory */
- lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
- DIV_ROUND_UP(vring_size(lvq->config.num,
- LGUEST_VRING_ALIGN),
- PAGE_SIZE));
- if (!lvq->pages) {
- err = -ENOMEM;
- goto free_lvq;
- }
-
- /*
- * OK, tell virtio_ring.c to set up a virtqueue now we know its size
- * and we've got a pointer to its pages. Note that we set weak_barriers
- * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu
- * barriers.
- */
- vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev,
- true, lvq->pages, lg_notify, callback, name);
- if (!vq) {
- err = -ENOMEM;
- goto unmap;
- }
-
- /* Make sure the interrupt is allocated. */
- err = lguest_setup_irq(lvq->config.irq);
- if (err)
- goto destroy_vring;
-
- /*
- * Tell the interrupt for this virtqueue to go to the virtio_ring
- * interrupt handler.
- *
- * FIXME: We used to have a flag for the Host to tell us we could use
- * the interrupt as a source of randomness: it'd be nice to have that
- * back.
- */
- err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED,
- dev_name(&vdev->dev), vq);
- if (err)
- goto free_desc;
-
- /*
- * Last of all we hook up our 'struct lguest_vq_info" to the
- * virtqueue's priv pointer.
- */
- vq->priv = lvq;
- return vq;
-
-free_desc:
- irq_free_desc(lvq->config.irq);
-destroy_vring:
- vring_del_virtqueue(vq);
-unmap:
- lguest_unmap(lvq->pages);
-free_lvq:
- kfree(lvq);
- return ERR_PTR(err);
-}
-/*:*/
-
-/* Cleaning up a virtqueue is easy */
-static void lg_del_vq(struct virtqueue *vq)
-{
- struct lguest_vq_info *lvq = vq->priv;
-
- /* Release the interrupt */
- free_irq(lvq->config.irq, vq);
- /* Tell virtio_ring.c to free the virtqueue. */
- vring_del_virtqueue(vq);
- /* Unmap the pages containing the ring. */
- lguest_unmap(lvq->pages);
- /* Free our own queue information. */
- kfree(lvq);
-}
-
-static void lg_del_vqs(struct virtio_device *vdev)
-{
- struct virtqueue *vq, *n;
-
- list_for_each_entry_safe(vq, n, &vdev->vqs, list)
- lg_del_vq(vq);
-}
-
-static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[],
- vq_callback_t *callbacks[],
- const char *names[])
-{
- struct lguest_device *ldev = to_lgdev(vdev);
- int i;
-
- /* We must have this many virtqueues. */
- if (nvqs > ldev->desc->num_vq)
- return -ENOENT;
-
- for (i = 0; i < nvqs; ++i) {
- vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
- if (IS_ERR(vqs[i]))
- goto error;
- }
- return 0;
-
-error:
- lg_del_vqs(vdev);
- return PTR_ERR(vqs[i]);
-}
-
-static const char *lg_bus_name(struct virtio_device *vdev)
-{
- return "";
-}
-
-/* The ops structure which hooks everything together. */
-static const struct virtio_config_ops lguest_config_ops = {
- .get_features = lg_get_features,
- .finalize_features = lg_finalize_features,
- .get = lg_get,
- .set = lg_set,
- .get_status = lg_get_status,
- .set_status = lg_set_status,
- .reset = lg_reset,
- .find_vqs = lg_find_vqs,
- .del_vqs = lg_del_vqs,
- .bus_name = lg_bus_name,
-};
-
-/*
- * The root device for the lguest virtio devices. This makes them appear as
- * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2.
- */
-static struct device *lguest_root;
-
-/*D:120
- * This is the core of the lguest bus: actually adding a new device.
- * It's a separate function because it's neater that way, and because an
- * earlier version of the code supported hotplug and unplug. They were removed
- * early on because they were never used.
- *
- * As Andrew Tridgell says, "Untested code is buggy code".
- *
- * It's worth reading this carefully: we start with a pointer to the new device
- * descriptor in the "lguest_devices" page, and the offset into the device
- * descriptor page so we can uniquely identify it if things go badly wrong.
- */
-static void add_lguest_device(struct lguest_device_desc *d,
- unsigned int offset)
-{
- struct lguest_device *ldev;
-
- /* Start with zeroed memory; Linux's device layer counts on it. */
- ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
- if (!ldev) {
- printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n",
- offset, d->type);
- return;
- }
-
- /* This devices' parent is the lguest/ dir. */
- ldev->vdev.dev.parent = lguest_root;
- /*
- * The device type comes straight from the descriptor. There's also a
- * device vendor field in the virtio_device struct, which we leave as
- * 0.
- */
- ldev->vdev.id.device = d->type;
- /*
- * We have a simple set of routines for querying the device's
- * configuration information and setting its status.
- */
- ldev->vdev.config = &lguest_config_ops;
- /* And we remember the device's descriptor for lguest_config_ops. */
- ldev->desc = d;
-
- /*
- * register_virtio_device() sets up the generic fields for the struct
- * virtio_device and calls device_register(). This makes the bus
- * infrastructure look for a matching driver.
- */
- if (register_virtio_device(&ldev->vdev) != 0) {
- printk(KERN_ERR "Failed to register lguest dev %u type %u\n",
- offset, d->type);
- kfree(ldev);
- }
-}
-
-/*D:110
- * scan_devices() simply iterates through the device page. The type 0 is
- * reserved to mean "end of devices".
- */
-static void scan_devices(void)
-{
- unsigned int i;
- struct lguest_device_desc *d;
-
- /* We start at the page beginning, and skip over each entry. */
- for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
- d = lguest_devices + i;
-
- /* Once we hit a zero, stop. */
- if (d->type == 0)
- break;
-
- printk("Device at %i has size %u\n", i, desc_size(d));
- add_lguest_device(d, i);
- }
-}
-
-/*D:105
- * Fairly early in boot, lguest_devices_init() is called to set up the
- * lguest device infrastructure. We check that we are a Guest by checking
- * pv_info.name: there are other ways of checking, but this seems most
- * obvious to me.
- *
- * So we can access the "struct lguest_device_desc"s easily, we map that memory
- * and store the pointer in the global "lguest_devices". Then we register a
- * root device from which all our devices will hang (this seems to be the
- * correct sysfs incantation).
- *
- * Finally we call scan_devices() which adds all the devices found in the
- * lguest_devices page.
- */
-static int __init lguest_devices_init(void)
-{
- if (strcmp(pv_info.name, "lguest") != 0)
- return 0;
-
- lguest_root = root_device_register("lguest");
- if (IS_ERR(lguest_root))
- panic("Could not register lguest root");
-
- /* Devices are in a single page above top of "normal" mem */
- lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
-
- scan_devices();
- return 0;
-}
-/* We do this after core stuff, but before the drivers. */
-postcore_initcall(lguest_devices_init);
-
-/*D:150
- * At this point in the journey we used to now wade through the lguest
- * devices themselves: net, block and console. Since they're all now virtio
- * devices rather than lguest-specific, I've decided to ignore them. Mostly,
- * they're kind of boring. But this does mean you'll never experience the
- * thrill of reading the forbidden love scene buried deep in the block driver.
- *
- * "make Launcher" beckons, where we answer questions like "Where do Guests
- * come from?", and "What do you do when someone asks for optimization?".
- */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 4263f4cc8c55..c4c6113eb9a6 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -2,175 +2,62 @@
* launcher controls and communicates with the Guest. For example,
* the first write will tell us the Guest's memory layout and entry
* point. A read will run the Guest until something happens, such as
- * a signal or the Guest doing a NOTIFY out to the Launcher. There is
- * also a way for the Launcher to attach eventfds to particular NOTIFY
- * values instead of returning from the read() call.
+ * a signal or the Guest accessing a device.
:*/
#include <linux/uaccess.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/sched.h>
-#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include "lg.h"
-/*L:056
- * Before we move on, let's jump ahead and look at what the kernel does when
- * it needs to look up the eventfds. That will complete our picture of how we
- * use RCU.
- *
- * The notification value is in cpu->pending_notify: we return true if it went
- * to an eventfd.
- */
-bool send_notify_to_eventfd(struct lg_cpu *cpu)
-{
- unsigned int i;
- struct lg_eventfd_map *map;
-
- /*
- * This "rcu_read_lock()" helps track when someone is still looking at
- * the (RCU-using) eventfds array. It's not actually a lock at all;
- * indeed it's a noop in many configurations. (You didn't expect me to
- * explain all the RCU secrets here, did you?)
- */
- rcu_read_lock();
- /*
- * rcu_dereference is the counter-side of rcu_assign_pointer(); it
- * makes sure we don't access the memory pointed to by
- * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy,
- * but Alpha allows this! Paul McKenney points out that a really
- * aggressive compiler could have the same effect:
- * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html
- *
- * So play safe, use rcu_dereference to get the rcu-protected pointer:
- */
- map = rcu_dereference(cpu->lg->eventfds);
- /*
- * Simple array search: even if they add an eventfd while we do this,
- * we'll continue to use the old array and just won't see the new one.
- */
- for (i = 0; i < map->num; i++) {
- if (map->map[i].addr == cpu->pending_notify) {
- eventfd_signal(map->map[i].event, 1);
- cpu->pending_notify = 0;
- break;
- }
- }
- /* We're done with the rcu-protected variable cpu->lg->eventfds. */
- rcu_read_unlock();
-
- /* If we cleared the notification, it's because we found a match. */
- return cpu->pending_notify == 0;
-}
-
-/*L:055
- * One of the more tricksy tricks in the Linux Kernel is a technique called
- * Read Copy Update. Since one point of lguest is to teach lguest journeyers
- * about kernel coding, I use it here. (In case you're curious, other purposes
- * include learning about virtualization and instilling a deep appreciation for
- * simplicity and puppies).
- *
- * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we
- * add new eventfds without ever blocking readers from accessing the array.
- * The current Launcher only does this during boot, so that never happens. But
- * Read Copy Update is cool, and adding a lock risks damaging even more puppies
- * than this code does.
- *
- * We allocate a brand new one-larger array, copy the old one and add our new
- * element. Then we make the lg eventfd pointer point to the new array.
- * That's the easy part: now we need to free the old one, but we need to make
- * sure no slow CPU somewhere is still looking at it. That's what
- * synchronize_rcu does for us: waits until every CPU has indicated that it has
- * moved on to know it's no longer using the old one.
- *
- * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update.
- */
-static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
+/*L:052
+ The Launcher can get the registers, and also set some of them.
+*/
+static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input)
{
- struct lg_eventfd_map *new, *old = lg->eventfds;
-
- /*
- * We don't allow notifications on value 0 anyway (pending_notify of
- * 0 means "nothing pending").
- */
- if (!addr)
- return -EINVAL;
-
- /*
- * Replace the old array with the new one, carefully: others can
- * be accessing it at the same time.
- */
- new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
- GFP_KERNEL);
- if (!new)
- return -ENOMEM;
+ unsigned long which;
- /* First make identical copy. */
- memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
- new->num = old->num;
-
- /* Now append new entry. */
- new->map[new->num].addr = addr;
- new->map[new->num].event = eventfd_ctx_fdget(fd);
- if (IS_ERR(new->map[new->num].event)) {
- int err = PTR_ERR(new->map[new->num].event);
- kfree(new);
- return err;
- }
- new->num++;
+ /* We re-use the ptrace structure to specify which register to read. */
+ if (get_user(which, input) != 0)
+ return -EFAULT;
/*
- * Now put new one in place: rcu_assign_pointer() is a fancy way of
- * doing "lg->eventfds = new", but it uses memory barriers to make
- * absolutely sure that the contents of "new" written above is nailed
- * down before we actually do the assignment.
+ * We set up the cpu register pointer, and their next read will
+ * actually get the value (instead of running the guest).
*
- * We have to think about these kinds of things when we're operating on
- * live data without locks.
+ * The last argument 'true' says we can access any register.
*/
- rcu_assign_pointer(lg->eventfds, new);
+ cpu->reg_read = lguest_arch_regptr(cpu, which, true);
+ if (!cpu->reg_read)
+ return -ENOENT;
- /*
- * We're not in a big hurry. Wait until no one's looking at old
- * version, then free it.
- */
- synchronize_rcu();
- kfree(old);
-
- return 0;
+ /* And because this is a write() call, we return the length used. */
+ return sizeof(unsigned long) * 2;
}
-/*L:052
- * Receiving notifications from the Guest is usually done by attaching a
- * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will
- * become readable when the Guest does an LHCALL_NOTIFY with that value.
- *
- * This is really convenient for processing each virtqueue in a separate
- * thread.
- */
-static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
+static int setreg(struct lg_cpu *cpu, const unsigned long __user *input)
{
- unsigned long addr, fd;
- int err;
+ unsigned long which, value, *reg;
- if (get_user(addr, input) != 0)
+ /* We re-use the ptrace structure to specify which register to read. */
+ if (get_user(which, input) != 0)
return -EFAULT;
input++;
- if (get_user(fd, input) != 0)
+ if (get_user(value, input) != 0)
return -EFAULT;
- /*
- * Just make sure two callers don't add eventfds at once. We really
- * only need to lock against callers adding to the same Guest, so using
- * the Big Lguest Lock is overkill. But this is setup, not a fast path.
- */
- mutex_lock(&lguest_lock);
- err = add_eventfd(lg, addr, fd);
- mutex_unlock(&lguest_lock);
+ /* The last argument 'false' means we can't access all registers. */
+ reg = lguest_arch_regptr(cpu, which, false);
+ if (!reg)
+ return -ENOENT;
- return err;
+ *reg = value;
+
+ /* And because this is a write() call, we return the length used. */
+ return sizeof(unsigned long) * 3;
}
/*L:050
@@ -194,6 +81,23 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
return 0;
}
+/*L:053
+ * Deliver a trap: this is used by the Launcher if it can't emulate
+ * an instruction.
+ */
+static int trap(struct lg_cpu *cpu, const unsigned long __user *input)
+{
+ unsigned long trapnum;
+
+ if (get_user(trapnum, input) != 0)
+ return -EFAULT;
+
+ if (!deliver_trap(cpu, trapnum))
+ return -EINVAL;
+
+ return 0;
+}
+
/*L:040
* Once our Guest is initialized, the Launcher makes it run by reading
* from /