/* Copyright (C) 2009 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* virtio-net server in host kernel.
*/
#include <linux/compat.h>
#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/net.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/if_tun.h>
#include <linux/if_macvlan.h>
#include <linux/if_tap.h>
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/xdp.h>
#include "vhost.h"
/* Zero-copy TX switch. Defaults to 1 (enabled) and is exported as an int
 * module parameter with permissions 0444.
 */
static int experimental_zcopytx = 1;
module_param(experimental_zcopytx, int, 0444);
MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
" 1 -Enable; 0 - Disable");
/* Max number of bytes (0x80000 = 512 KiB) transferred before requeueing
 * the job. Using this limit prevents one virtqueue from starving others.
 */
#define VHOST_NET_WEIGHT 0x80000
/* Max number of packets transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others with small
 * packets, which would take a long time to reach the byte limit above.
 */
#define VHOST_NET_PKT_WEIGHT 256
/* Max number of TX used buffers for outstanding zerocopy */
#define VHOST_MAX_PEND 128
/* NOTE(review): presumably a length threshold in bytes used when choosing
 * between copying and zerocopy; the users are not visible here - confirm.
 */
#define VHOST_GOODCOPY_LEN 256
/*
 * For transmit, the used buffer len field is otherwise unused, so it is
 * overridden to track buffer status internally; used for zerocopy tx only.
 * The status values are ordered so that a single comparison tells whether
 * DMA is still pending (see VHOST_DMA_IS_DONE below).
 */
/* Lower device DMA failed */
#define VHOST_DMA_FAILED_LEN ((__force __virtio32)3)
/* Lower device DMA done */
#define VHOST_DMA_DONE_LEN ((__force __virtio32)2)
/* Lower device DMA in progress */
#define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1)
/* Buffer unused */
#define VHOST_DMA_CLEAR_LEN ((__force __virtio32)0)
/* Non-zero when len is at least VHOST_DMA_DONE_LEN, i.e. for both
 * VHOST_DMA_DONE_LEN and VHOST_DMA_FAILED_LEN; zero for the in-progress
 * and clear states.
 */
#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)
/* vhost-net feature mask: the generic VHOST_FEATURES bits plus the
 * VHOST_NET_F_VIRTIO_NET_HDR, VIRTIO_NET_F_MRG_RXBUF and
 * VIRTIO_F_IOMMU_PLATFORM bits.
 */
enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
};
/* Backend feature mask: only the VHOST_BACKEND_F_IOTLB_MSG_V2 bit is set. */
enum {
VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
};
/* Virtqueue indices. VHOST_NET_VQ_MAX is the number of virtqueues and
 * sizes the vqs[] and poll[] arrays in struct vhost_net.
 */
enum {
VHOST_NET_VQ_RX = 0,
VHOST_NET_VQ_TX = 1,
VHOST_NET_VQ_MAX = 2,
};
/* Reference counter for outstanding ubufs, tied to one virtqueue. */
struct vhost_net_ubuf_ref {
/* refcount follows semantics similar to kref:
 * 0: object is released
 * 1: no outstanding ubufs
 * >1: outstanding ubufs
 */
atomic_t refcount;
/* NOTE(review): presumably waited on until refcount drops; the waiters
 * and wakers are not visible here - confirm.
 */
wait_queue_head_t wait;
/* Virtqueue this reference object belongs to. */
struct vhost_virtqueue *vq;
};
/* NOTE(review): presumably the batch size (number of entries) used for
 * batched processing; the users are not visible here - confirm.
 */
#define VHOST_NET_BATCH 64
/* Simple array-backed buffer of opaque pointers, consumed from head
 * towards tail. It is empty when head == tail, and holds tail - head
 * entries otherwise.
 */
struct vhost_net_buf {
/* Array of stored pointers. */
void **queue;
/* Index one past the last stored entry. */
int tail;
/* Index of the next entry to be read. */
int head;
};
/* Per-virtqueue vhost-net state wrapping the generic vhost_virtqueue. */
struct vhost_net_virtqueue {
/* Generic vhost virtqueue embedded in this structure. */
struct vhost_virtqueue vq;
/* NOTE(review): presumably the header length handled by vhost itself
 * versus by the backend socket - confirm against the code that sets them.
 */
size_t vhost_hlen;
size_t sock_hlen;
/* vhost zerocopy support fields below: */
/* last used idx for outstanding DMA zerocopy buffers */
int upend_idx;
/* For TX, first used idx for DMA done zerocopy buffers
 * For RX, number of batched heads
 */
int done_idx;
/* an array of userspace buffers info */
struct ubuf_info *ubuf_info;
/* Reference counting for outstanding ubufs.
 * Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
/* NOTE(review): presumably the backend's ptr_ring from which RX entries
 * are pulled into rxq - confirm against the code that fills rxq.
 */
struct ptr_ring *rx_ring;
/* Local buffer of pointers; see struct vhost_net_buf. */
struct vhost_net_buf rxq;
};
/* Per-device vhost-net state. */
struct vhost_net {
/* Generic vhost device embedded in this structure. */
struct vhost_dev dev;
/* One entry per virtqueue, indexed by VHOST_NET_VQ_RX / VHOST_NET_VQ_TX. */
struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
/* One poll structure per virtqueue, indexed the same way as vqs[]. */
struct vhost_poll poll[VHOST_NET_VQ_MAX];
/* Number of TX recently submitted.
 * Protected by tx vq lock. */
unsigned tx_packets;
/* Number of times zerocopy TX recently failed.
 * Protected by tx vq lock. */
unsigned tx_zcopy_err;
/* Flush in progress. Protected by tx vq lock. */
bool tx_flush;
};
/* NOTE(review): presumably a bitmask, by virtqueue index, of the queues on
 * which zerocopy is enabled; its users are not visible here - confirm.
 */
static unsigned vhost_net_zcopy_mask __read_mostly;
/* Peek at the entry at the head of @rxq without consuming it.
 * Returns the stored pointer, or NULL when the buffer is empty
 * (head has caught up with tail).
 */
static void *vhost_net_buf_get_ptr(struct vhost_net_buf *rxq)
{
	/* Nothing buffered: head and tail coincide. */
	if (rxq->head == rxq->tail)
		return NULL;

	return rxq->queue[rxq->head];
}
/* Number of entries currently held in @rxq, i.e. the distance from the
 * head index to the tail index.
 */
static int vhost_net_buf_get_size(struct vhost_net_buf *rxq)
{
	int first = rxq->head;
	int end = rxq->tail;

	return end - first;
}
/* Returns 1 when @rxq holds no entries (head equals tail), 0 otherwise. */
static int vhost_net_buf_is_empty(struct vhost_net_buf *rxq)
{
	if (rxq->head != rxq->tail)
		return 0;

	return 1;
}
static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
{
void *ret = vhost_net_buf_get_ptr(rxq);
|