From 50babca5da1441ff33a5c9df0f749eea154ee7bd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 1 Aug 2008 16:55:49 +0100 Subject: [PATCH] [virtio] Add legacy driver for KVM virtio-net This patch adds support for the virtio-net adapter provided by KVM. Written by Laurent Vivier for Etherboot. Wrapped as legacy driver for gPXE by Stefan Hajnoczi . --- src/drivers/net/virtio-net.c | 492 ++++++++++++++++++++++++++++++++++ src/drivers/net/virtio-net.h | 44 +++ src/drivers/net/virtio-pci.h | 94 +++++++ src/drivers/net/virtio-ring.h | 93 +++++++ 4 files changed, 723 insertions(+) create mode 100644 src/drivers/net/virtio-net.c create mode 100644 src/drivers/net/virtio-net.h create mode 100644 src/drivers/net/virtio-pci.h create mode 100644 src/drivers/net/virtio-ring.h diff --git a/src/drivers/net/virtio-net.c b/src/drivers/net/virtio-net.c new file mode 100644 index 00000000..4ec154df --- /dev/null +++ b/src/drivers/net/virtio-net.c @@ -0,0 +1,492 @@ +/* virtio-net.c - etherboot driver for virtio network interface + * + * (c) Copyright 2008 Bull S.A.S. + * + * Author: Laurent Vivier + * + * some parts from Linux Virtio PCI driver + * + * Copyright IBM Corp. 2007 + * Authors: Anthony Liguori + * + * some parts from Linux Virtio Ring + * + * Copyright Rusty Russell IBM Corporation 2007 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ *
+ *
+ */
+
+#include "etherboot.h"
+#include "nic.h"
+#include "virtio-ring.h"
+#include "virtio-pci.h"
+#include "virtio-net.h"
+
+/* Print the source location of a fatal inconsistency, then spin forever. */
+#define BUG() do { \
+	printf("BUG: failure at %s:%d/%s()!\n", \
+	       __FILE__, __LINE__, __FUNCTION__); \
+	while(1); \
+} while (0)
+/* Halt through BUG() when condition is true. */
+#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
+
+/* Ethernet header */
+
+struct eth_hdr {
+	unsigned char dst_addr[ETH_ALEN];
+	unsigned char src_addr[ETH_ALEN];
+	unsigned short type;	/* stored through htons() by virtnet_transmit() */
+};
+
+/* Ethernet header followed by room for ETH_FRAME_LEN bytes of payload */
+
+struct eth_frame {
+	struct eth_hdr hdr;
+	unsigned char data[ETH_FRAME_LEN];
+};
+
+/*
+ * Backing storage for one vring. The extra PAGE_MASK bytes leave room for
+ * vring_init() to round the start of the ring up to a page boundary.
+ */
+typedef unsigned char virtio_queue_t[PAGE_MASK + vring_size(MAX_QUEUE_NUM)];
+
+/* TX: virtio header and eth buffer */
+
+static struct virtio_net_hdr tx_virtio_hdr;
+static struct eth_frame tx_eth_frame;
+
+/* RX: virtio headers and buffers */
+
+#define RX_BUF_NB 6
+static struct virtio_net_hdr rx_hdr[RX_BUF_NB];
+static unsigned char rx_buffer[RX_BUF_NB][ETH_FRAME_LEN];
+
+/* virtio queues and vrings */
+
+enum {
+	RX_INDEX = 0,
+	TX_INDEX,
+	QUEUE_NB
+};
+
+/*
+ * Per-queue state, indexed by RX_INDEX / TX_INDEX:
+ *   queue         - raw memory the ring is carved out of
+ *   vring         - ring pointers set up by vring_init()
+ *   free_head     - index of the first descriptor of the free list
+ *   last_used_idx - number of used-ring entries consumed so far
+ *   vdata         - token stored by vring_add_buf() for each chain head,
+ *                   handed back by vring_get_buf()
+ */
+static virtio_queue_t queue[QUEUE_NB];
+static struct vring vring[QUEUE_NB];
+static u16 free_head[QUEUE_NB];
+static u16 last_used_idx[QUEUE_NB];
+static u16 vdata[QUEUE_NB][MAX_QUEUE_NUM];
+
+/*
+ * Virtio PCI interface
+ *
+ */
+
+/*
+ * vp_find_vq
+ *
+ * Select queue queue_index on the device, validate its size, lay the ring
+ * out in queue[queue_index] and give the device its page frame number.
+ *
+ * The register accesses are order-dependent: the queue must be selected
+ * before its size and PFN registers are read or written.
+ *
+ * return the queue size on success, -1 if the queue is unavailable (size 0),
+ * larger than MAX_QUEUE_NUM, or already active (non-zero PFN)
+ */
+static int vp_find_vq(struct nic *nic, int queue_index)
+{
+	struct vring * vr = &vring[queue_index];
+	u16 num;
+
+	/* select the queue */
+
+	outw(queue_index, nic->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+
+	/* check if the queue is available */
+
+	num = inw(nic->ioaddr + VIRTIO_PCI_QUEUE_NUM);
+	if (!num) {
+		printf("ERROR: queue size is 0\n");
+		return -1;
+	}
+
+	if (num > MAX_QUEUE_NUM) {
+		printf("ERROR: queue size %d > %d\n", num, MAX_QUEUE_NUM);
+		return -1;
+	}
+
+	/* check if the queue is already active */
+
+	if (inl(nic->ioaddr + VIRTIO_PCI_QUEUE_PFN)) {
+		printf("ERROR: queue already active\n");
+		return -1;
+	}
+
+	/* initialize the queue */
+
+	vring_init(vr, num, (unsigned char*)&queue[queue_index]);
+
+	/* activate the queue
+	 *
+	 * NOTE: vr->desc is initialized by vring_init()
+	 */
+
+	outl((unsigned long)virt_to_phys(vr->desc) >> PAGE_SHIFT,
+	     nic->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+
+	return num;
+}
+
+/*
+ * Virtual ring management
+ *
+ */
+
+/* Clear VRING_AVAIL_F_NO_INTERRUPT in the avail flags of queue_index. */
+static void vring_enable_cb(int queue_index)
+{
+	vring[queue_index].avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+}
+
+/* Set VRING_AVAIL_F_NO_INTERRUPT in the avail flags of queue_index. */
+static void vring_disable_cb(int queue_index)
+{
+	vring[queue_index].avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+}
+
+/*
+ * vring_detach
+ *
+ * put the descriptor chain starting at desc[head] back at the beginning
+ * of the free list
+ */
+
+static void vring_detach(int queue_index, unsigned int head)
+{
+	struct vring *vr = &vring[queue_index];
+	unsigned int i;
+
+	/* find end of given descriptor */
+
+	i = head;
+	while (vr->desc[i].flags & VRING_DESC_F_NEXT)
+		i = vr->desc[i].next;
+
+	/* link it with free list and point to it */
+
+	vr->desc[i].next = free_head[queue_index];
+	wmb();
+	free_head[queue_index] = head;
+}
+
+/*
+ * vring_more_used
+ *
+ * are there any used buffers ?
+ *
+ */
+
+static inline int vring_more_used(int queue_index)
+{
+	/*
+	 * used->idx is only read here, so a write-only barrier does not
+	 * order this load; use the full barrier instead.
+	 */
+	mb();
+	return last_used_idx[queue_index] != vring[queue_index].used->idx;
+}
+
+/*
+ * vring_get_buf
+ *
+ * get a buffer from the used list
+ *
+ * The caller must have seen vring_more_used() return true. If len is not
+ * NULL it receives the length reported in the used element. The descriptor
+ * chain is put back on the free list.
+ *
+ * return the token stored by vring_add_buf() for this chain
+ *
+ */
+
+static int vring_get_buf(int queue_index, unsigned int *len)
+{
+	struct vring *vr = &vring[queue_index];
+	struct vring_used_elem *elem;
+	u32 id;
+	int ret;
+
+	elem = &vr->used->ring[last_used_idx[queue_index] % vr->num];
+	/* read the element only after the index that announced it */
+	mb();
+	id = elem->id;
+	if (len != NULL)
+		*len = elem->len;
+
+	/* id is read back from the ring: never index past the tables */
+	BUG_ON(id >= vr->num);
+
+	ret = vdata[queue_index][id];
+
+	vring_detach(queue_index, id);
+
+	last_used_idx[queue_index]++;
+
+	return ret;
+}
+
+/*
+ * Number of bytes of tx_eth_frame (ethernet header + payload) handed to
+ * the device; set by virtnet_transmit() before it queues the frame.
+ */
+static unsigned int tx_frame_len;
+
+/*
+ * vring_add_buf
+ *
+ * Build a two-descriptor chain (virtio header, then frame buffer) from the
+ * free list and publish its head in the avail ring, num_added slots after
+ * the current avail index. The avail index itself is only advanced by
+ * vring_kick().
+ *
+ * For TX_INDEX index must be 0 and the single tx buffer is used; for
+ * RX_INDEX index selects rx_hdr[index] / rx_buffer[index]. index is
+ * remembered as the token returned later by vring_get_buf().
+ *
+ */
+
+static void vring_add_buf(int queue_index, int index, int num_added)
+{
+	struct vring *vr = &vring[queue_index];
+	int i, avail, head;
+
+	BUG_ON(queue_index >= QUEUE_NB);
+
+	head = free_head[queue_index];
+	i = head;
+
+	if (queue_index == TX_INDEX) {
+
+		BUG_ON(index != 0);
+
+		/* add header into vring */
+
+		vr->desc[i].flags = VRING_DESC_F_NEXT;
+		vr->desc[i].addr = (u64)virt_to_phys(&tx_virtio_hdr);
+		vr->desc[i].len = sizeof(struct virtio_net_hdr);
+		i = vr->desc[i].next;
+
+		/* add frame buffer into vring: only the bytes really filled
+		 * in by virtnet_transmit(), not the whole static buffer */
+
+		vr->desc[i].flags = 0;
+		vr->desc[i].addr = (u64)virt_to_phys(&tx_eth_frame);
+		vr->desc[i].len = tx_frame_len;
+		i = vr->desc[i].next;
+
+	} else if (queue_index == RX_INDEX) {
+
+		BUG_ON(index >= RX_BUF_NB);
+
+		/* add header into vring */
+
+		vr->desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+		vr->desc[i].addr = (u64)virt_to_phys(&rx_hdr[index]);
+		vr->desc[i].len = sizeof(struct virtio_net_hdr);
+		i = vr->desc[i].next;
+
+		/* add frame buffer into vring */
+
+		vr->desc[i].flags = VRING_DESC_F_WRITE;
+		vr->desc[i].addr = (u64)virt_to_phys(&rx_buffer[index]);
+		vr->desc[i].len = ETH_FRAME_LEN;
+		i = vr->desc[i].next;
+	}
+
+	free_head[queue_index] = i;
+
+	vdata[queue_index][head] = index;
+
+	avail = (vr->avail->idx + num_added) % vr->num;
+	vr->avail->ring[avail] = head;
+	wmb();
+}
+
+/*
+ * vring_kick
+ *
+ * Advance the avail index by num_added entries and notify the device,
+ * unless the used flags carry VRING_USED_F_NO_NOTIFY.
+ *
+ */
+
+static void vring_kick(struct nic *nic, int queue_index, int num_added)
+{
+	struct vring *vr = &vring[queue_index];
+
+	/* ring entries must be visible before the index that exposes them */
+	wmb();
+	vr->avail->idx += num_added;
+
+	mb();
+	if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY))
+		vp_notify(nic, queue_index);
+}
+
+/*
+ * virtnet_disable
+ *
+ * Turn off ethernet interface
+ *
+ */
+
+static void virtnet_disable(struct nic *nic)
+{
+	int i;
+
+	for (i = 0; i < QUEUE_NB; i++) {
+		vring_disable_cb(i);
+		vp_del_vq(nic, i);
+	}
+	vp_reset(nic);
+}
+
+/*
+ * virtnet_poll
+ *
+ * Wait for a frame
+ *
+ * return true if there is a packet ready to read; when retrieve is zero
+ * the packet is only reported, not consumed
+ *
+ * nic->packet should contain data on return
+ * nic->packetlen should contain length of data
+ *
+ */
+static int virtnet_poll(struct nic *nic, int retrieve)
+{
+	unsigned int len;
+	u16 token;
+
+	if (!vring_more_used(RX_INDEX))
+		return 0;
+
+	if (!retrieve)
+		return 1;
+
+	token = vring_get_buf(RX_INDEX, &len);
+
+	/*
+	 * token and len are read back from the ring: validate both before
+	 * they are used as an array index and as a memcpy() length. A length
+	 * shorter than the virtio header would otherwise wrap around in the
+	 * unsigned subtraction below.
+	 */
+	BUG_ON(token >= RX_BUF_NB);
+	BUG_ON(len < sizeof(struct virtio_net_hdr));
+	BUG_ON(len > sizeof(struct virtio_net_hdr) + ETH_FRAME_LEN);
+
+	/* FIXME: check rx_hdr[token].flags */
+	len -= sizeof(struct virtio_net_hdr);
+
+	nic->packetlen = len;
+	memcpy(nic->packet, (char *)rx_buffer[token], nic->packetlen);
+
+	/* add buffer to desc */
+
+	vring_add_buf(RX_INDEX, token, 0);
+	vring_kick(nic, RX_INDEX, 1);
+
+	return 1;
+}
+
+/*
+ *
+ * virtnet_transmit
+ *
+ * Transmit a frame
+ *
+ * destaddr: destination address, ETH_ALEN bytes
+ * type:     ethernet type, converted with htons() here
+ * len:      number of payload bytes in data
+ *
+ * Does not return until the device has reported the buffer as used.
+ *
+ */
+
+static void virtnet_transmit(struct nic *nic, const char *destaddr,
+	unsigned int type, unsigned int len, const char *data)
+{
+	/*
+	 * from http://www.etherboot.org/wiki/dev/devmanual :
+	 * "You do not need more than one transmit buffer."
+	 */
+
+	/* FIXME: initialize header according to vp_get_features() */
+
+	tx_virtio_hdr.flags = 0;
+	tx_virtio_hdr.csum_offset = 0;
+	tx_virtio_hdr.csum_start = 0;
+	tx_virtio_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+	tx_virtio_hdr.gso_size = 0;
+	tx_virtio_hdr.hdr_len = 0;
+
+	/* add ethernet frame into vring */
+
+	BUG_ON(len > sizeof(tx_eth_frame.data));
+
+	memcpy(tx_eth_frame.hdr.dst_addr, destaddr, ETH_ALEN);
+	memcpy(tx_eth_frame.hdr.src_addr, nic->node_addr, ETH_ALEN);
+	tx_eth_frame.hdr.type = htons(type);
+	memcpy(tx_eth_frame.data, data, len);
+
+	/* hand over exactly header + payload, not stale buffer contents */
+	tx_frame_len = sizeof(tx_eth_frame.hdr) + len;
+
+	vring_add_buf(TX_INDEX, 0, 0);
+
+	/* the device only looks at the buffer once it has been kicked */
+
+	vring_kick(nic, TX_INDEX, 1);
+
+	/*
+	 * http://www.etherboot.org/wiki/dev/devmanual
+	 *
+	 * "You should ensure the packet is fully transmitted
+	 * before returning from this routine"
+	 *
+	 * so wait until the buffer shows up in the used ring.
+	 */
+
+	while (!vring_more_used(TX_INDEX)) {
+		mb();
+		udelay(10);
+	}
+
+	/* free desc */
+
+	(void)vring_get_buf(TX_INDEX, NULL);
+}
+
+/*
+ * virtnet_irq
+ *
+ * DISABLE / ENABLE set or clear the no-interrupt flag of both queues;
+ * FORCE does nothing.
+ *
+ */
+
+static void virtnet_irq(struct nic *nic __unused, irq_action_t action)
+{
+	switch ( action ) {
+	case DISABLE :
+		vring_disable_cb(RX_INDEX);
+		vring_disable_cb(TX_INDEX);
+		break;
+	case ENABLE :
+		vring_enable_cb(RX_INDEX);
+		vring_enable_cb(TX_INDEX);
+		break;
+	case FORCE :
+		break;
+	}
+}
+
+/*
+ * provide_buffers
+ *
+ * Queue all RX_BUF_NB receive buffers, then expose them to the device
+ * with a single kick.
+ *
+ */
+
+static void provide_buffers(struct nic *nic)
+{
+	int i;
+
+	for (i = 0; i < RX_BUF_NB; i++)
+		vring_add_buf(RX_INDEX, i, i);
+
+	/* notify */
+
+	vring_kick(nic, RX_INDEX, i);
+}
+
+static struct nic_operations virtnet_operations = {
+	.connect = dummy_connect,
+	.poll = virtnet_poll,
+	.transmit = virtnet_transmit,
+	.irq = virtnet_irq,
+};
+
+/*
+ * virtnet_probe
+ *
+ * Look for a virtio network adapter
+ *
+ * Resets the device, reads the MAC address when the host offers
+ * VIRTIO_NET_F_MAC, registers the RX and TX queues, posts the receive
+ * buffers and marks the driver as ready.
+ *
+ * return 1 on success, 0 if a queue cannot be registered
+ *
+ */
+
+static int virtnet_probe(struct nic *nic, struct pci_device *pci)
+{
+	u32 features;
+	int i;
+
+	/* Mask the bit that says "this is an io addr" */
+
+	nic->ioaddr = pci->ioaddr & ~3;
+
+	/* Copy IRQ from PCI information */
+
+	nic->irqno = pci->irq;
+
+	printf("I/O address 0x%08x, IRQ #%d\n", nic->ioaddr, nic->irqno);
+
+	adjust_pci_device(pci);
+
+	vp_reset(nic);
+
+	features = vp_get_features(nic);
+	if (features & (1 << VIRTIO_NET_F_MAC)) {
+		vp_get(nic, offsetof(struct virtio_net_config, mac),
+		       nic->node_addr, ETH_ALEN);
+		printf("MAC address ");
+		for (i = 0; i < ETH_ALEN; i++) {
+			printf("%02x%c", nic->node_addr[i],
+			       (i == ETH_ALEN - 1) ? '\n' : ':');
+		}
+	}
+
+	/* initialize emit/receive queue */
+
+	for (i = 0; i < QUEUE_NB; i++) {
+		free_head[i] = 0;
+		last_used_idx[i] = 0;
+		memset((char*)&queue[i], 0, sizeof(queue[i]));
+		if (vp_find_vq(nic, i) == -1) {
+			printf("Cannot register queue #%d\n", i);
+			/*
+			 * Without its ring the queue cannot be used:
+			 * provide_buffers() would dereference ring pointers
+			 * that were never set up. Undo any queue already
+			 * handed to the device and give up.
+			 *
+			 * NOTE(review): assumes the legacy probe contract
+			 * treats 0 as "device not usable" -- confirm.
+			 */
+			vp_reset(nic);
+			return 0;
+		}
+	}
+
+	/* provide some receive buffers */
+
+	provide_buffers(nic);
+
+	/* define NIC interface */
+
+	nic->nic_op = &virtnet_operations;
+
+	/* driver is ready */
+
+	vp_set_features(nic, features & (1 << VIRTIO_NET_F_MAC));
+	vp_set_status(nic, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK);
+
+	return 1;
+}
+
+static struct pci_device_id virtnet_nics[] = {
+PCI_ROM(0x1af4, 0x1000, "virtio-net", "Virtio Network Interface"),
+};
+
+PCI_DRIVER ( virtnet_driver, virtnet_nics, PCI_NO_CLASS );
+
+DRIVER ( "VIRTIO-NET", nic_driver, pci_driver, virtnet_driver,
+	 virtnet_probe, virtnet_disable );
diff --git a/src/drivers/net/virtio-net.h b/src/drivers/net/virtio-net.h
new file mode 100644
index 00000000..3abef28e
--- /dev/null
+++ b/src/drivers/net/virtio-net.h
@@ -0,0 +1,44 @@
+#ifndef _VIRTIO_NET_H_
+# define _VIRTIO_NET_H_
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. 
*/
+#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
+
+struct virtio_net_config
+{
+	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
+	u8 mac[6];
+} __attribute__((packed));
+
+/* This is the first element of the scatter-gather list. If you don't
+ * specify GSO or CSUM features, you can simply ignore the header. */
+
+struct virtio_net_hdr
+{
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
+	uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+/* FIXME: Do we need this? If they said they can handle ECN, do they care? */
+#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 /* GSO frame, IPv4 TCP w/ ECN */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
+	uint8_t gso_type;
+	uint16_t hdr_len;
+	uint16_t gso_size;
+	uint16_t csum_start;
+	uint16_t csum_offset;
+};
+#endif /* _VIRTIO_NET_H_ */
diff --git a/src/drivers/net/virtio-pci.h b/src/drivers/net/virtio-pci.h
new file mode 100644
index 00000000..ba0604d5
--- /dev/null
+++ b/src/drivers/net/virtio-pci.h
@@ -0,0 +1,105 @@
+#ifndef _VIRTIO_PCI_H_
+# define _VIRTIO_PCI_H_
+
+/* A 32-bit r/o bitmask of the features supported by the host */
+#define VIRTIO_PCI_HOST_FEATURES 0
+
+/* A 32-bit r/w bitmask of features activated by the guest */
+#define VIRTIO_PCI_GUEST_FEATURES 4
+
+/* A 32-bit r/w PFN for the currently selected queue */
+#define VIRTIO_PCI_QUEUE_PFN 8
+
+/* A 16-bit r/o queue size for the currently selected queue */
+#define VIRTIO_PCI_QUEUE_NUM 12
+
+/* A 16-bit r/w queue selector */
+#define VIRTIO_PCI_QUEUE_SEL 14
+
+/* A 16-bit r/w queue notifier */
+#define VIRTIO_PCI_QUEUE_NOTIFY 16
+
+/* An 8-bit device status register. */
+#define VIRTIO_PCI_STATUS 18
+
+/* An 8-bit r/o interrupt status register. Reading the value will return the
+ * current contents of the ISR and will also clear it. This is effectively
+ * a read-and-acknowledge. */
+#define VIRTIO_PCI_ISR 19
+
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG 0x2
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG 20
+
+/* Virtio ABI version, this must match exactly */
+#define VIRTIO_PCI_ABI_VERSION 0
+
+/* Read the 32-bit feature bitmask offered by the host. */
+static inline u32 vp_get_features(struct nic *nic)
+{
+	return inl(nic->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+}
+
+/* Write the feature bitmask the guest activates. */
+static inline void vp_set_features(struct nic *nic, u32 features)
+{
+	outl(features, nic->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+}
+
+/* Copy len bytes, one byte-wide port read at a time, from the device
+ * configuration space at offset into buf. */
+static inline void vp_get(struct nic *nic, unsigned offset,
+			  void *buf, unsigned len)
+{
+	u8 *ptr = buf;
+	unsigned i;
+
+	for (i = 0; i < len; i++)
+		ptr[i] = inb(nic->ioaddr + VIRTIO_PCI_CONFIG + offset + i);
+}
+
+/* Read the 8-bit device status register. */
+static inline u8 vp_get_status(struct nic *nic)
+{
+	return inb(nic->ioaddr + VIRTIO_PCI_STATUS);
+}
+
+/* Write the device status register. A zero status is ignored here because
+ * writing 0 resets the device; vp_reset() does that explicitly. */
+static inline void vp_set_status(struct nic *nic, u8 status)
+{
+	if (status == 0) /* reset */
+		return;
+	outb(status, nic->ioaddr + VIRTIO_PCI_STATUS);
+}
+
+
+/* Reset the device by writing 0 to the status register, then read the ISR,
+ * which (see VIRTIO_PCI_ISR above) also clears it. */
+static inline void vp_reset(struct nic *nic)
+{
+	outb(0, nic->ioaddr + VIRTIO_PCI_STATUS);
+	(void)inb(nic->ioaddr + VIRTIO_PCI_ISR);
+}
+
+/* Write queue_index to the queue notify register. */
+static inline void vp_notify(struct nic *nic, int queue_index)
+{
+	outw(queue_index, nic->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+}
+
+/* Select queue_index and deactivate it by writing a zero PFN. */
+static inline void vp_del_vq(struct nic *nic, int queue_index)
+{
+	/* select the queue */
+
+	outw(queue_index, nic->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+
+	/* deactivate the queue */
+
+	outl(0, nic->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+}
+#endif /* _VIRTIO_PCI_H_ */
diff --git a/src/drivers/net/virtio-ring.h b/src/drivers/net/virtio-ring.h
new 
file mode 100644 index 00000000..33060b11 --- /dev/null +++ b/src/drivers/net/virtio-ring.h @@ -0,0 +1,93 @@ +#ifndef _VIRTIO_RING_H_ +# define _VIRTIO_RING_H_ +#define PAGE_SHIFT (12) +#define PAGE_SIZE (1<num = num; + + /* physical address of desc must be page aligned */ + + pa = virt_to_phys(queue); + pa = (pa + PAGE_MASK) & ~PAGE_MASK; + vr->desc = phys_to_virt(pa); + + vr->avail = (struct vring_avail *)&vr->desc[num]; + + /* physical address of used must be page aligned */ + + pa = virt_to_phys(&vr->avail->ring[num]); + pa = (pa + PAGE_MASK) & ~PAGE_MASK; + vr->used = phys_to_virt(pa); + + for (i = 0; i < num - 1; i++) + vr->desc[i].next = i + 1; + vr->desc[i].next = 0; +} + +#define vring_size(num) \ + (((((sizeof(struct vring_desc) * num) + \ + (sizeof(struct vring_avail) + sizeof(u16) * num)) \ + + PAGE_MASK) & ~PAGE_MASK) + \ + (sizeof(struct vring_used) + sizeof(struct vring_used_elem) * num)) +#endif /* _VIRTIO_RING_H_ */