From: Arne Welzel Date: Sat, 15 Dec 2012 12:47:37 +0000 (+0100) Subject: virtio: generic virtio library X-Git-Tag: v3.2.1~89 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/mdoc.3.txt?a=commitdiff_plain;h=a58c7357381876fdbfd571b75ba38ce6b6e70fc5;p=minix.git virtio: generic virtio library --- diff --git a/include/minix/Makefile b/include/minix/Makefile index 0dd522a2d..4aaea7319 100644 --- a/include/minix/Makefile +++ b/include/minix/Makefile @@ -20,6 +20,7 @@ INCS+= acpi.h audio_fw.h bitmap.h \ syslib.h sysutil.h termios.h timers.h type.h \ tty.h u64.h usb.h usb_ch9.h vbox.h \ vboxfs.h vboxif.h vboxtype.h vm.h \ - vfsif.h vtreefs.h libminixfs.h netsock.h + vfsif.h vtreefs.h libminixfs.h netsock.h \ + virtio.h .include diff --git a/include/minix/virtio.h b/include/minix/virtio.h new file mode 100644 index 000000000..ee40921a6 --- /dev/null +++ b/include/minix/virtio.h @@ -0,0 +1,139 @@ +/* + * Generic virtio library for MINIX 3 + * + * Copyright (c) 2013, A. Welzel, + * + * This software is released under the BSD license. See the LICENSE file + * included in the main directory of this source distribution for the + * license terms and conditions. + */ + +#ifndef _MINIX_VIRTIO_H +#define _MINIX_VIRTIO_H 1 + +#include + +#define VIRTIO_VENDOR_ID 0x1AF4 + +#define VIRTIO_HOST_F_OFF 0x0000 +#define VIRTIO_GUEST_F_OFF 0x0004 +#define VIRTIO_QADDR_OFF 0x0008 + +#define VIRTIO_QSIZE_OFF 0x000C +#define VIRTIO_QSEL_OFF 0x000E +#define VIRTIO_QNOTFIY_OFF 0x0010 + +#define VIRTIO_DEV_STATUS_OFF 0x0012 +#define VIRTIO_ISR_STATUS_OFF 0x0013 +#define VIRTIO_DEV_SPECIFIC_OFF 0x0014 +/* if msi is enabled, device specific headers shift by 4 */ +#define VIRTIO_MSI_ADD_OFF 0x0004 +#define VIRTIO_STATUS_ACK 0x01 +#define VIRTIO_STATUS_DRV 0x02 +#define VIRTIO_STATUS_DRV_OK 0x04 +#define VIRTIO_STATUS_FAIL 0x80 + + +/* Feature description */ +struct virtio_feature { + const char *name; + u8_t bit; + u8_t host_support; + u8_t guest_support; +}; + +/* Forward declaration of struct virtio_device. + * + * This structure is opaque to the caller. + */ +struct virtio_device; + +/* Find a virtio device with subdevice id subdevid. Returns a pointer + * to an opaque virtio_device instance. + */ +struct virtio_device *virtio_setup_device(u16_t subdevid, + const char *name, + struct virtio_feature *features, + int feature_count, + int threads, int skip); + +/* Attempt to allocate queue_cnt memory for queues */ +int virtio_alloc_queues(struct virtio_device *dev, int num_queues); + +/* Register the IRQ policy and indicate to the host we are ready to go */ +void virtio_device_ready(struct virtio_device *dev); + +/* Unregister the IRQ and reset the device */ +void virtio_reset_device(struct virtio_device *dev); + +/* Free the memory used by all queues */ +void virtio_free_queues(struct virtio_device *dev); + +/* Free all memory allocated for the device (except the queue memory, + * which has to be freed before with virtio_free_queues()). + * + * Don't touch the device afterwards! This is like free(dev). + */ +void virtio_free_device(struct virtio_device *dev); + + +/* Feature helpers */ +int virtio_guest_supports(struct virtio_device *dev, int bit); +int virtio_host_supports(struct virtio_device *dev, int bit); + +/* + * Use num vumap_phys elements and chain these as vring_desc elements + * into the vring. + * + * Kick the queue if needed. + * + * data is opaque and returned by virtio_from_queue() when the host + * processed the descriptor chain. + * + * Note: The last bit of vp_addr is used to flag whether an iovec is + * writable. This implies that only word aligned buffers can be + * used. + */ +int virtio_to_queue(struct virtio_device *dev, int qidx, + struct vumap_phys *bufs, size_t num, void *data); + +/* + * If the host used a chain of descriptors, return 0 and set data + * as was given to virtio_to_queue(). If the host has not processed + * any element returns -1. + */ +int virtio_from_queue(struct virtio_device *dev, int qidx, void **data); + +/* IRQ related functions */ +void virtio_irq_enable(struct virtio_device *dev); +void virtio_irq_disable(struct virtio_device *dev); + +/* Checks the ISR field of the device and returns true if + * the interrupt was for this device. + */ +int virtio_had_irq(struct virtio_device *dev); + + +u32_t virtio_read32(struct virtio_device *dev, off_t offset); +u16_t virtio_read16(struct virtio_device *dev, off_t offset); +u8_t virtio_read8(struct virtio_device *dev, off_t offset); +void virtio_write32(struct virtio_device *dev, off_t offset, u32_t val); +void virtio_write16(struct virtio_device *dev, off_t offset, u16_t val); +void virtio_write8(struct virtio_device *dev, off_t offset, u8_t val); + + +/* + * Device specific reads take MSI offset into account and all reads + * are at offset 20. + * + * Something like: + * read(off) --> readX(20 + (msi ? 4 : 0) + off) + */ +u32_t virtio_sread32(struct virtio_device *dev, off_t offset); +u16_t virtio_sread16(struct virtio_device *dev, off_t offset); +u8_t virtio_sread8(struct virtio_device *dev, off_t offset); +void virtio_swrite32(struct virtio_device *dev, off_t offset, u32_t val); +void virtio_swrite16(struct virtio_device *dev, off_t offset, u16_t val); +void virtio_swrite8(struct virtio_device *dev, off_t offset, u8_t val); + +#endif /* _MINIX_VIRTIO_H */ diff --git a/lib/Makefile b/lib/Makefile index 777c4d121..d354e13e2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -46,7 +46,7 @@ SUBDIR+= libcompat_minix libblockdriver libchardriver \ libnetsock libpuffs libsffs .if (${MACHINE} == "i386") -SUBDIR += libvassert libhgfs libvboxfs +SUBDIR += libvassert libhgfs libvboxfs libvirtio .endif .if (${MKRUMP} != "no") diff --git a/lib/libvirtio/Makefile b/lib/libvirtio/Makefile new file mode 100644 index 000000000..055c6a4e9 --- /dev/null +++ b/lib/libvirtio/Makefile @@ -0,0 +1,8 @@ +# Makefile for libvirtio +.include + +LIB= virtio + +SRCS= virtio.c + +.include diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c new file mode 100644 index 000000000..4fb77a6ab --- /dev/null +++ b/lib/libvirtio/virtio.c @@ -0,0 +1,902 @@ +/* + * Generic virtio library for MINIX 3 + * + * Copyright (c) 2013, A. Welzel, + * + * This software is released under the BSD license. See the LICENSE file + * included in the main directory of this source distribution for the + * license terms and conditions. + */ + +#define _SYSTEM 1 + +#include +#include /* for OK... */ +#include /* memset() */ +#include /* malloc() */ + +#include /* PCI_ILR, PCI_BAR... */ +#include /* PAGE_SIZE */ + +#include /* umap, vumap, alloc_..*/ +#include /* panic(), at least */ +#include /* virtio system include */ + +#include "virtio_ring.h" /* virtio types / helper */ + +/* + * About indirect descriptors: + * + * For each possible thread, a single indirect descriptor table is allocated. + * If using direct descriptors would lead to the situation that another thread + * might not be able to add another descriptor to the ring, indirect descriptors + * are used. + * + * Indirect descriptors are pre-allocated. Each alloc_contig() call involves a + * kernel call which is critical for performance. + * + * The size of indirect descriptor tables is chosen based on MAPVEC_NR. A driver + * using this library should never add more than + * + * MAPVEC_NR + MAPVEC_NR / 2 + * + * descriptors to a queue as this represent the maximum size of an indirect + * descriptor table. + */ + +struct indirect_desc_table { + int in_use; + struct vring_desc *descs; + phys_bytes paddr; + size_t len; +}; + +struct virtio_queue { + + void *vaddr; /* virtual addr of ring */ + phys_bytes paddr; /* physical addr of ring */ + u32_t page; /* physical guest page */ + + u16_t num; /* number of descriptors */ + u32_t ring_size; /* size of ring in bytes */ + struct vring vring; + + u16_t free_num; /* free descriptors */ + u16_t free_head; /* next free descriptor */ + u16_t free_tail; /* last free descriptor */ + u16_t last_used; /* we checked in used */ + + void **data; /* points to pointers */ +}; + +struct virtio_device { + + const char *name; /* for debugging */ + + u16_t port; /* io port */ + + struct virtio_feature *features; /* host / guest features */ + u8_t num_features; /* max 32 */ + + struct virtio_queue *queues; /* our queues */ + u16_t num_queues; + + int irq; /* interrupt line */ + int irq_hook; /* hook id */ + int msi; /* is MSI enabled? */ + + int threads; /* max number of threads */ + + struct indirect_desc_table *indirect; /* indirect descriptor tables */ + int num_indirect; +}; + +static int is_matching_device(u16_t expected_sdid, u16_t vid, u16_t sdid); +static int init_device(int devind, struct virtio_device *dev); +static int init_phys_queues(struct virtio_device *dev); +static int exchange_features(struct virtio_device *dev); +static int alloc_phys_queue(struct virtio_queue *q); +static void free_phys_queue(struct virtio_queue *q); +static void init_phys_queue(struct virtio_queue *q); +static int init_indirect_desc_table(struct indirect_desc_table *desc); +static int init_indirect_desc_tables(struct virtio_device *dev); +static void virtio_irq_register(struct virtio_device *dev); +static void virtio_irq_unregister(struct virtio_device *dev); +static int wants_kick(struct virtio_queue *q); +static void kick_queue(struct virtio_device *dev, int qidx); + +struct virtio_device * +virtio_setup_device(u16_t subdevid, const char *name, + struct virtio_feature *features, int num_features, + int threads, int skip) +{ + int r, devind; + u16_t vid, did, sdid; + struct virtio_device *ret; + + /* bogus values? */ + if (skip < 0 || name == NULL || num_features < 0 || threads <= 0) + return NULL; + + pci_init(); + + r = pci_first_dev(&devind, &vid, &did); + + while (r > 0) { + sdid = pci_attr_r16(devind, PCI_SUBDID); + if (is_matching_device(subdevid, vid, sdid)) { + + /* this is the device we are looking for */ + if (skip == 0) + break; + + skip--; + } + + r = pci_next_dev(&devind, &vid, &did); + } + + /* pci_[first|next_dev()] return 0 if no device was found */ + if (r == 0 || skip > 0) + return NULL; + + /* allocate and set known info about the device */ + ret = malloc(sizeof(*ret)); + + if (ret == NULL) + return NULL; + + /* Prepare virtio_device intance */ + memset(ret, 0, sizeof(*ret)); + ret->name = name; + ret->features = features; + ret->num_features = num_features; + ret->threads = threads; + /* see comment in the beginning of this file */ + ret->num_indirect = threads; + + if (init_device(devind, ret) != OK) { + printf("%s: Could not initialize device\n", ret->name); + goto err; + } + + /* Ack the device */ + virtio_write8(ret, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_ACK); + + if (exchange_features(ret) != OK) { + printf("%s: Could not exchange features\n", ret->name); + goto err; + } + + if (init_indirect_desc_tables(ret) != OK) { + printf("%s: Could not initialize indirect tables\n", ret->name); + goto err; + } + + /* We know how to drive the device... */ + virtio_write8(ret, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_DRV); + + return ret; + +/* Error path */ +err: + free(ret); + return NULL; +} + +static int +init_device(int devind, struct virtio_device *dev) +{ + u32_t base, size; + int iof, r; + + pci_reserve(devind); + + if ((r = pci_get_bar(devind, PCI_BAR, &base, &size, &iof)) != OK) { + printf("%s: Could not get BAR (%d)", dev->name, r); + return r; + } + + if (!iof) { + printf("%s: PCI not IO space?", dev->name); + return EINVAL; + } + + if (base & 0xFFFF0000) { + printf("%s: IO port weird (%08x)", dev->name, base); + return EINVAL; + } + + /* store the I/O port */ + dev->port = base; + + /* Reset the device */ + virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, 0); + + /* Read IRQ line */ + dev->irq = pci_attr_r8(devind, PCI_ILR); + + return OK; +} + +static int +exchange_features(struct virtio_device *dev) +{ + u32_t guest_features = 0, host_features = 0; + struct virtio_feature *f; + + host_features = virtio_read32(dev, VIRTIO_HOST_F_OFF); + + for (int i = 0; i < dev->num_features; i++) { + f = &dev->features[i]; + + /* prepare the features the driver supports */ + guest_features |= (f->guest_support << f->bit); + + /* just load the host feature int the struct */ + f->host_support = ((host_features >> f->bit) & 1); + } + + /* let the device know about our features */ + virtio_write32(dev, VIRTIO_GUEST_F_OFF, guest_features); + + return OK; +} + +int +virtio_alloc_queues(struct virtio_device *dev, int num_queues) +{ + int r = OK; + + assert(dev != NULL); + + /* Assume there's no device with more than 256 queues */ + if (num_queues < 0 || num_queues > 256) + return EINVAL; + + dev->num_queues = num_queues; + /* allocate queue memory */ + dev->queues = malloc(num_queues * sizeof(dev->queues[0])); + + if (dev->queues == NULL) + return ENOMEM; + + memset(dev->queues, 0, num_queues * sizeof(dev->queues[0])); + + if ((r = init_phys_queues(dev) != OK)) { + printf("%s: Could not initialize queues (%d)\n", dev->name, r); + free(dev->queues); + dev->queues = NULL; + } + + return r; +} + +static int +init_phys_queues(struct virtio_device *dev) +{ + /* Initialize all queues */ + int i, j, r; + struct virtio_queue *q; + + for (i = 0; i < dev->num_queues; i++) { + q = &dev->queues[i]; + /* select the queue */ + virtio_write16(dev, VIRTIO_QSEL_OFF, i); + q->num = virtio_read16(dev, VIRTIO_QSIZE_OFF); + + if (q->num & (q->num - 1)) { + printf("%s: Queue %d num=%d not ^2", dev->name, i, + q->num); + r = EINVAL; + goto free_phys_queues; + } + + if ((r = alloc_phys_queue(q)) != OK) + goto free_phys_queues; + + init_phys_queue(q); + + /* Let the host know about the guest physical page */ + virtio_write32(dev, VIRTIO_QADDR_OFF, q->page); + } + + return OK; + +/* Error path */ +free_phys_queues: + for (j = 0; j < i; j++) + free_phys_queue(&dev->queues[i]); + + return r; +} + +static int +alloc_phys_queue(struct virtio_queue *q) +{ + assert(q != NULL); + + /* How much memory do we need? */ + q->ring_size = vring_size(q->num, PAGE_SIZE); + + q->vaddr = alloc_contig(q->ring_size, AC_ALIGN4K, &q->paddr); + + if (q->vaddr == NULL) + return ENOMEM; + + q->data = alloc_contig(sizeof(q->data[0]) * q->num, AC_ALIGN4K, NULL); + + if (q->data == NULL) { + free_contig(q->vaddr, q->ring_size); + q->vaddr = NULL; + q->paddr = 0; + return ENOMEM; + } + + return OK; +} + +void +virtio_device_ready(struct virtio_device *dev) +{ + assert(dev != NULL); + + /* Register IRQ line */ + virtio_irq_register(dev); + + /* Driver is ready to go! */ + virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_DRV_OK); +} + +void +virtio_free_queues(struct virtio_device *dev) +{ + int i; + assert(dev != NULL); + assert(dev->queues != NULL); + assert(dev->num_queues > 0); + + for (i = 0; i < dev->num_queues; i++) + free_phys_queue(&dev->queues[i]); + + dev->num_queues = 0; + dev->queues = NULL; +} + +static void +free_phys_queue(struct virtio_queue *q) +{ + assert(q != NULL); + assert(q->vaddr != NULL); + + free_contig(q->vaddr, q->ring_size); + q->vaddr = NULL; + q->paddr = 0; + q->num = 0; + free_contig(q->data, sizeof(q->data[0])); + q->data = NULL; +} + +static void +init_phys_queue(struct virtio_queue *q) +{ + memset(q->vaddr, 0, q->ring_size); + memset(q->data, 0, sizeof(q->data[0]) * q->num); + + /* physical page in guest */ + q->page = q->paddr / PAGE_SIZE; + + /* Set pointers in q->vring according to size */ + vring_init(&q->vring, q->num, q->vaddr, PAGE_SIZE); + + /* Everything's free at this point */ + for (int i = 0; i < q->num; i++) { + q->vring.desc[i].flags = VRING_DESC_F_NEXT; + q->vring.desc[i].next = (i + 1) & (q->num - 1); + } + + q->free_num = q->num; + q->free_head = 0; + q->free_tail = q->num - 1; + q->last_used = 0; + + return; +} + +void +virtio_free_device(struct virtio_device *dev) +{ + int i; + struct indirect_desc_table *desc; + + assert(dev != NULL); + + assert(dev->num_indirect > 0); + + for (i = 0; i < dev->num_indirect; i++) { + desc = &dev->indirect[i]; + free_contig(desc->descs, desc->len); + } + + dev->num_indirect = 0; + + assert(dev->indirect != NULL); + free(dev->indirect); + dev->indirect = NULL; + + free(dev); +} + +static int +init_indirect_desc_table(struct indirect_desc_table *desc) +{ + desc->in_use = 0; + desc->len = (MAPVEC_NR + MAPVEC_NR / 2) * sizeof(struct vring_desc); + + desc->descs = alloc_contig(desc->len, AC_ALIGN4K, &desc->paddr); + memset(desc->descs, 0, desc->len); + + if (desc->descs == NULL) + return ENOMEM; + + return OK; +} + +static int +init_indirect_desc_tables(struct virtio_device *dev) +{ + int i, j, r; + struct indirect_desc_table *desc; + + dev->indirect = malloc(dev->num_indirect * sizeof(dev->indirect[0])); + + if (dev->indirect == NULL) { + printf("%s: Could not allocate indirect tables\n", dev->name); + return ENOMEM; + } + + memset(dev->indirect, 0, dev->num_indirect* sizeof(dev->indirect[0])); + + for (i = 0; i < dev->num_indirect; i++) { + desc = &dev->indirect[i]; + if ((r = init_indirect_desc_table(desc)) != OK) { + + /* error path */ + for (j = 0; j < i; j++) { + desc = &dev->indirect[j]; + free_contig(desc->descs, desc->len); + } + + free(dev->indirect); + + return r; + } + } + + return OK; +} + +static void +clear_indirect_table(struct virtio_device *dev, struct vring_desc *vd) +{ + int i; + struct indirect_desc_table *desc; + + assert(vd->len > 0); + assert(vd->flags & VRING_DESC_F_INDIRECT); + vd->flags = vd->flags & ~VRING_DESC_F_INDIRECT; + vd->len = 0;; + + for (i = 0; i < dev->num_indirect; i++) { + desc = &dev->indirect[i]; + + if (desc->paddr == vd->addr) { + assert(desc->in_use); + desc->in_use = 0; + break; + } + } + + if (i >= dev->num_indirect) + panic("%s: Could not clear indirect descriptor table "); +} + + +static void inline +use_vring_desc(struct vring_desc *vd, struct vumap_phys *vp) +{ + vd->addr = vp->vp_addr & ~1UL; + vd->len = vp->vp_size; + vd->flags = VRING_DESC_F_NEXT; + + if (vp->vp_addr & 1) + vd->flags |= VRING_DESC_F_WRITE; +} + +static void +set_indirect_descriptors(struct virtio_device *dev, struct virtio_queue *q, + struct vumap_phys *bufs, size_t num) +{ + /* Indirect descriptor tables are simply filled from left to right */ + int i; + struct indirect_desc_table *desc; + struct vring *vring = &q->vring; + struct vring_desc *vd, *ivd; + + /* Find the first unused indirect descriptor table */ + for (i = 0; i < dev->num_indirect; i++) { + desc = &dev->indirect[i]; + + /* If an unused indirect descriptor table was found, + * mark it as being used and exit the loop. + */ + if (!desc->in_use) { + desc->in_use = 1; + break; + } + } + + /* Sanity check */ + if (i >= dev->num_indirect) + panic("%s: No indirect descriptor tables left"); + + /* For indirect descriptor tables, only a single descriptor from + * the main ring is used. + */ + vd = &vring->desc[q->free_head]; + vd->flags = VRING_DESC_F_INDIRECT; + vd->addr = desc->paddr; + vd->len = num * sizeof(desc->descs[0]); + + /* Initialize the descriptors in the indirect descriptor table */ + for (i = 0; i < num; i++) { + ivd = &desc->descs[i]; + + use_vring_desc(ivd, &bufs[i]); + ivd->next = i + 1; + } + + /* Unset the next bit of the last descriptor */ + ivd->flags = ivd->flags & ~VRING_DESC_F_NEXT; + + /* Update queue, only a single descriptor was used */ + q->free_num -= 1; + q->free_head = vd->next; +} + +static void +set_direct_descriptors(struct virtio_queue *q, struct vumap_phys *bufs, + size_t num) +{ + u16_t i; + size_t count; + struct vring *vring = &q->vring; + struct vring_desc *vd; + + for (i = q->free_head, count = 0; count < num; count++) { + + /* The next free descriptor */ + vd = &vring->desc[i]; + + /* The descriptor is linked in the free list, so + * it always has the next bit set. + */ + assert(vd->flags & VRING_DESC_F_NEXT); + + use_vring_desc(vd, &bufs[count]); + i = vd->next; + } + + /* Unset the next bit of the last descriptor */ + vd->flags = vd->flags & ~VRING_DESC_F_NEXT; + + /* Update queue */ + q->free_num -= num; + q->free_head = i; +} + +int +virtio_to_queue(struct virtio_device *dev, int qidx, struct vumap_phys *bufs, + size_t num, void *data) +{ + u16_t free_first; + int left; + struct virtio_queue *q = &dev->queues[qidx]; + struct vring *vring = &q->vring; + + assert(0 <= qidx && qidx <= dev->num_queues); + + if (!data) + panic("%s: NULL data received queue %d", dev->name, qidx); + + free_first = q->free_head; + + left = (int)q->free_num - (int)num; + + if (left < dev->threads) + set_indirect_descriptors(dev, q, bufs, num); + else + set_direct_descriptors(q, bufs, num); + + /* Next index for host is old free_head */ + vring->avail->ring[vring->avail->idx % q->num] = free_first; + + /* Provided by the caller to identify this slot */ + q->data[free_first] = data; + + /* Make sure the host sees the new descriptors */ + __insn_barrier(); + + /* advance last idx */ + vring->avail->idx += 1; + + /* Make sure the host sees the avail->idx */ + __insn_barrier(); + + /* kick it! */ + kick_queue(dev, qidx); + return 0; +} + +int +virtio_from_queue(struct virtio_device *dev, int qidx, void **data) +{ + struct virtio_queue *q; + struct vring *vring; + struct vring_used_elem *uel; + struct vring_desc *vd; + int count = 0; + u16_t idx; + u16_t used_idx; + + assert(0 <= qidx && qidx < dev->num_queues); + + q = &dev->queues[qidx]; + vring = &q->vring; + + /* Make sure we see changes done by the host */ + __insn_barrier(); + + /* The index from the host */ + used_idx = vring->used->idx % q->num; + + /* We already saw this one, nothing to do here */ + if (q->last_used == used_idx) + return -1; + + /* Get the vring_used element */ + uel = &q->vring.used->ring[q->last_used]; + + /* Update the last used element */ + q->last_used = (q->last_used + 1) % q->num; + + /* index of the used element */ + idx = uel->id % q->num; + + assert(q->data[idx] != NULL); + + /* Get the descriptor */ + vd = &vring->desc[idx]; + + /* Unconditionally set the tail->next to the first used one */ + assert(vring->desc[q->free_tail].flags & VRING_DESC_F_NEXT); + vring->desc[q->free_tail].next = idx; + + /* Find the last index, eventually there has to be one + * without a the next flag. + * + * FIXME: Protect from endless loop + */ + while (vd->flags & VRING_DESC_F_NEXT) { + + if (vd->flags & VRING_DESC_F_INDIRECT) + clear_indirect_table(dev, vd); + + idx = vd->next; + vd = &vring->desc[idx]; + count++; + } + + /* Didn't count the last one */ + count++; + + if (vd->flags & VRING_DESC_F_INDIRECT) + clear_indirect_table(dev, vd); + + /* idx points to the tail now, update the queue */ + q->free_tail = idx; + assert(!(vd->flags & VRING_DESC_F_NEXT)); + + /* We can always connect the tail with the head */ + vring->desc[q->free_tail].next = q->free_head; + vring->desc[q->free_tail].flags = VRING_DESC_F_NEXT; + + q->free_num += count; + + assert(q->free_num <= q->num); + + *data = q->data[uel->id]; + q->data[uel->id] = NULL; + + return 0; +} + +int +virtio_had_irq(struct virtio_device *dev) +{ + return virtio_read8(dev, VIRTIO_ISR_STATUS_OFF) & 1; +} + +void +virtio_reset_device(struct virtio_device *dev) +{ + virtio_irq_unregister(dev); + virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, 0); +} + + +void +virtio_irq_enable(struct virtio_device *dev) +{ + int r; + if ((r = sys_irqenable(&dev->irq_hook) != OK)) + panic("%s Unable to enable IRQ %d", dev->name, r); +} + +void +virtio_irq_disable(struct virtio_device *dev) +{ + int r; + if ((r = sys_irqdisable(&dev->irq_hook) != OK)) + panic("%s: Unable to disable IRQ %d", dev->name, r); +} + +static int +wants_kick(struct virtio_queue *q) +{ + assert(q != NULL); + return !(q->vring.used->flags & VRING_USED_F_NO_NOTIFY); +} + +static void +kick_queue(struct virtio_device *dev, int qidx) +{ + assert(0 <= qidx && qidx < dev->num_queues); + + if (wants_kick(&dev->queues[qidx])) + virtio_write16(dev, VIRTIO_QNOTFIY_OFF, qidx); + + return; +} + +static int +is_matching_device(u16_t expected_sdid, u16_t vid, u16_t sdid) +{ + return vid == VIRTIO_VENDOR_ID && sdid == expected_sdid; +} + +static void +virtio_irq_register(struct virtio_device *dev) +{ + int r; + if ((r = sys_irqsetpolicy(dev->irq, 0, &dev->irq_hook) != OK)) + panic("%s: Unable to register IRQ %d", dev->name, r); +} + +static void +virtio_irq_unregister(struct virtio_device *dev) +{ + int r; + if ((r = sys_irqrmpolicy(&dev->irq_hook) != OK)) + panic("%s: Unable to unregister IRQ %d", dev->name, r); +} + +static int +_supports(struct virtio_device *dev, int bit, int host) +{ + for (int i = 0; i < dev->num_features; i++) { + struct virtio_feature *f = &dev->features[i]; + + if (f->bit == bit) + return host ? f->host_support : f->guest_support; + } + + panic("%s: Feature not found bit=%d", dev->name, bit); +} + +int +virtio_host_supports(struct virtio_device *dev, int bit) +{ + return _supports(dev, bit, 1); +} + +int +virtio_guest_supports(struct virtio_device *dev, int bit) +{ + return _supports(dev, bit, 0); +} + + +/* Just some wrappers around sys_read */ +#define VIRTIO_READ_XX(xx, suff) \ +u##xx##_t \ +virtio_read##xx(struct virtio_device *dev, off_t off) \ +{ \ + int r; \ + u32_t ret; \ + if ((r = sys_in##suff(dev->port + off, &ret)) != OK) \ + panic("%s: Read failed %d %d r=%d", dev->name, \ + dev->port, \ + off, \ + r); \ + \ + return ret; \ +} + +VIRTIO_READ_XX(32, l) +VIRTIO_READ_XX(16, w) +VIRTIO_READ_XX(8, b) + +/* Just some wrappers around sys_write */ +#define VIRTIO_WRITE_XX(xx, suff) \ +void \ +virtio_write##xx(struct virtio_device *dev, off_t off, u##xx##_t val) \ +{ \ + int r; \ + if ((r = sys_out##suff(dev->port + off, val)) != OK) \ + panic("%s: Write failed %d %d r=%d", dev->name, \ + dev->port, \ + off, \ + r); \ +} + +VIRTIO_WRITE_XX(32, l) +VIRTIO_WRITE_XX(16, w) +VIRTIO_WRITE_XX(8, b) + +/* Just some wrappers around sys_read */ +#define VIRTIO_SREAD_XX(xx, suff) \ +u##xx##_t \ +virtio_sread##xx(struct virtio_device *dev, off_t off) \ +{ \ + int r; \ + u32_t ret; \ + off += VIRTIO_DEV_SPECIFIC_OFF; \ + \ + if (dev->msi) \ + off += VIRTIO_MSI_ADD_OFF; \ + \ + if ((r = sys_in##suff(dev->port + off, &ret)) != OK) \ + panic("%s: Read failed %d %d r=%d", dev->name, \ + dev->port, \ + off, \ + r); \ + \ + return ret; \ +} + +VIRTIO_SREAD_XX(32, l) +VIRTIO_SREAD_XX(16, w) +VIRTIO_SREAD_XX(8, b) + +/* Just some wrappers around sys_write */ +#define VIRTIO_SWRITE_XX(xx, suff) \ +void \ +virtio_swrite##xx(struct virtio_device *dev, off_t off, u##xx##_t val) \ +{ \ + int r; \ + off += VIRTIO_DEV_SPECIFIC_OFF; \ + \ + if (dev->msi) \ + off += VIRTIO_MSI_ADD_OFF; \ + \ + if ((r = sys_out##suff(dev->port + off, val)) != OK) \ + panic("%s: Write failed %d %d r=%d", dev->name, \ + dev->port, \ + off, \ + r); \ +} + +VIRTIO_SWRITE_XX(32, l) +VIRTIO_SWRITE_XX(16, w) +VIRTIO_SWRITE_XX(8, b) diff --git a/lib/libvirtio/virtio_ring.h b/lib/libvirtio/virtio_ring.h new file mode 100644 index 000000000..114f98e37 --- /dev/null +++ b/lib/libvirtio/virtio_ring.h @@ -0,0 +1,183 @@ +#ifndef _LINUX_VIRTIO_RING_H +#define _LINUX_VIRTIO_RING_H +/* An interface for efficient virtio implementation, currently for use by KVM + * and lguest, but hopefully others soon. Do NOT change this since it will + * break existing servers and clients. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright Rusty Russell IBM Corporation 2007. */ + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + u64_t addr; + /* Length. */ + u32_t len; + /* The flags as indicated above. */ + u16_t flags; + /* We chain unused descriptors via this, too */ + u16_t next; +}; + +struct vring_avail { + u16_t flags; + u16_t idx; + u16_t ring[]; +}; + +/* u32 is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + u32_t id; + /* Total length of the descriptor chain which was used (written to) */ + u32_t len; +}; + +struct vring_used { + u16_t flags; + u16_t idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + + struct vring_desc *desc; + + struct vring_avail *avail; + + struct vring_used *used; +}; + +/* The standard layout for the ring is a continuous chunk of memory which looks + * like this. We assume num is a power of 2. + * + * struct vring + * { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * u16_t avail_flags; + * u16_t avail_idx; + * u16_t available[num]; + * u16_t used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * u16_t used_flags; + * u16_t used_idx; + * struct vring_used_elem used[num]; + * u16_t avail_event_idx; + * }; + */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(u16_t *)&(vr)->used->ring[(vr)->num]) + +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = p + num*sizeof(struct vring_desc); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(u16_t) + + align-1) & ~(align - 1)); +} + +static inline unsigned vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(u16_t) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(u16_t) * 3 + sizeof(struct vring_used_elem) * num; +} + +#if 0 +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(u16_t event_idx, u16_t new_idx, u16_t old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (u16_t)(new_idx - event_idx - 1) < (u16_t)(new_idx - old); +} + +#ifdef __KERNEL__ +#include +struct virtio_device; +struct virtqueue; + +struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + void *pages, + void (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name); +void vring_del_virtqueue(struct virtqueue *vq); +/* Filter out transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev); + +irqreturn_t vring_interrupt(int irq, void *_vq); +#endif /* __KERNEL__ */ +#endif /* 0 */ +#endif /* _LINUX_VIRTIO_RING_H */