/*	$OpenBSD: virtio.c,v 1.123 2025/01/08 15:46:10 dv Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE */
#include <sys/socket.h>
#include <sys/wait.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pv/virtioreg.h>
#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/vioblkreg.h>
#include <dev/vmm/vmm.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <errno.h>
#include <event.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "pci.h"
#include "vioscsi.h"
#include "virtio.h"
#include "vmd.h"

extern struct vmd *env;
extern char *__progname;

struct viornd_dev viornd;
struct vioscsi_dev *vioscsi;
struct vmmci_dev vmmci;

/* Devices emulated in subprocesses are inserted into this list. */
SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs;

#define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */

#define VIRTIO_NET_F_MAC	(1<<5)

#define VMMCI_F_TIMESYNC	(1<<0)
#define VMMCI_F_ACK		(1<<1)
#define VMMCI_F_SYNCRTC		(1<<2)

#define RXQ	0
#define TXQ	1

static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *);
static void virtio_dispatch_dev(int, short, void *);
static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *);
static int virtio_dev_closefds(struct virtio_dev *);
static void vmmci_pipe_dispatch(int, short, void *);

const char *
virtio_reg_name(uint8_t reg)
{
	switch (reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
	case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI...VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		return "device config 0";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
		return "device config 1";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
	default: return "unknown";
	}
}

uint32_t
vring_size(uint32_t vq_size)
{
	uint32_t allocsize1, allocsize2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (2 + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
	    + sizeof(struct vring_used_elem) * vq_size);

	return allocsize1 + allocsize2;
}
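
/*
 * Illustrative sizing, not computed anywhere in this file: assuming a
 * 256-entry queue, 16-byte struct vring_desc entries, 8-byte struct
 * vring_used_elem entries and VIRTQUEUE_ALIGN() rounding up to the
 * 4096-byte VIRTIO_PAGE_SIZE, the arithmetic above works out to
 *   allocsize1 = ALIGN(256 * 16 + 2 * (2 + 256)) = ALIGN(4612) = 8192
 *   allocsize2 = ALIGN(2 * 2 + 256 * 8)          = ALIGN(2052) = 4096
 * for a total of 12288 bytes of guest memory per virtqueue.
 */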

/* Update queue select */
void
viornd_update_qs(void)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0) {
		viornd.cfg.queue_size = 0;
		return;
	}

	vq_info = &viornd.vq[viornd.cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
	viornd.cfg.queue_size = vq_info->qs;
}

/* Update queue address */
void
viornd_update_qa(void)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (viornd.cfg.queue_select > 0)
		return;

	vq_info = &viornd.vq[viornd.cfg.queue_select];
	vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
	if (hva == NULL)
		fatalx("viornd_update_qa");
	vq_info->q_hva = hva;
}

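/*
 * viornd_notifyq
 *
 * Handle a queue notification from the guest: pick up the descriptor
 * referenced by the avail ring, fill it with random data and post it to
 * the used ring.  Returns 1 if an interrupt should be asserted, 0
 * otherwise.
 */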
int
viornd_notifyq(void)
{
	size_t sz;
	int dxx, ret;
	uint16_t aidx, uidx;
	char *vr, *rnd_data;
	struct vring_desc *desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;

	ret = 0;

	/* Invalid queue? */
	if (viornd.cfg.queue_notify > 0)
		return (0);

	vq_info = &viornd.vq[viornd.cfg.queue_notify];
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	desc = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	aidx = avail->idx & VIORND_QUEUE_MASK;
	uidx = used->idx & VIORND_QUEUE_MASK;

	dxx = avail->ring[aidx] & VIORND_QUEUE_MASK;

	sz = desc[dxx].len;
	if (sz > MAXPHYS)
		fatalx("viornd descriptor size too large (%zu)", sz);

	rnd_data = malloc(sz);

	if (rnd_data != NULL) {
		arc4random_buf(rnd_data, sz);
		if (write_mem(desc[dxx].addr, rnd_data, sz)) {
			log_warnx("viornd: can't write random data @ 0x%llx",
			    desc[dxx].addr);
		} else {
			/* ret == 1 -> interrupt needed */
			/* XXX check VIRTIO_F_NO_INTR */
			ret = 1;
			viornd.cfg.isr_status = 1;
			used->ring[uidx].id = dxx;
			used->ring[uidx].len = sz;
			__sync_synchronize();
			used->idx++;
		}
		free(rnd_data);
	} else
		fatal("memory allocation error for viornd data");

	return (ret);
}

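/*
 * virtio_rnd_io
 *
 * PCI I/O callback for the entropy device.  dir == 0 indicates a write
 * from the guest; any other value is a read.  Register offsets follow the
 * legacy virtio PCI layout (VIRTIO_CONFIG_*).
 */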
int
virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			viornd.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			viornd.cfg.queue_pfn = *data;
			viornd_update_qa();
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			viornd.cfg.queue_select = *data;
			viornd_update_qs();
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			viornd.cfg.queue_notify = *data;
			if (viornd_notifyq())
				*intr = 1;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			viornd.cfg.device_status = *data;
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = viornd.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = viornd.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = viornd.cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = viornd.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = viornd.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = viornd.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = viornd.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = viornd.cfg.isr_status;
			viornd.cfg.isr_status = 0;
			vcpu_deassert_irq(viornd.vm_id, 0, viornd.irq);
			break;
		}
	}
	return (0);
}

/*
 * vmmci_ctl
 *
 * Inject a command into the vmmci device, potentially delivering interrupt.
 *
 * Called by the vm process's event(3) loop.
 */
int
vmmci_ctl(unsigned int cmd)
{
	int ret = 0;
	struct timeval tv = { 0, 0 };

	mutex_lock(&vmmci.mutex);

	if ((vmmci.cfg.device_status &
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) {
		ret = -1;
		goto unlock;
	}

	if (cmd == vmmci.cmd)
		goto unlock;

	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
	case VMMCI_REBOOT:
		/* Update command */
		vmmci.cmd = cmd;

		/*
		 * vmm VMs do not support powerdown, send a reboot request
		 * instead and turn it off after the triple fault.
		 */
		if (cmd == VMMCI_SHUTDOWN)
			cmd = VMMCI_REBOOT;

		/* Trigger interrupt */
		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
		vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq);

		/* Add ACK timeout */
		tv.tv_sec = VMMCI_TIMEOUT_SHORT;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	case VMMCI_SYNCRTC:
		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
			/* RTC updated, request guest VM resync of its RTC */
			vmmci.cmd = cmd;

			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
			vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq);
		} else {
			log_debug("%s: RTC sync skipped (guest does not "
			    "support RTC sync)", __func__);
		}
		break;
	default:
		fatalx("invalid vmmci command: %d", cmd);
	}

unlock:
	mutex_unlock(&vmmci.mutex);

	return (ret);
}

/*
 * vmmci_ack
 *
 * Process a write to the command register.
 *
 * Called by the vcpu thread. Must be called with the mutex held.
 */
void
vmmci_ack(unsigned int cmd)
{
	switch (cmd) {
	case VMMCI_NONE:
		break;
	case VMMCI_SHUTDOWN:
		/*
		 * The shutdown was requested by the VM if we don't have
		 * a pending shutdown request. In this case add a short
		 * timeout to give the VM a chance to reboot before the
		 * timer is expired.
		 */
		if (vmmci.cmd == 0) {
			log_debug("%s: vm %u requested shutdown", __func__,
			    vmmci.vm_id);
			vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_SHORT);
			return;
		}
		/* FALLTHROUGH */
	case VMMCI_REBOOT:
		/*
		 * If the VM acknowledged our shutdown request, give it
		 * enough time to shutdown or reboot gracefully. This
		 * might take a considerable amount of time (running
		 * rc.shutdown on the VM), so increase the timeout before
		 * killing it forcefully.
		 */
		if (cmd == vmmci.cmd) {
			log_debug("%s: vm %u acknowledged shutdown request",
			    __func__, vmmci.vm_id);
			vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_LONG);
		}
		break;
	case VMMCI_SYNCRTC:
		log_debug("%s: vm %u acknowledged RTC sync request",
		    __func__, vmmci.vm_id);
		vmmci.cmd = VMMCI_NONE;
		break;
	default:
		log_warnx("%s: illegal request %u", __func__, cmd);
		break;
	}
}

void
vmmci_timeout(int fd, short type, void *arg)
{
	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
}

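/*
 * vmmci_io
 *
 * PCI I/O callback for the vmm control interface.  The device config area
 * exposes the current command at offset 0 and, as read back by the guest
 * below, the host wall clock as two 32-bit halves of tv_sec (offsets 4/8)
 * and tv_usec (offsets 12/16); the clock is sampled once when the first
 * half is read so both halves come from the same gettimeofday() call.
 */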
int
vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *unused, uint8_t sz)
{
	*intr = 0xFF;

	mutex_lock(&vmmci.mutex);
	if (dir == 0) {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_FEATURES:
		case VIRTIO_CONFIG_QUEUE_SIZE:
		case VIRTIO_CONFIG_ISR_STATUS:
			log_warnx("%s: illegal write %x to %s",
			    __progname, *data, virtio_reg_name(reg));
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			vmmci.cfg.guest_feature = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			vmmci.cfg.queue_pfn = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			vmmci.cfg.queue_select = *data;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			vmmci.cfg.queue_notify = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			vmmci.cfg.device_status = *data;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			vmmci_ack(*data);
			break;
		}
	} else {
		switch (reg) {
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
			*data = vmmci.cmd;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
			/* Update time once when reading the first register */
			gettimeofday(&vmmci.time, NULL);
			*data = (uint64_t)vmmci.time.tv_sec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
			*data = (uint64_t)vmmci.time.tv_sec << 32;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
			*data = (uint64_t)vmmci.time.tv_usec;
			break;
		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
			*data = (uint64_t)vmmci.time.tv_usec << 32;
			break;
		case VIRTIO_CONFIG_DEVICE_FEATURES:
			*data = vmmci.cfg.device_feature;
			break;
		case VIRTIO_CONFIG_GUEST_FEATURES:
			*data = vmmci.cfg.guest_feature;
			break;
		case VIRTIO_CONFIG_QUEUE_PFN:
			*data = vmmci.cfg.queue_pfn;
			break;
		case VIRTIO_CONFIG_QUEUE_SIZE:
			*data = vmmci.cfg.queue_size;
			break;
		case VIRTIO_CONFIG_QUEUE_SELECT:
			*data = vmmci.cfg.queue_select;
			break;
		case VIRTIO_CONFIG_QUEUE_NOTIFY:
			*data = vmmci.cfg.queue_notify;
			break;
		case VIRTIO_CONFIG_DEVICE_STATUS:
			*data = vmmci.cfg.device_status;
			break;
		case VIRTIO_CONFIG_ISR_STATUS:
			*data = vmmci.cfg.isr_status;
			vmmci.cfg.isr_status = 0;
			vcpu_deassert_irq(vmmci.vm_id, 0, vmmci.irq);
			break;
		}
	}
	mutex_unlock(&vmmci.mutex);

	return (0);
}

int
virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath)
{
	switch (type) {
	case VMDF_RAW:
		return 0;
	case VMDF_QCOW2:
		return virtio_qcow2_get_base(fd, path, npath, dpath);
	}
	log_warnx("%s: invalid disk format", __func__);
	return -1;
}

static void
vmmci_pipe_dispatch(int fd, short event, void *arg)
{
	enum pipe_msg_type msg;
	struct timeval tv = { 0, 0 };

	msg = vm_pipe_recv(&vmmci.dev_pipe);
	switch (msg) {
	case VMMCI_SET_TIMEOUT_SHORT:
		tv.tv_sec = VMMCI_TIMEOUT_SHORT;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	case VMMCI_SET_TIMEOUT_LONG:
		tv.tv_sec = VMMCI_TIMEOUT_LONG;
		evtimer_add(&vmmci.timeout, &tv);
		break;
	default:
		log_warnx("%s: invalid pipe message type %d", __func__, msg);
	}
}

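/*
 * virtio_init
 *
 * Create the PCI devices and queue state for the VM's virtio hardware:
 * the entropy device, one vionet per configured NIC, one vioblk per
 * configured disk (both of which run in separate device processes), the
 * optional vioscsi cdrom and, last, the vmm control interface.
 */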
void
virtio_init(struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	struct virtio_dev *dev;
	uint8_t id;
	uint8_t i, j;
	int ret = 0;

	/* Virtio entropy device */
	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
	    PCI_SUBCLASS_SYSTEM_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
		log_warnx("%s: can't add PCI virtio rng device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't add bar for virtio rng device",
		    __progname);
		return;
	}

	memset(&viornd, 0, sizeof(viornd));
	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
	    VIORND_QUEUE_SIZE;
	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
	viornd.pci_id = id;
	viornd.irq = pci_get_dev_irq(id);
	viornd.vm_id = vcp->vcp_id;

	SLIST_INIT(&virtio_devs);

	if (vmc->vmc_nnics > 0) {
		for (i = 0; i < vmc->vmc_nnics; i++) {
			dev = calloc(1, sizeof(struct virtio_dev));
			if (dev == NULL) {
				log_warn("%s: calloc failure allocating vionet",
				    __progname);
				return;
			}
			/* Virtio network */
			dev->dev_type = VMD_DEVTYPE_NET;

			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
			    PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio net device",
				    __progname);
				return;
			}
			dev->pci_id = id;
			dev->sync_fd = -1;
			dev->async_fd = -1;
			dev->vm_id = vcp->vcp_id;
			dev->vm_vmid = vm->vm_vmid;
			dev->irq = pci_get_dev_irq(id);

			/* The vionet pci bar function is called by the vcpu. */
			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
			    dev)) {
				log_warnx("%s: can't add bar for virtio net "
				    "device", __progname);
				return;
			}

			dev->vionet.vq[RXQ].qs = VIONET_QUEUE_SIZE;
			dev->vionet.vq[RXQ].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			dev->vionet.vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			dev->vionet.vq[RXQ].last_avail = 0;
			dev->vionet.vq[RXQ].notified_avail = 0;

			dev->vionet.vq[TXQ].qs = VIONET_QUEUE_SIZE;
			dev->vionet.vq[TXQ].vq_availoffset =
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
			dev->vionet.vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
			dev->vionet.vq[TXQ].last_avail = 0;
			dev->vionet.vq[TXQ].notified_avail = 0;

			dev->vionet.data_fd = child_taps[i];

			/* MAC address has been assigned by the parent */
			memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
			dev->vionet.cfg.device_feature = VIRTIO_NET_F_MAC;

			dev->vionet.lockedmac =
			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
			dev->vionet.local =
			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
			if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET)
				dev->vionet.pxeboot = 1;
			memcpy(&dev->vionet.local_prefix,
			    &env->vmd_cfg.cfg_localprefix,
			    sizeof(dev->vionet.local_prefix));
			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s",
			    __func__, vcp->vcp_name, i,
			    ether_ntoa((void *)dev->vionet.mac),
			    dev->vionet.lockedmac ? ", locked" : "",
			    dev->vionet.local ? ", local" : "",
			    dev->vionet.pxeboot ? ", pxeboot" : "");

			/* Add the vionet to our device list. */
			dev->vionet.idx = i;
			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
		}
	}

	if (vmc->vmc_ndisks > 0) {
		for (i = 0; i < vmc->vmc_ndisks; i++) {
			dev = calloc(1, sizeof(struct virtio_dev));
			if (dev == NULL) {
				log_warn("%s: calloc failure allocating vioblk",
				    __progname);
				return;
			}

			/* One vioblk device for each disk defined in vcp */
			dev->dev_type = VMD_DEVTYPE_DISK;

			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
			    PCI_CLASS_MASS_STORAGE,
			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
			    PCI_VENDOR_OPENBSD,
			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
				log_warnx("%s: can't add PCI virtio block "
				    "device", __progname);
				return;
			}
			dev->pci_id = id;
			dev->sync_fd = -1;
			dev->async_fd = -1;
			dev->vm_id = vcp->vcp_id;
			dev->vm_vmid = vm->vm_vmid;
			dev->irq = pci_get_dev_irq(id);

			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
			    &dev->vioblk)) {
				log_warnx("%s: can't add bar for virtio block "
				    "device", __progname);
				return;
			}
			dev->vioblk.vq[0].qs = VIOBLK_QUEUE_SIZE;
			dev->vioblk.vq[0].vq_availoffset =
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
			dev->vioblk.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
			dev->vioblk.vq[0].last_avail = 0;
			dev->vioblk.cfg.device_feature =
			    VIRTIO_BLK_F_SEG_MAX;
			dev->vioblk.seg_max = VIOBLK_SEG_MAX;

			/*
			 * Initialize disk fds to an invalid fd (-1), then
			 * set any child disk fds.
			 */
			memset(&dev->vioblk.disk_fd, -1,
			    sizeof(dev->vioblk.disk_fd));
			dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
			for (j = 0; j < dev->vioblk.ndisk_fd; j++)
				dev->vioblk.disk_fd[j] = child_disks[i][j];

			dev->vioblk.idx = i;
			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
		}
	}

	/*
	 * Launch virtio devices that support subprocess execution.
	 */
	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (virtio_dev_launch(vm, dev) != 0)
			fatalx("failed to launch virtio device");
	}

	/* vioscsi cdrom */
	if (strlen(vmc->vmc_cdrom)) {
		vioscsi = calloc(1, sizeof(struct vioscsi_dev));
		if (vioscsi == NULL) {
			log_warn("%s: calloc failure allocating vioscsi",
			    __progname);
			return;
		}

		if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
		    PCI_PRODUCT_QUMRANET_VIO_SCSI,
		    PCI_CLASS_MASS_STORAGE,
		    PCI_SUBCLASS_MASS_STORAGE_SCSI,
		    PCI_VENDOR_OPENBSD,
		    PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
			log_warnx("%s: can't add PCI vioscsi device",
			    __progname);
			return;
		}

		if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
			log_warnx("%s: can't add bar for vioscsi device",
			    __progname);
			return;
		}

		for (i = 0; i < VIRTIO_MAX_QUEUES; i++) {
			vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE;
			vioscsi->vq[i].vq_availoffset =
			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE;
			vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN(
			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE
			    + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE));
			vioscsi->vq[i].last_avail = 0;
		}
		if (virtio_raw_init(&vioscsi->file, &vioscsi->sz, &child_cdrom,
		    1) == -1) {
			log_warnx("%s: unable to determine iso format",
			    __func__);
			return;
		}
		vioscsi->locked = 0;
		vioscsi->lba = 0;
		vioscsi->n_blocks = vioscsi->sz / VIOSCSI_BLOCK_SIZE_CDROM;
		vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
		vioscsi->pci_id = id;
		vioscsi->vm_id = vcp->vcp_id;
		vioscsi->irq = pci_get_dev_irq(id);
	}

	/* virtio control device */
	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_OPENBSD_CONTROL,
	    PCI_CLASS_COMMUNICATIONS,
	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
	    PCI_VENDOR_OPENBSD,
	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
		log_warnx("%s: can't add PCI vmm control device",
		    __progname);
		return;
	}

	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
		log_warnx("%s: can't add bar for vmm control device",
		    __progname);
		return;
	}

	memset(&vmmci, 0, sizeof(vmmci));
	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
	    VMMCI_F_SYNCRTC;
	vmmci.vm_id = vcp->vcp_id;
	vmmci.irq = pci_get_dev_irq(id);
	vmmci.pci_id = id;
	ret = pthread_mutex_init(&vmmci.mutex, NULL);
	if (ret) {
		errno = ret;
		fatal("could not initialize vmmci mutex");
	}

	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
	vm_pipe_init(&vmmci.dev_pipe, vmmci_pipe_dispatch);
	event_add(&vmmci.dev_pipe.read_ev, NULL);
}

/*
 * vionet_set_hostmac
 *
 * Sets the hardware address for the host-side tap(4) on a vionet_dev.
 *
 * This should only be called from the event-loop thread
 *
 * vm: pointer to the current vmd_vm instance
 * idx: index into the array of vionet_dev's for the target vionet_dev
 * addr: ethernet address to set
 */
void
vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct virtio_dev *dev;
	struct vionet_dev *vionet = NULL;
	int ret;

	if (idx > vmc->vmc_nnics)
		fatalx("%s: invalid vionet index: %u", __func__, idx);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (dev->dev_type == VMD_DEVTYPE_NET
		    && dev->vionet.idx == idx) {
			vionet = &dev->vionet;
			break;
		}
	}
	if (vionet == NULL)
		fatalx("%s: dev == NULL, idx = %u", __func__, idx);

	/* Set the local vm process copy. */
	memcpy(vionet->hostmac, addr, sizeof(vionet->hostmac));

	/* Send the information to the device process. */
	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_HOSTMAC, 0, 0, -1,
	    vionet->hostmac, sizeof(vionet->hostmac));
	if (ret == -1) {
		log_warnx("%s: failed to queue hostmac to vionet dev %u",
		    __func__, idx);
		return;
	}
}

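/*
 * virtio_shutdown
 *
 * Flush the vioscsi backing file and ask every child device process to
 * shut down, then reap the children.  Called while tearing down the VM,
 * after the asynchronous event thread has been stopped.
 */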
void
virtio_shutdown(struct vmd_vm *vm)
{
	int ret, status;
	pid_t pid = 0;
	struct virtio_dev *dev, *tmp;
	struct viodev_msg msg;
	struct imsgbuf *ibuf;

	/* Ensure that our disks are synced. */
	if (vioscsi != NULL)
		vioscsi->file.close(vioscsi->file.p, 0);

	/*
	 * Broadcast shutdown to child devices. We need to do this
	 * synchronously as we have already stopped the async event thread.
	 */
	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		memset(&msg, 0, sizeof(msg));
		msg.type = VIODEV_MSG_SHUTDOWN;
		ibuf = &dev->sync_iev.ibuf;
		ret = imsg_compose(ibuf, VIODEV_MSG_SHUTDOWN, 0, 0, -1,
		    &msg, sizeof(msg));
		if (ret == -1)
			fatalx("%s: failed to send shutdown to device",
			    __func__);
		if (imsgbuf_flush(ibuf) == -1)
			fatalx("%s: imsgbuf_flush", __func__);
	}

	/*
	 * Wait for all children to shutdown using a simple approach of
	 * iterating over known child devices and waiting for them to die.
	 */
	SLIST_FOREACH_SAFE(dev, &virtio_devs, dev_next, tmp) {
		log_debug("%s: waiting on device pid %d", __func__,
		    dev->dev_pid);
		do {
			pid = waitpid(dev->dev_pid, &status, WNOHANG);
		} while (pid == 0 || (pid == -1 && errno == EINTR));
		if (pid == dev->dev_pid)
			log_debug("%s: device for pid %d is stopped",
			    __func__, pid);
		else
			log_warnx("%s: unexpected pid %d", __func__, pid);
		free(dev);
	}
}

int
vmmci_restore(int fd, uint32_t vm_id)
{
	log_debug("%s: receiving vmmci", __func__);
	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error reading vmmci from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
		log_warnx("%s: can't set bar fn for vmm control device",
		    __progname);
		return (-1);
	}
	vmmci.vm_id = vm_id;
	vmmci.irq = pci_get_dev_irq(vmmci.pci_id);
	memset(&vmmci.timeout, 0, sizeof(struct event));
	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
	return (0);
}

int
viornd_restore(int fd, struct vmd_vm *vm)
{
	void *hva = NULL;

	log_debug("%s: receiving viornd", __func__);
	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error reading viornd from fd", __func__);
		return (-1);
	}
	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
		log_warnx("%s: can't set bar fn for virtio rng device",
		    __progname);
		return (-1);
	}
	viornd.vm_id = vm->vm_params.vmc_params.vcp_id;
	viornd.irq = pci_get_dev_irq(viornd.pci_id);

	hva = hvaddr_mem(viornd.vq[0].q_gpa, vring_size(VIORND_QUEUE_SIZE));
	if (hva == NULL)
		fatal("failed to restore viornd virtqueue");
	viornd.vq[0].q_hva = hva;

	return (0);
}

int
vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params *vcp = &vmc->vmc_params;
	struct virtio_dev *dev;
	uint8_t i;

	if (vmc->vmc_nnics == 0)
		return (0);

	for (i = 0; i < vmc->vmc_nnics; i++) {
		dev = calloc(1, sizeof(struct virtio_dev));
		if (dev == NULL) {
			log_warn("%s: calloc failure allocating vionet",
			    __progname);
			return (-1);
		}

		log_debug("%s: receiving virtio network device", __func__);
		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
		    != sizeof(struct virtio_dev)) {
			log_warnx("%s: error reading vionet from fd",
			    __func__);
			return (-1);
		}

		/* Virtio network */
		if (dev->dev_type != VMD_DEVTYPE_NET) {
			log_warnx("%s: invalid device type", __func__);
			return (-1);
		}

		dev->sync_fd = -1;
		dev->async_fd = -1;
		dev->vm_id = vcp->vcp_id;
		dev->vm_vmid = vm->vm_vmid;
		dev->irq = pci_get_dev_irq(dev->pci_id);

		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
			log_warnx("%s: can't set bar fn for virtio net "
			    "device", __progname);
			return (-1);
		}

		dev->vionet.data_fd = child_taps[i];
		dev->vionet.idx = i;

		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
	}

	return (0);
}

int
vioblk_restore(int fd, struct vmd_vm *vm,
    int child_disks[][VM_MAX_BASE_PER_DISK])
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct virtio_dev *dev;
	uint8_t i, j;

	if (vmc->vmc_ndisks == 0)
		return (0);

	for (i = 0; i < vmc->vmc_ndisks; i++) {
		dev = calloc(1, sizeof(struct virtio_dev));
		if (dev == NULL) {
			log_warn("%s: calloc failure allocating vioblks",
			    __progname);
			return (-1);
		}

		log_debug("%s: receiving vioblk", __func__);
		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
		    != sizeof(struct virtio_dev)) {
			log_warnx("%s: error reading vioblk from fd", __func__);
			return (-1);
		}
		if (dev->dev_type != VMD_DEVTYPE_DISK) {
			log_warnx("%s: invalid device type", __func__);
			return (-1);
		}

		dev->sync_fd = -1;
		dev->async_fd = -1;

		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
			log_warnx("%s: can't set bar fn for virtio block "
			    "device", __progname);
			return (-1);
		}
		dev->vm_id = vmc->vmc_params.vcp_id;
		dev->irq = pci_get_dev_irq(dev->pci_id);

		memset(&dev->vioblk.disk_fd, -1, sizeof(dev->vioblk.disk_fd));
		dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
		for (j = 0; j < dev->vioblk.ndisk_fd; j++)
			dev->vioblk.disk_fd[j] = child_disks[i][j];

		dev->vioblk.idx = i;
		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
	}
	return (0);
}

int
vioscsi_restore(int fd, struct vmd_vm *vm, int child_cdrom)
{
	void *hva = NULL;
	unsigned int i;

	if (!strlen(vm->vm_params.vmc_cdrom))
		return (0);

	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
	if (vioscsi == NULL) {
		log_warn("%s: calloc failure allocating vioscsi", __progname);
		return (-1);
	}

	log_debug("%s: receiving vioscsi", __func__);

	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error reading vioscsi from fd", __func__);
		return (-1);
	}

	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
		log_warnx("%s: can't set bar fn for vioscsi device",
		    __progname);
		return (-1);
	}

	vioscsi->vm_id = vm->vm_params.vmc_params.vcp_id;
	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);

	/* vioscsi uses 3 virtqueues. */
	for (i = 0; i < 3; i++) {
		hva = hvaddr_mem(vioscsi->vq[i].q_gpa,
		    vring_size(VIOSCSI_QUEUE_SIZE));
		if (hva == NULL)
			fatal("failed to restore vioscsi virtqueue");
		vioscsi->vq[i].q_hva = hva;
	}

	return (0);
}

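/*
 * virtio_restore
 *
 * Rebuild all virtio device state from a snapshot file descriptor.  The
 * devices must be read back in the same order virtio_dump() wrote them:
 * viornd, vioblk, vioscsi, vionet and finally vmmci.
 */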
int
virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct virtio_dev *dev;
	int ret;

	SLIST_INIT(&virtio_devs);

	if ((ret = viornd_restore(fd, vm)) == -1)
		return (ret);

	if ((ret = vioblk_restore(fd, vm, child_disks)) == -1)
		return (ret);

	if ((ret = vioscsi_restore(fd, vm, child_cdrom)) == -1)
		return (ret);

	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
		return (ret);

	if ((ret = vmmci_restore(fd, vm->vm_params.vmc_params.vcp_id)) == -1)
		return (ret);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (virtio_dev_launch(vm, dev) != 0)
			fatalx("%s: failed to restore virtio dev", __func__);
	}

	return (0);
}

int
viornd_dump(int fd)
{
	log_debug("%s: sending viornd", __func__);

	viornd.vq[0].q_hva = NULL;

	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
		log_warnx("%s: error writing viornd to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vmmci_dump(int fd)
{
	log_debug("%s: sending vmmci", __func__);

	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
		log_warnx("%s: error writing vmmci to fd", __func__);
		return (-1);
	}
	return (0);
}

int
vionet_dump(int fd)
{
	struct virtio_dev *dev, temp;
	struct viodev_msg msg;
	struct imsg imsg;
	struct imsgbuf *ibuf = NULL;
	size_t sz;
	int ret;

	log_debug("%s: dumping vionet", __func__);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (dev->dev_type != VMD_DEVTYPE_NET)
			continue;

		memset(&msg, 0, sizeof(msg));
		memset(&imsg, 0, sizeof(imsg));

		ibuf = &dev->sync_iev.ibuf;
		msg.type = VIODEV_MSG_DUMP;

		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed requesting dump of vionet[%d]",
			    __func__, dev->vionet.idx);
			return (-1);
		}
		if (imsgbuf_flush(ibuf) == -1) {
			log_warnx("%s: imsgbuf_flush", __func__);
			return (-1);
		}

		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
		if (sz != sizeof(temp)) {
			log_warnx("%s: failed to dump vionet[%d]", __func__,
			    dev->vionet.idx);
			return (-1);
		}

		/* Clear volatile state. Will reinitialize on restore. */
		temp.vionet.vq[RXQ].q_hva = NULL;
		temp.vionet.vq[TXQ].q_hva = NULL;
		temp.async_fd = -1;
		temp.sync_fd = -1;
		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));

		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
			log_warnx("%s: error writing vionet to fd", __func__);
			return (-1);
		}
	}

	return (0);
}

int
vioblk_dump(int fd)
{
	struct virtio_dev *dev, temp;
	struct viodev_msg msg;
	struct imsg imsg;
	struct imsgbuf *ibuf = NULL;
	size_t sz;
	int ret;

	log_debug("%s: dumping vioblk", __func__);

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		if (dev->dev_type != VMD_DEVTYPE_DISK)
			continue;

		memset(&msg, 0, sizeof(msg));
		memset(&imsg, 0, sizeof(imsg));

		ibuf = &dev->sync_iev.ibuf;
		msg.type = VIODEV_MSG_DUMP;

		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed requesting dump of vioblk[%d]",
			    __func__, dev->vioblk.idx);
			return (-1);
		}
		if (imsgbuf_flush(ibuf) == -1) {
			log_warnx("%s: imsgbuf_flush", __func__);
			return (-1);
		}

		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
		if (sz != sizeof(temp)) {
			log_warnx("%s: failed to dump vioblk[%d]", __func__,
			    dev->vioblk.idx);
			return (-1);
		}

		/* Clear volatile state. Will reinitialize on restore. */
		temp.vioblk.vq[0].q_hva = NULL;
		temp.async_fd = -1;
		temp.sync_fd = -1;
		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));

		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
			log_warnx("%s: error writing vioblk to fd", __func__);
			return (-1);
		}
	}

	return (0);
}

int
vioscsi_dump(int fd)
{
	unsigned int i;

	if (vioscsi == NULL)
		return (0);

	log_debug("%s: sending vioscsi", __func__);

	for (i = 0; i < 3; i++)
		vioscsi->vq[i].q_hva = NULL;

	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
	    sizeof(struct vioscsi_dev)) {
		log_warnx("%s: error writing vioscsi to fd", __func__);
		return (-1);
	}
	return (0);
}

int
virtio_dump(int fd)
{
	int ret;

	if ((ret = viornd_dump(fd)) == -1)
		return ret;

	if ((ret = vioblk_dump(fd)) == -1)
		return ret;

	if ((ret = vioscsi_dump(fd)) == -1)
		return ret;

	if ((ret = vionet_dump(fd)) == -1)
		return ret;

	if ((ret = vmmci_dump(fd)) == -1)
		return ret;

	return (0);
}

void
virtio_broadcast_imsg(struct vmd_vm *vm, uint16_t type, void *data,
    uint16_t datalen)
{
	struct virtio_dev *dev;
	int ret;

	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
		ret = imsg_compose_event(&dev->async_iev, type, 0, 0, -1, data,
		    datalen);
		if (ret == -1) {
			log_warnx("%s: failed to broadcast imsg type %u",
			    __func__, type);
		}
	}
}

void
virtio_stop(struct vmd_vm *vm)
{
	return virtio_broadcast_imsg(vm, IMSG_VMDOP_PAUSE_VM, NULL, 0);
}

void
virtio_start(struct vmd_vm *vm)
{
	return virtio_broadcast_imsg(vm, IMSG_VMDOP_UNPAUSE_VM, NULL, 0);
}

/*
 * Fork+exec a child virtio device. Returns 0 on success.
 */
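/*
 * The parent/child handoff goes roughly as follows: two socketpairs are
 * created (a synchronous channel for vcpu I/O and an asynchronous one for
 * event-loop traffic), the virtio_dev and vmd_vm structs are written over
 * the sync channel, and the child is expected to answer with a
 * VIODEV_MSG_READY message before the parent wires up async event
 * handling.
 */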
static int
virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev)
{
	char *nargv[12], num[32], vmm_fd[32], vm_name[VM_NAME_MAX], t[2];
	pid_t dev_pid;
	int sync_fds[2], async_fds[2], ret = 0;
	size_t i, sz = 0;
	struct viodev_msg msg;
	struct virtio_dev *dev_entry;
	struct imsg imsg;
	struct imsgev *iev = &dev->sync_iev;

	switch (dev->dev_type) {
	case VMD_DEVTYPE_NET:
		log_debug("%s: launching vionet%d",
		    vm->vm_params.vmc_params.vcp_name, dev->vionet.idx);
		break;
	case VMD_DEVTYPE_DISK:
		log_debug("%s: launching vioblk%d",
		    vm->vm_params.vmc_params.vcp_name, dev->vioblk.idx);
		break;
		/* NOTREACHED */
	default:
		log_warn("%s: invalid device type", __func__);
		return (EINVAL);
	}

	/* We need two channels: one synchronous (IO reads) and one async. */
	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
	    sync_fds) == -1) {
		log_warn("failed to create socketpair");
		return (errno);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
	    async_fds) == -1) {
		log_warn("failed to create async socketpair");
		return (errno);
	}

	/* Fork... */
	dev_pid = fork();
	if (dev_pid == -1) {
		ret = errno;
		log_warn("%s: fork failed", __func__);
		goto err;
	}

	if (dev_pid > 0) {
		/* Parent */
		close_fd(sync_fds[1]);
		close_fd(async_fds[1]);

		/* Save the child's pid to help with cleanup. */
		dev->dev_pid = dev_pid;

		/* Set the channel fds to the child's before sending. */
		dev->sync_fd = sync_fds[1];
		dev->async_fd = async_fds[1];

		/* 1. Send over our configured device. */
		log_debug("%s: sending '%c' type device struct", __func__,
		    dev->dev_type);
		sz = atomicio(vwrite, sync_fds[0], dev, sizeof(*dev));
		if (sz != sizeof(*dev)) {
			log_warnx("%s: failed to send device", __func__);
			ret = EIO;
			goto err;
		}

		/* Close data fds. Only the child device needs them now. */
		if (virtio_dev_closefds(dev) == -1) {
			log_warnx("%s: failed to close device data fds",
			    __func__);
			goto err;
		}

		/* 2. Send over details on the VM (including memory fds). */
		log_debug("%s: sending vm message for '%s'", __func__,
		    vm->vm_params.vmc_params.vcp_name);
		sz = atomicio(vwrite, sync_fds[0], vm, sizeof(*vm));
		if (sz != sizeof(*vm)) {
			log_warnx("%s: failed to send vm details", __func__);
			ret = EIO;
			goto err;
		}

		/*
		 * Initialize our imsg channel to the child device. The initial
		 * communication will be synchronous. We expect the child to
		 * report itself "ready" to confirm the launch was a success.
		 */
		if (imsgbuf_init(&iev->ibuf, sync_fds[0]) == -1) {
			log_warn("%s: failed to init imsgbuf", __func__);
			goto err;
		}
		imsgbuf_allow_fdpass(&iev->ibuf);
		ret = imsgbuf_read_one(&iev->ibuf, &imsg);
		if (ret == 0 || ret == -1) {
			log_warnx("%s: failed to receive ready message from "
			    "'%c' type device", __func__, dev->dev_type);
			ret = EIO;
			goto err;
		}
		ret = 0;

		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		if (msg.type != VIODEV_MSG_READY) {
			log_warnx("%s: expected ready message, got type %d",
			    __func__, msg.type);
			ret = EINVAL;
			goto err;
		}
		log_debug("%s: device reports ready via sync channel",
		    __func__);

		/*
		 * Wire in the async event handling, but after reverting back
		 * to the parent's fd's.
		 */
		dev->sync_fd = sync_fds[0];
		dev->async_fd = async_fds[0];
		vm_device_pipe(dev, virtio_dispatch_dev, NULL);
	} else {
		/* Child */
		close_fd(async_fds[0]);
		close_fd(sync_fds[0]);

		/* Close pty. Virtio devices do not need it. */
		close_fd(vm->vm_tty);
		vm->vm_tty = -1;

		if (vm->vm_cdrom != -1) {
			close_fd(vm->vm_cdrom);
			vm->vm_cdrom = -1;
		}

		/* Keep data file descriptors open after exec. */
		SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) {
			if (dev_entry == dev)
				continue;
			if (virtio_dev_closefds(dev_entry) == -1)
				fatalx("unable to close other virtio devs");
		}

		memset(num, 0, sizeof(num));
		snprintf(num, sizeof(num), "%d", sync_fds[1]);
		memset(vmm_fd, 0, sizeof(vmm_fd));
		snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
		memset(vm_name, 0, sizeof(vm_name));
		snprintf(vm_name, sizeof(vm_name), "%s",
		    vm->vm_params.vmc_params.vcp_name);

		t[0] = dev->dev_type;
		t[1] = '\0';

		i = 0;
		nargv[i++] = env->argv0;
		nargv[i++] = "-X";
		nargv[i++] = num;
		nargv[i++] = "-t";
		nargv[i++] = t;
		nargv[i++] = "-i";
		nargv[i++] = vmm_fd;
		nargv[i++] = "-p";
		nargv[i++] = vm_name;
		if (env->vmd_debug)
			nargv[i++] = "-d";
		if (env->vmd_verbose == 1)
			nargv[i++] = "-v";
		else if (env->vmd_verbose > 1)
			nargv[i++] = "-vv";
		nargv[i++] = NULL;
		if (i > sizeof(nargv) / sizeof(nargv[0]))
			fatalx("%s: nargv overflow", __func__);

		/* Control resumes in vmd.c:main(). */
		execvp(nargv[0], nargv);

		ret = errno;
		log_warn("%s: failed to exec device", __func__);
		_exit(ret);
		/* NOTREACHED */
	}

	return (ret);

err:
	close_fd(sync_fds[0]);
	close_fd(sync_fds[1]);
	close_fd(async_fds[0]);
	close_fd(async_fds[1]);
	return (ret);
}

/*
 * Initialize an async imsg channel for a virtio device.
 */
int
vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *),
    struct event_base *ev_base)
{
	struct imsgev *iev = &dev->async_iev;
	int fd = dev->async_fd;

	log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__,
	    dev->dev_type, fd);

	if (imsgbuf_init(&iev->ibuf, fd) == -1)
		fatal("imsgbuf_init");
	imsgbuf_allow_fdpass(&iev->ibuf);
	iev->handler = cb;
	iev->data = dev;
	iev->events = EV_READ;
	imsg_event_add2(iev, ev_base);

	return (0);
}

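/*
 * virtio_dispatch_dev
 *
 * Event handler for the async channel to a child device process: drains
 * incoming imsgs and passes any IMSG_DEVOP_MSG payloads to
 * handle_dev_msg().  If the pipe dies, the event is removed and the event
 * loop is asked to exit.
 */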
void
virtio_dispatch_dev(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	struct viodev_msg msg;
	ssize_t n = 0;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_DEVOP_MSG:
			IMSG_SIZE_CHECK(&imsg, &msg);
			memcpy(&msg, imsg.data, sizeof(msg));
			handle_dev_msg(&msg, dev);
			break;
		default:
			log_warnx("%s: got non devop imsg %d", __func__,
			    imsg.hdr.type);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}

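/*
 * handle_dev_msg
 *
 * Process a viodev_msg received from a child device process.  Interrupt
 * kicks are forwarded to the vcpu, ready/error reports are logged and
 * anything else is rejected.  Returns 0 if the message was handled.
 */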
static int
handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev)
{
	uint32_t vm_id = gdev->vm_id;
	int irq = gdev->irq;

	switch (msg->type) {
	case VIODEV_MSG_KICK:
		if (msg->state == INTR_STATE_ASSERT)
			vcpu_assert_irq(vm_id, msg->vcpu, irq);
		else if (msg->state == INTR_STATE_DEASSERT)
			vcpu_deassert_irq(vm_id, msg->vcpu, irq);
		break;
	case VIODEV_MSG_READY:
		log_debug("%s: device reports ready", __func__);
		break;
	case VIODEV_MSG_ERROR:
		log_warnx("%s: device reported error", __func__);
		break;
	case VIODEV_MSG_INVALID:
	case VIODEV_MSG_IO_READ:
	case VIODEV_MSG_IO_WRITE:
		/* FALLTHROUGH */
	default:
		log_warnx("%s: unsupported device message type %d", __func__,
		    msg->type);
		return (1);
	}

	return (0);
};

/*
 * Called by the VM process while processing IO from the VCPU thread.
 *
 * N.b. Since the VCPU thread calls this function, we cannot mutate the event
 * system. All ipc messages must be sent manually and cannot be queued for
 * the event loop to push them. (We need to perform a synchronous read, so
 * this isn't really a big deal.)
 */
int
virtio_pci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
    void *cookie, uint8_t sz)
{
	struct virtio_dev *dev = (struct virtio_dev *)cookie;
	struct imsgbuf *ibuf = &dev->sync_iev.ibuf;
	struct imsg imsg;
	struct viodev_msg msg;
	int ret = 0;

	memset(&msg, 0, sizeof(msg));
	msg.reg = reg;
	msg.io_sz = sz;

	if (dir == 0) {
		msg.type = VIODEV_MSG_IO_WRITE;
		msg.data = *data;
		msg.data_valid = 1;
	} else
		msg.type = VIODEV_MSG_IO_READ;

	if (msg.type == VIODEV_MSG_IO_WRITE) {
		/*
		 * Write request. No reply expected.
		 */
		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warn("%s: failed to send async io event to virtio"
			    " device", __func__);
			return (ret);
		}
		if (imsgbuf_flush(ibuf) == -1) {
			log_warnx("%s: imsgbuf_flush (write)", __func__);
			return (-1);
		}
	} else {
		/*
		 * Read request. Requires waiting for a reply.
		 */
		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
		    sizeof(msg));
		if (ret == -1) {
			log_warnx("%s: failed to send sync io event to virtio"
			    " device", __func__);
			return (ret);
		}
		if (imsgbuf_flush(ibuf) == -1) {
			log_warnx("%s: imsgbuf_flush (read)", __func__);
			return (-1);
		}

		/* Read our reply. */
		ret = imsgbuf_read_one(ibuf, &imsg);
		if (ret == 0 || ret == -1) {
			log_warn("%s: imsgbuf_read (n=%d)", __func__, ret);
			return (-1);
		}
		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) {
#if DEBUG
			log_debug("%s: got sync read response (reg=%s)",
			    __func__, virtio_reg_name(msg.reg));
#endif /* DEBUG */
			*data = msg.data;
			/*
			 * It's possible we're asked to {de,}assert after the
			 * device performs a register read.
			 */
			if (msg.state == INTR_STATE_ASSERT)
				vcpu_assert_irq(dev->vm_id, msg.vcpu, msg.irq);
			else if (msg.state == INTR_STATE_DEASSERT)
				vcpu_deassert_irq(dev->vm_id, msg.vcpu, msg.irq);
		} else {
			log_warnx("%s: expected IO_READ, got %d", __func__,
			    msg.type);
			return (-1);
		}
	}

	return (0);
}

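/*
 * virtio_assert_irq / virtio_deassert_irq
 *
 * Queue a VIODEV_MSG_KICK on the device's async imsg channel carrying
 * INTR_STATE_ASSERT (or INTR_STATE_DEASSERT) for this device's irq and the
 * given vcpu.
 */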
void
virtio_assert_irq(struct virtio_dev *dev, int vcpu)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = vcpu;
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_ASSERT;

	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1)
		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
}

void
virtio_deassert_irq(struct virtio_dev *dev, int vcpu)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = vcpu;
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_DEASSERT;

	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1)
		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}

/*
 * Close all underlying file descriptors for a given virtio device.
 */
static int
virtio_dev_closefds(struct virtio_dev *dev)
{
	size_t i;

	switch (dev->dev_type) {
	case VMD_DEVTYPE_DISK:
		for (i = 0; i < dev->vioblk.ndisk_fd; i++) {
			close_fd(dev->vioblk.disk_fd[i]);
			dev->vioblk.disk_fd[i] = -1;
		}
		break;
	case VMD_DEVTYPE_NET:
		close_fd(dev->vionet.data_fd);
		dev->vionet.data_fd = -1;
		break;
	default:
		log_warnx("%s: invalid device type", __func__);
		return (-1);
	}

	close_fd(dev->async_fd);
	dev->async_fd = -1;
	close_fd(dev->sync_fd);
	dev->sync_fd = -1;

	return (0);
}