From: David van Moolenbroek Date: Wed, 18 Sep 2013 12:02:17 +0000 (+0200) Subject: Add VND driver, providing loopback devices X-Git-Tag: v3.3.0~564 X-Git-Url: http://zhaoyanbai.com/repos/?a=commitdiff_plain;h=6989311826e5836158c02f2210a4e348350ead90;p=minix.git Add VND driver, providing loopback devices Change-Id: I40fa695e28c67477a75383e6f1550e451afcab41 --- diff --git a/commands/DESCRIBE/DESCRIBE.sh b/commands/DESCRIBE/DESCRIBE.sh index 63560ae36..b028e7667 100644 --- a/commands/DESCRIBE/DESCRIBE.sh +++ b/commands/DESCRIBE/DESCRIBE.sh @@ -196,6 +196,22 @@ do 18,0) des="UNIX domain socket" dev=uds ;; + 5[6-9],0|6[0-3],0) + drive=`expr $major - 56` + des="vnode disk $drive" dev=vnd$drive + ;; + 5[6-9],[1-4]|6[0-3],[1-4]) + drive=`expr $major - 56` + par=`expr $minor - 1` + des="vnode disk $drive partition $par" dev=vnd${drive}p${par} + ;; + 5[6-9],12[89]|5[6-9],13[0-9]|5[6-9],14[0-3]|6[0-3],12[89]|6[0-3],13[0-9]|6[0-3],14[0-3]) + drive=`expr $major - 56` + par=`expr \\( \\( $minor - 128 \\) / 4 \\) % 4` + sub=`expr \\( $minor - 128 \\) % 4` + des="vnode disk $drive partition $par slice $sub" + dev=vnd${drive}p${par}s${sub} + ;; BAD,BAD) des= dev= ;; diff --git a/commands/MAKEDEV/MAKEDEV.sh b/commands/MAKEDEV/MAKEDEV.sh index 9a4c1a9cd..83e2270a8 100644 --- a/commands/MAKEDEV/MAKEDEV.sh +++ b/commands/MAKEDEV/MAKEDEV.sh @@ -33,7 +33,9 @@ case $#:$1 in eepromb3s54 eepromb3s55 eepromb3s56 eepromb3s57 \ tsl2550b1s39 tsl2550b2s39 tsl2550b3s39 \ sht21b1s40 sht21b2s40 sht21b3s40 \ - bmp085b1s77 bmp085b2s77 bmp085b3s77 + bmp085b1s77 bmp085b2s77 bmp085b3s77 \ + vnd0 vnd0p0 vnd0p0s0 vnd1 vnd1p0 vnd1p0s0 \ + vnd2 vnd3 vnd4 vnd5 vnd6 vnd7 ;; 0:|1:-\?)
cat >&2 <&2 ex=1 diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi index 17b3d94f1..ded48abb5 100644 --- a/distrib/sets/lists/minix/mi +++ b/distrib/sets/lists/minix/mi @@ -668,6 +668,7 @@ ./usr/include/dev minix-sys ./usr/include/dev/i2c minix-sys ./usr/include/dev/i2c/i2c_io.h minix-sys +./usr/include/dev/vndvar.h minix-sys ./usr/include/dirent.h minix-sys ./usr/include/disktab.h minix-sys ./usr/include/dlfcn.h minix-sys @@ -4766,6 +4767,7 @@ ./usr/sbin/vfs minix-sys ./usr/sbin/vipw minix-sys ./usr/sbin/vm minix-sys +./usr/sbin/vnd minix-sys ./usr/sbin/zic minix-sys ./usr/share minix-sys ./usr/share/atf minix-sys atf diff --git a/drivers/Makefile b/drivers/Makefile index 907ad6d9d..dd7ed24d4 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -19,12 +19,12 @@ SUBDIR= log tty SUBDIR= ahci amddev atl2 at_wini audio dec21140A dp8390 dpeth \ e1000 fbd filter floppy fxp hello lance log mmc orinoco pci printer \ random readclock rtl8139 rtl8169 ti1225 tty vbox acpi \ - virtio_blk virtio_net + virtio_blk virtio_net vnd .endif .if ${MACHINE_ARCH} == "earm" SUBDIR= bmp085 cat24c256 fb gpio i2c mmc lan8710a log readclock \ - sht21 tda19988 tps65217 tps65950 tsl2550 tty random + sht21 tda19988 tps65217 tps65950 tsl2550 tty random vnd .endif .endif # ${MKIMAGEONLY} != "yes" diff --git a/drivers/vnd/Makefile b/drivers/vnd/Makefile new file mode 100644 index 000000000..493430b27 --- /dev/null +++ b/drivers/vnd/Makefile @@ -0,0 +1,12 @@ +# Makefile for the VNode Disk driver (VND) +PROG= vnd +SRCS= vnd.c + +DPADD+= ${LIBBLOCKDRIVER} ${LIBSYS} +LDADD+= -lblockdriver -lsys + +MAN= + +BINDIR?= /usr/sbin + +.include diff --git a/drivers/vnd/NOTES b/drivers/vnd/NOTES new file mode 100644 index 000000000..382ead657 --- /dev/null +++ b/drivers/vnd/NOTES @@ -0,0 +1,85 @@ +Development notes regarding VND. Original document by David van Moolenbroek. 
+ + +DESIGN DECISIONS + +As simple as the VND driver implementation looks, several important decisions +had to be made in the design process. These decisions are listed here. + +Multiple instances instead of a single instance: The decision to spawn a +separate driver instance for each VND unit was not ideologically inspired, but +rather based on a practical issue. Namely, users may reasonably expect to be +able to set up a VND using a backing file that resides on a file system hosted +on another VND. If one single driver instance were to host both VND units, its +implementation would have to perform all its backcalls to VFS asynchronously, +so as to be able to process another incoming request that was initiated as part +of such an ongoing backcall. As of writing, MINIX3 does not support any form of +asynchronous I/O, but this would not even be sufficient: the asynchrony would +have to extend even to the close(2) call that takes place during device +unconfiguration, as this call could spark I/O to another VND device. +Ultimately, using one driver instance per VND unit avoids these complications +altogether, thus making nesting possible with a maximum depth of the number of +VFS threads. Of course, this comes at the cost of having more VND driver +processes; in order to avoid this cost in the common case, driver instances are +dynamically started and stopped by vndconfig(8). + +dupfrom(2) instead of openas(2): Compared to the NetBSD interface, the MINIX3 +VND API requires that the user program configuring a device pass in a file +descriptor in the vnd_ioctl structure instead of a pointer to a path name. +While binary compatibility with NetBSD would be impossible anyway (MINIX3 can +not support pointers in IOCTL data structures), providing a path name buffer +would be closer to what NetBSD does. There are two reasons behind the choice to +pass in a file descriptor instead. 
First, performing an open(2)-like call as +a driver backcall is tricky in terms of avoiding deadlocks in VFS, since it +would by nature violate the VFS locking order. On top of that, special +provisions would have to be added to support opening a file in the context of +another process so that chrooted processes would be supported, for example. +In contrast, copying a file descriptor to a remote process is relatively easy +because there is only one potential deadlock case to cover - that of the given +file descriptor identifying the VFS filp object used to control the very same +device - and VFS need only implement a procedure that very much resembles +sending a file descriptor across a UNIX domain socket. Second, since passing a +file descriptor is effectively passing an object capability, it is easier to +improve the isolation of the VND drivers in the future, as described below. + +No separate control device: The driver uses the same minor (block) device for +configuration and for actual (whole-disk) I/O, instead of exposing a separate +device that exists only for the purpose of configuring the device. The reason +for this is that such a control device simply does not fit the NetBSD +opendisk(3) API. While MINIX3 may at some point implement support for NetBSD's +notion of raw devices, such raw devices are still expected to support I/O, and +that means they cannot be control-only. In this regard, it should be mentioned +that the entire VND infrastructure relies on block caches being invalidated +properly upon (un)configuration of VND units, and that such invalidation +(through the REQ_FLUSH file system request) is currently initiated only by +closing block devices. Support for configuration or I/O through character +devices would thus require more work on that side first. In any case, the +primary downside of not having a separate control device is that handling +access permissions on device open is a bit of a hack in order to keep the +MINIX3 userland happy. 
+ + +FUTURE IMPROVEMENTS + +Currently, the VND driver instances are run as root solely because the +dupfrom(2) call requires root. Obviously, nonroot user processes should never +be able to copy file descriptors from arbitrary processes, and thus, some +security check is required there. However, an access control list for VFS calls +would be a much better solution: in that case, VND driver processes can be +given exclusive rights to the use of the dupfrom(2) call, while they can be +given a normal driver UID at the same time. + +In MINIX3's dependability model, drivers are generally not considered to be +malicious. However, the VND case is interesting because it is possible to +isolate individual driver instances to the point of actual "least authority". +The dupfrom(2) call currently allows any file descriptor to be copied, but it +would be possible to extend the scheme to let user processes (and vndconfig(8) +in particular) mark the file descriptors that may be the target of a dupfrom(2) +call. One of several schemes may be implemented in VFS for this purpose. For +example, each process could be allowed to mark one of its file descriptors as +"copyable" using a new VFS call, and VFS would then allow dupfrom(2) only on a +"copyable" file descriptor from a process blocked on a call to the driver that +invoked dupfrom(2). This approach precludes hiding a VND driver behind a RAID +or FBD (etc.) driver, but more sophisticated approaches can solve that as well. +Regardless of the scheme, the end result would be a situation where the VND +drivers are strictly limited to operating on the resources given to them. diff --git a/drivers/vnd/vnd.c b/drivers/vnd/vnd.c new file mode 100644 index 000000000..422ce0088 --- /dev/null +++ b/drivers/vnd/vnd.c @@ -0,0 +1,601 @@ +/* VNode Disk driver, by D.C.
van Moolenbroek */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define VND_BUF_SIZE 65536 + +static struct { + int fd; /* file descriptor for the underlying file */ + int openct; /* number of times the device is open */ + int exiting; /* exit after the last close? */ + int rdonly; /* is the device set up read-only? */ + dev_t dev; /* device on which the file resides */ + ino_t ino; /* inode number of the file */ + struct device part[DEV_PER_DRIVE]; /* partition bases and sizes */ + struct device subpart[SUB_PER_DRIVE]; /* same for subpartitions */ + struct part_geom geom; /* geometry information */ + char *buf; /* intermediate I/O transfer buffer */ +} state; + +static unsigned int instance; + +static int vnd_open(devminor_t, int); +static int vnd_close(devminor_t); +static int vnd_transfer(devminor_t, int, u64_t, endpoint_t, iovec_t *, + unsigned int, int); +static int vnd_ioctl(devminor_t, unsigned long, endpoint_t, cp_grant_id_t, + endpoint_t); +static struct device *vnd_part(devminor_t); +static void vnd_geometry(devminor_t, struct part_geom *); + +static struct blockdriver vnd_dtab = { + .bdr_type = BLOCKDRIVER_TYPE_DISK, + .bdr_open = vnd_open, + .bdr_close = vnd_close, + .bdr_transfer = vnd_transfer, + .bdr_ioctl = vnd_ioctl, + .bdr_part = vnd_part, + .bdr_geometry = vnd_geometry +}; + +/* + * Parse partition tables. + */ +static void +vnd_partition(void) +{ + memset(state.part, 0, sizeof(state.part)); + memset(state.subpart, 0, sizeof(state.subpart)); + + state.part[0].dv_size = state.geom.size; + + partition(&vnd_dtab, 0, P_PRIMARY, FALSE /*atapi*/); +} + +/* + * Open a device. + */ +static int +vnd_open(devminor_t minor, int access) +{ + /* No sub/partition devices are available before initialization. 
*/ + if (state.fd == -1 && minor != 0) + return ENXIO; + else if (state.fd != -1 && vnd_part(minor) == NULL) + return ENXIO; + + /* + * If the device either is not configured or configured as read-only, + * block open calls that request write permission. This is what user- + * land expects, although it does mean that vndconfig(8) has to open the + * device as read-only in order to (un)configure it. + */ + if (access & BDEV_W_BIT) { + if (state.fd == -1) + return ENXIO; + if (state.rdonly) + return EACCES; + } + + /* + * Userland expects that if the device is opened after having been + * fully closed, partition tables are (re)parsed. Since we already + * parse partition tables upon initialization, we could skip this for + * the first open, but that would introduce more state. + */ + if (state.fd != -1 && state.openct == 0) { + vnd_partition(); + + /* Make sure our target device didn't just disappear. */ + if (vnd_part(minor) == NULL) + return ENXIO; + } + + state.openct++; + + return OK; +} + +/* + * Close a device; a pending shutdown takes effect on the last close. + */ +static int +vnd_close(devminor_t UNUSED(minor)) +{ + if (state.openct == 0) { + printf("VND%u: closing already-closed device\n", instance); + return EINVAL; + } + + state.openct--; + + if (state.exiting && state.openct == 0) + blockdriver_terminate(); + + return OK; +} + +/* + * Copy a number of bytes from or to the caller, to or from the intermediate + * buffer. If the given endpoint is SELF, a local memory copy must be made.
+ */ +static int +vnd_copy(iovec_s_t *iov, size_t iov_off, size_t bytes, endpoint_t endpt, + int do_write) +{ + struct vscp_vec vvec[SCPVEC_NR], *vvp; + size_t off, chunk; + int count; + char *ptr; + + assert(bytes > 0 && bytes <= VND_BUF_SIZE); + + vvp = vvec; + count = 0; + + for (off = 0; off < bytes; off += chunk) { + chunk = MIN(bytes - off, iov->iov_size - iov_off); + + if (endpt == SELF) { + ptr = (char *) iov->iov_grant + iov_off; + + if (do_write) + memcpy(&state.buf[off], ptr, chunk); + else + memcpy(ptr, &state.buf[off], chunk); + } else { + assert(count < SCPVEC_NR); /* SCPVEC_NR >= NR_IOREQS */ + + vvp->v_from = do_write ? endpt : SELF; + vvp->v_to = do_write ? SELF : endpt; + vvp->v_bytes = chunk; + vvp->v_gid = iov->iov_grant; + vvp->v_offset = iov_off; + vvp->v_addr = (vir_bytes) &state.buf[off]; + + vvp++; + count++; + } + + iov_off += chunk; + if (iov_off == iov->iov_size) { + iov++; + iov_off = 0; + } + } + + if (endpt != SELF) + return sys_vsafecopy(vvec, count); + else + return OK; +} + +/* + * Advance the given I/O vector, and the offset into its first element, by the + * given number of bytes. + */ +static iovec_s_t * +vnd_advance(iovec_s_t *iov, size_t *iov_offp, size_t bytes) +{ + size_t iov_off; + + assert(bytes > 0 && bytes <= VND_BUF_SIZE); + + iov_off = *iov_offp; + + while (bytes > 0) { + if (bytes >= iov->iov_size - iov_off) { + bytes -= iov->iov_size - iov_off; + iov++; + iov_off = 0; + } else { + iov_off += bytes; + bytes = 0; + } + } + + *iov_offp = iov_off; + return iov; +} + +/* + * Perform data transfer on the selected device. 
+ */ +static int +vnd_transfer(devminor_t minor, int do_write, u64_t position, + endpoint_t endpt, iovec_t *iovt, unsigned int nr_req, int flags) +{ + struct device *dv; + iovec_s_t *iov; + size_t off, chunk, bytes, iov_off; + ssize_t r; + unsigned int i; + + iov = (iovec_s_t *) iovt; + + if (state.fd == -1 || (dv = vnd_part(minor)) == NULL) + return ENXIO; + + /* Prevent write operations on devices configured as read-only. */ + if (do_write && state.rdonly) + return EACCES; + + /* Determine the total number of bytes to transfer. */ + if (position >= dv->dv_size) + return 0; + + bytes = 0; + + for (i = 0; i < nr_req; i++) { + if (iov[i].iov_size == 0 || iov[i].iov_size > LONG_MAX) + return EINVAL; + bytes += iov[i].iov_size; + if (bytes > LONG_MAX) + return EINVAL; + } + + if (bytes > dv->dv_size - position) + bytes = dv->dv_size - position; + + position += dv->dv_base; + + /* Perform the actual transfer, in chunks if necessary. */ + iov_off = 0; + + for (off = 0; off < bytes; off += chunk) { + chunk = MIN(bytes - off, VND_BUF_SIZE); + + assert((unsigned int) (iov - (iovec_s_t *) iovt) < nr_req); + + /* For reads, read in the data for the chunk; possibly less. */ + if (!do_write) { + chunk = r = pread64(state.fd, state.buf, chunk, + position); + + if (r < 0) { + printf("VND%u: pread failed (%d)\n", instance, + -errno); + return -errno; + } + if (r == 0) + break; + } + + /* Copy the data for this chunk from or to the caller. */ + if ((r = vnd_copy(iov, iov_off, chunk, endpt, do_write)) < 0) { + printf("VND%u: data copy failed (%d)\n", instance, r); + return r; + } + + /* For writes, write the data to the file; possibly less. */ + if (do_write) { + chunk = r = pwrite64(state.fd, state.buf, chunk, + position); + + if (r <= 0) { + if (r < 0) + r = -errno; + printf("VND%u: pwrite failed (%d)\n", instance, + r); + return (r < 0) ? r : EIO; + } + } + + /* Move ahead on the I/O vector and the file position.
*/ + iov = vnd_advance(iov, &iov_off, chunk); + + position += chunk; + } + + /* If force-write is requested, flush the underlying file to disk. */ + if (do_write && (flags & BDEV_FORCEWRITE)) + fsync(state.fd); + + /* Return the number of bytes transferred. */ + return off; +} + +/* + * Initialize the size and geometry for the device and any partitions. If the + * user provided a geometry, this will be used; otherwise, a geometry will be + * computed. + */ +static int +vnd_layout(u64_t size, struct vnd_ioctl *vnd) +{ + u64_t sectors; + + state.geom.base = 0ULL; + + if (vnd->vnd_flags & VNDIOF_HASGEOM) { + /* + * The geometry determines the accessible part of the file. + * The resulting size must not exceed the file size. + */ + state.geom.cylinders = vnd->vnd_geom.vng_ncylinders; + state.geom.heads = vnd->vnd_geom.vng_ntracks; + state.geom.sectors = vnd->vnd_geom.vng_nsectors; + + state.geom.size = (u64_t) state.geom.cylinders * + state.geom.heads * state.geom.sectors * + vnd->vnd_geom.vng_secsize; + if (state.geom.size == 0 || state.geom.size > size) + return EINVAL; + } else { + sectors = size / SECTOR_SIZE; + state.geom.size = sectors * SECTOR_SIZE; + + if (sectors >= 32 * 64) { + state.geom.cylinders = sectors / (32 * 64); + state.geom.heads = 64; + state.geom.sectors = 32; + } else { + state.geom.cylinders = sectors; + state.geom.heads = 1; + state.geom.sectors = 1; + } + } + + /* + * Parse partition tables immediately, so that (sub)partitions can be + * opened right away. The first open will perform the same procedure, + * but that is only necessary to match userland expectations. + */ + vnd_partition(); + + return OK; +} + +/* + * Process I/O control requests. + */ +static int +vnd_ioctl(devminor_t UNUSED(minor), unsigned long request, endpoint_t endpt, + cp_grant_id_t grant, endpoint_t user_endpt) +{ + struct vnd_ioctl vnd; + struct vnd_user vnu; + struct stat st; + int r; + + switch (request) { + case VNDIOCSET: + /* + * The VND must not be busy. 
Note that the caller has the + * device open to perform the IOCTL request. + */ + if (state.fd != -1 || state.openct != 1) + return EBUSY; + + if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd, + sizeof(vnd))) != OK) + return r; + + /* + * Issue a special VFS backcall that copies a file descriptor + * to the current process, from the user process ultimately + * making the IOCTL call. The result is either a newly + * allocated file descriptor or an error. + */ + if ((state.fd = dupfrom(user_endpt, vnd.vnd_fildes)) == -1) + return -errno; + + /* The target file must be regular. */ + if (fstat(state.fd, &st) == -1) { + printf("VND%u: fstat failed (%d)\n", instance, -errno); + r = -errno; + } + if (r == OK && !S_ISREG(st.st_mode)) + r = EINVAL; + + /* + * Allocate memory for an intermediate I/O transfer buffer. In + * order to save on memory in the common case, the buffer is + * only allocated when the vnd is in use. We use mmap instead + * of malloc to allow the memory to be actually freed later. + */ + if (r == OK) { + state.buf = minix_mmap(NULL, VND_BUF_SIZE, PROT_READ | + PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + if (state.buf == MAP_FAILED) + r = ENOMEM; + } + + if (r != OK) { + close(state.fd); + state.fd = -1; + return r; + } + + /* Set various device state fields. */ + state.dev = st.st_dev; + state.ino = st.st_ino; + state.rdonly = !!(vnd.vnd_flags & VNDIOF_READONLY); + + r = vnd_layout(st.st_size, &vnd); + + /* Upon success, return the device size to userland. */ + if (r == OK) { + vnd.vnd_size = state.geom.size; + + r = sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnd, + sizeof(vnd)); + } + + if (r != OK) { + minix_munmap(state.buf, VND_BUF_SIZE); + close(state.fd); + state.fd = -1; + } + + return r; + + case VNDIOCCLR: + /* The VND can only be cleared if it has been configured. 
*/ + if (state.fd == -1) + return ENXIO; + + if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd, + sizeof(vnd))) != OK) + return r; + + /* The caller has the device open to do the IOCTL request. */ + if (!(vnd.vnd_flags & VNDIOF_FORCE) && state.openct != 1) + return EBUSY; + + /* + * Close the associated file descriptor immediately, but do not + * allow reuse until the device has been closed by the other + * users. + */ + minix_munmap(state.buf, VND_BUF_SIZE); + close(state.fd); + state.fd = -1; + + return OK; + + case VNDIOCGET: + /* + * We need not copy in the given structure. It would contain + * the requested unit number, but each driver instance provides + * only one unit anyway. + */ + + memset(&vnu, 0, sizeof(vnu)); + + vnu.vnu_unit = instance; + + /* Leave these fields zeroed if the device is not in use. */ + if (state.fd != -1) { + vnu.vnu_dev = state.dev; + vnu.vnu_ino = state.ino; + } + + return sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnu, + sizeof(vnu)); + + case DIOCOPENCT: + return sys_safecopyto(endpt, grant, 0, + (vir_bytes) &state.openct, sizeof(state.openct)); + + case DIOCFLUSH: + if (state.fd == -1) + return ENXIO; + + fsync(state.fd); + + return OK; + } + + return ENOTTY; +} + +/* + * Return a pointer to the partition structure for the given minor device. + */ +static struct device * +vnd_part(devminor_t minor) +{ + if (minor >= 0 && minor < DEV_PER_DRIVE) + return &state.part[minor]; + else if ((unsigned int) (minor -= MINOR_d0p0s0) < SUB_PER_DRIVE) + return &state.subpart[minor]; + else + return NULL; +} + +/* + * Return geometry information. + */ +static void +vnd_geometry(devminor_t UNUSED(minor), struct part_geom *part) +{ + part->cylinders = state.geom.cylinders; + part->heads = state.geom.heads; + part->sectors = state.geom.sectors; +} + +/* + * Initialize the device. + */ +static int +vnd_init(int UNUSED(type), sef_init_info_t *UNUSED(info)) +{ + long v; + + /* + * No support for crash recovery. 
The driver would have no way to + * reacquire the file descriptor for the target file. + */ + + /* + * The instance number is used for two purposes: reporting errors, and + * returning the proper unit number to userland in VNDIOCGET calls. + */ + v = 0; + (void) env_parse("instance", "d", 0, &v, 0, 255); + instance = (unsigned int) v; + + state.openct = 0; + state.exiting = FALSE; + state.fd = -1; + + return OK; +} + +/* + * Process an incoming signal. + */ +static void +vnd_signal(int signo) +{ + + /* In case of a termination signal, initiate driver shutdown. */ + if (signo != SIGTERM) + return; + + state.exiting = TRUE; + + /* Keep running until the device has been fully closed. */ + if (state.openct == 0) + blockdriver_terminate(); +} + +/* + * Set callbacks and initialize the System Event Framework (SEF). + */ +static void +vnd_startup(void) +{ + + /* Register init and signal callbacks. */ + sef_setcb_init_fresh(vnd_init); + sef_setcb_signal_handler(vnd_signal); + + /* Let SEF perform startup. */ + sef_startup(); +} + +/* + * Driver task. + */ +int +main(int argc, char **argv) +{ + + /* Initialize the driver. */ + env_setargs(argc, argv); + vnd_startup(); + + /* Process requests until shutdown. 
*/ + blockdriver_task(&vnd_dtab); + + return 0; +} diff --git a/etc/system.conf b/etc/system.conf index 950cd6b08..591b70962 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -684,3 +684,11 @@ service fbd at_wini ; }; + +service vnd +{ + ipc + SYSTEM VFS RS VM + ; + uid 0; # only for dupfrom(2) +}; diff --git a/include/minix/dmap.h b/include/minix/dmap.h index 087eac85e..c7b59d11e 100644 --- a/include/minix/dmap.h +++ b/include/minix/dmap.h @@ -71,7 +71,7 @@ enum dev_style { STYLE_NDEV, STYLE_DEV, STYLE_TTY, STYLE_CTTY }; #define BMP085B1S77_MAJOR 53 /* 53 = /dev/bmp085b1s77 (bmp085) */ #define BMP085B2S77_MAJOR 54 /* 54 = /dev/bmp085b2s77 (bmp085) */ #define BMP085B3S77_MAJOR 55 /* 55 = /dev/bmp085b3s77 (bmp085) */ - + /* 56-63 = /dev/vnd[0-7] (vnd) */ /* Minor device numbers for memory driver. */ # define RAM_DEV_OLD 0 /* minor device for /dev/ram */ diff --git a/include/minix/ioctl.h b/include/minix/ioctl.h index f679fff01..f85c59f27 100644 --- a/include/minix/ioctl.h +++ b/include/minix/ioctl.h @@ -34,6 +34,7 @@ _IOC_IN) #define _IORW(x,y,t) ((x << 8) | y | ((sizeof(t) & _IOCPARM_MASK) << 16) |\ _IOC_INOUT) +#define _IOWR(x,y,t) _IORW(x,y,t) /* NetBSD compatibility */ #define _IOW_BIG(y,t) (y | ((sizeof(t) & _IOCPARM_MASK_BIG) << 8) \ | _IOC_IN | _IOC_BIG) diff --git a/lib/libminc/Makefile b/lib/libminc/Makefile index 2530ed938..99ba07153 100644 --- a/lib/libminc/Makefile +++ b/lib/libminc/Makefile @@ -143,12 +143,13 @@ CPPFLAGS.${i}+= -I${LIBCDIR}/locale .endfor # Import from sys-minix -.for i in access.c brk.c close.c environ.c execve.c fork.c \ +.for i in access.c brk.c close.c environ.c execve.c fork.c fsync.c \ getgid.c getpid.c geteuid.c getuid.c gettimeofday.c getvfsstat.c \ - link.c loadname.c _mcontext.c mknod.c mmap.c nanosleep.c open.c \ - read.c reboot.c sbrk.c select.c setuid.c sigprocmask.c stack_utils.c \ - stat.c stime.c syscall.c _ucontext.c umask.c unlink.c waitpid.c \ - brksize.S _ipc.S _senda.S ucontext.S mmap.c init.c + init.c link.c 
loadname.c lseek.c lseek64.c _mcontext.c mknod.c \ + mmap.c nanosleep.c open.c pread.c pwrite.c read.c reboot.c sbrk.c \ + select.c setuid.c sigprocmask.c stack_utils.c stat.c stime.c \ + syscall.c _ucontext.c umask.c unlink.c waitpid.c write.c \ + brksize.S _ipc.S _senda.S ucontext.S .PATH.c: ${LIBCDIR}/sys-minix .PATH.S: ${ARCHDIR}/sys-minix SRCS+= ${i} diff --git a/releasetools/nbsd_ports b/releasetools/nbsd_ports index e55078b9e..fda6c052c 100644 --- a/releasetools/nbsd_ports +++ b/releasetools/nbsd_ports @@ -114,6 +114,7 @@ 2013/04/23 12:00:00,sys/dev/i2c/Makefile 2013/04/23 12:00:00,sys/dev/i2c/i2c_io.h 2013/07/22 12:00:00,sys/dev/videomode +2013/07/31 12:00:00,sys/dev/vndvar.h 2012/01/16 18:47:57,sys/lib/libsa 2012/10/17 12:00:00,sys/lib/libz 2012/10/17 12:00:00,sys/Makefile diff --git a/sys/dev/Makefile b/sys/dev/Makefile index 00d1bc537..ed2838983 100644 --- a/sys/dev/Makefile +++ b/sys/dev/Makefile @@ -21,6 +21,8 @@ INCSDIR= /usr/include/dev # Only install includes which are used by userland INCS= biovar.h ccdvar.h cgdvar.h fssvar.h keylock.h kttcpio.h lockstat.h \ md.h vndvar.h +.else +INCS= vndvar.h .endif .include diff --git a/sys/dev/vndvar.h b/sys/dev/vndvar.h new file mode 100644 index 000000000..cc06ce867 --- /dev/null +++ b/sys/dev/vndvar.h @@ -0,0 +1,218 @@ +/* $NetBSD: vndvar.h,v 1.31 2011/06/29 09:12:42 hannken Exp $ */ + +/*- + * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1988 University of Utah. + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: Utah $Hdr: fdioctl.h 1.1 90/07/09$ + * + * @(#)vnioctl.h 8.1 (Berkeley) 6/10/93 + */ + +#ifndef _SYS_DEV_VNDVAR_H_ +#define _SYS_DEV_VNDVAR_H_ + +#ifndef __minix +#include +#endif + +/* + * Vnode disk pseudo-geometry information. + */ +struct vndgeom { + u_int32_t vng_secsize; /* # bytes per sector */ + u_int32_t vng_nsectors; /* # data sectors per track */ + u_int32_t vng_ntracks; /* # tracks per cylinder */ + u_int32_t vng_ncylinders; /* # cylinders per unit */ +}; + +/* + * Ioctl definitions for file (vnode) disk pseudo-device. 
+ */ +struct vnd_ioctl { +#ifndef __minix + char *vnd_file; /* pathname of file to mount */ +#else + int vnd_fildes; /* file descriptor of file to mount */ +#endif + int vnd_flags; /* flags; see below */ + struct vndgeom vnd_geom; /* geometry to emulate */ +#ifndef __minix + unsigned int vnd_osize; /* (returned) size of disk */ +#endif + uint64_t vnd_size; /* (returned) size of disk */ +}; + +/* vnd_flags */ +#define VNDIOF_HASGEOM 0x01 /* use specified geometry */ +#define VNDIOF_READONLY 0x02 /* as read-only device */ +#define VNDIOF_FORCE 0x04 /* force close */ + +#ifndef __minix +#ifdef _KERNEL + +struct vnode; + +/* + * A vnode disk's state information. + */ +struct vnd_softc { + device_t sc_dev; + int sc_flags; /* flags */ + size_t sc_size; /* size of vnd */ + struct vnode *sc_vp; /* vnode */ + kauth_cred_t sc_cred; /* credentials */ + int sc_maxactive; /* max # of active requests */ + struct bufq_state *sc_tab; /* transfer queue */ + int sc_active; /* number of active transfers */ + struct disk sc_dkdev; /* generic disk device info */ + struct vndgeom sc_geom; /* virtual geometry */ + struct pool sc_vxpool; /* vndxfer pool */ + struct pool sc_vbpool; /* vndbuf pool */ + struct lwp *sc_kthread; /* kernel thread */ + u_int32_t sc_comp_blksz; /* precompressed block size */ + u_int32_t sc_comp_numoffs;/* count of compressed block offsets */ + u_int64_t *sc_comp_offsets;/* file idx's to compressed blocks */ + unsigned char *sc_comp_buff; /* compressed data buffer */ + unsigned char *sc_comp_decombuf;/* decompressed data buffer */ + int32_t sc_comp_buffblk;/*current decompressed block */ + z_stream sc_comp_stream;/* decompress descriptor */ +}; +#endif + +/* sc_flags */ +#define VNF_INITED 0x001 /* unit has been initialized */ +#define VNF_WLABEL 0x002 /* label area is writable */ +#define VNF_LABELLING 0x004 /* unit is currently being labelled */ +#define VNF_WANTED 0x008 /* someone is waiting to obtain a lock */ +#define VNF_LOCKED 0x010 /* unit is locked */ 
+#define VNF_READONLY 0x020 /* unit is read-only */ +#define VNF_KLABEL 0x040 /* keep label on close */ +#define VNF_VLABEL 0x080 /* label is valid */ +#define VNF_KTHREAD 0x100 /* thread is running */ +#define VNF_VUNCONF 0x200 /* device is unconfiguring */ +#define VNF_COMP 0x400 /* file is compressed */ +#define VNF_CLEARING 0x800 /* unit is being torn down */ +#define VNF_USE_VN_RDWR 0x1000 /* have to use vn_rdwr() */ + +/* structure of header in a compressed file */ +struct vnd_comp_header +{ + char preamble[128]; + u_int32_t block_size; + u_int32_t num_blocks; +}; +#endif + +/* + * A simple structure for describing which vnd units are in use. + */ + +struct vnd_user { + int vnu_unit; /* which vnd unit */ + dev_t vnu_dev; /* file is on this device... */ + ino_t vnu_ino; /* ...at this inode */ +}; + +/* + * Before you can use a unit, it must be configured with VNDIOCSET. + * The configuration persists across opens and closes of the device; + * an VNDIOCCLR must be used to reset a configuration. An attempt to + * VNDIOCSET an already active unit will return EBUSY. + */ +#define VNDIOCSET _IOWR('F', 0, struct vnd_ioctl) /* enable disk */ +#define VNDIOCCLR _IOW('F', 1, struct vnd_ioctl) /* disable disk */ +#define VNDIOCGET _IOWR('F', 3, struct vnd_user) /* get list */ + +#ifdef _KERNEL +/* + * Everything else is kernel-private, mostly exported for compat/netbsd32. + * + * NetBSD 3.0 had a 32-bit value for vnu_ino. + * + * NetBSD 5.0 had a 32-bit value for vnu_dev, and vnd_size. + */ +struct vnd_user30 { + int vnu_unit; /* which vnd unit */ + uint32_t vnu_dev; /* file is on this device... */ + uint32_t vnu_ino; /* ...at this inode */ +}; +#define VNDIOCGET30 _IOWR('F', 2, struct vnd_user30) /* get list */ + +struct vnd_user50 { + int vnu_unit; /* which vnd unit */ + uint32_t vnu_dev; /* file is on this device... 
*/ + ino_t vnu_ino; /* ...at this inode */ +}; +#define VNDIOCGET50 _IOWR('F', 3, struct vnd_user50) /* get list */ + +struct vnd_ioctl50 { + char *vnd_file; /* pathname of file to mount */ + int vnd_flags; /* flags; see below */ + struct vndgeom vnd_geom; /* geometry to emulate */ + unsigned int vnd_size; /* (returned) size of disk */ +}; +#define VNDIOCSET50 _IOWR('F', 0, struct vnd_ioctl50) +#define VNDIOCCLR50 _IOW('F', 1, struct vnd_ioctl50) + +#endif /* _KERNEL */ + +#endif /* _SYS_DEV_VNDVAR_H_ */ diff --git a/sys/sys/ioctl.h b/sys/sys/ioctl.h index ddd097926..b85abb839 100644 --- a/sys/sys/ioctl.h +++ b/sys/sys/ioctl.h @@ -23,6 +23,7 @@ #include /* 'b' */ #include /* 'B' */ #include /* 'V' */ +#include /* 'F' */ #if defined(_NETBSD_SOURCE) #define TIOCDRAIN TCDRAIN