.include <bsd.own.mk>
+.if ${BUILDAVFS} == "yes"
+VFS= avfs
+PFS= apfs
+.else
+VFS= vfs
+PFS= pfs
+.endif
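+# For example (assuming BUILDAVFS is supplied by the build configuration,
+# e.g. via bsd.own.mk), "make BUILDAVFS=yes" builds the avfs/apfs variants
+# in place of the standard vfs/pfs servers.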
+
.if ${MKIMAGEONLY} == "yes"
-SUBDIR= ds init mfs pfs pm rs sched vfs vm
+SUBDIR= ds init mfs ${PFS} pm rs sched ${VFS} vm
.else
SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs \
- mfs pfs pm procfs rs sched vfs vm devman
+ mfs ${PFS} pm procfs rs sched ${VFS} vm devman
.endif
--- /dev/null
+# Makefile for Pipe File System (PFS)
+PROG= pfs
+SRCS= open.c table.c inode.c main.c super.c link.c \
+ buffer.c read.c misc.c mount.c utility.c stadir.c \
+ uds.c dev_uds.c
+
+DPADD+= ${LIBDRIVER} ${LIBSYS}
+LDADD+= -ldriver -lsys
+
+MAN=
+
+BINDIR?= /usr/sbin
+
+.include <minix.bootprog.mk>
--- /dev/null
+#ifndef __PFS_BUF_H__
+#define __PFS_BUF_H__
+
+/* Buffer (block) cache.
+ */
+
+struct buf {
+ /* Data portion of the buffer. */
+ char b_data[PIPE_BUF]; /* ordinary user data */
+
+ /* Header portion of the buffer. */
+  struct buf *b_next;		/* used to link the allocated bufs in a chain */
+  struct buf *b_prev;		/* used to link the allocated bufs the other way */
+ ino_t b_num; /* inode number on minor device */
+ dev_t b_dev; /* major | minor device where block resides */
+ int b_bytes; /* Number of bytes allocated in bp */
+ int b_count; /* Number of users of this buffer */
+};
+
+/* Buffers are allocated on demand and freed once their b_count drops to 0. */
+
+
+EXTERN struct buf *front;	/* points to the oldest allocated buffer */
+EXTERN struct buf *rear;	/* points to the most recently allocated buffer */
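+
+/* Usage sketch (assumed pairing): get_block(dev, inum) raises b_count by
+ * one and put_block(dev, inum) lowers it; the buffer is freed once the
+ * count drops back to zero.
+ */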
+
+#endif
--- /dev/null
+#include "fs.h"
+#include "buf.h"
+#include "inode.h"
+#include <sys/types.h>
+#include <stdlib.h>
+#include <string.h>
+
+FORWARD _PROTOTYPE( struct buf *new_block, (dev_t dev, ino_t inum) );
+
+/*===========================================================================*
+ * buf_pool *
+ *===========================================================================*/
+PUBLIC void buf_pool(void)
+{
+/* Initialize the buffer pool. */
+
+ front = NULL;
+ rear = NULL;
+}
+
+
+
+/*===========================================================================*
+ * get_block *
+ *===========================================================================*/
+PUBLIC struct buf *get_block(dev_t dev, ino_t inum)
+{
+ struct buf *bp = front;
+
+ while(bp != NULL) {
+ if (bp->b_dev == dev && bp->b_num == inum) {
+ bp->b_count++;
+ return(bp);
+ }
+ bp = bp->b_next;
+ }
+
+ /* Buffer was not found. Try to allocate a new one */
+ return new_block(dev, inum);
+}
+
+
+/*===========================================================================*
+ * new_block *
+ *===========================================================================*/
+PRIVATE struct buf *new_block(dev_t dev, ino_t inum)
+{
+/* Allocate a new buffer and add it to the double linked buffer list */
+ struct buf *bp;
+
+ bp = malloc(sizeof(struct buf));
+ if (bp == NULL) {
+ err_code = ENOSPC;
+ return(NULL);
+ }
+ bp->b_num = inum;
+ bp->b_dev = dev;
+ bp->b_bytes = 0;
+ bp->b_count = 1;
+  memset(bp->b_data, 0, PIPE_BUF);
+
+  /* Add at the end of the buffer chain */
+ if (front == NULL) { /* Empty list? */
+ front = bp;
+ bp->b_prev = NULL;
+ } else {
+ rear->b_next = bp;
+ bp->b_prev = rear;
+ }
+ bp->b_next = NULL;
+ rear = bp;
+
+ return(bp);
+}
+
+
+/*===========================================================================*
+ * put_block *
+ *===========================================================================*/
+PUBLIC void put_block(dev_t dev, ino_t inum)
+{
+ struct buf *bp;
+
+  bp = get_block(dev, inum);	/* this raises b_count by one */
+  if (bp == NULL) return;	/* block not found and allocation failed */
+
+  bp->b_count--;		/* compensate for the get_block() above */
+  if (--bp->b_count > 0) return; /* the actual put; still in use elsewhere */
+
+ /* Cut bp out of the loop */
+ if (bp->b_prev == NULL)
+ front = bp->b_next;
+ else
+ bp->b_prev->b_next = bp->b_next;
+
+ if (bp->b_next == NULL)
+ rear = bp->b_prev;
+ else
+ bp->b_next->b_prev = bp->b_prev;
+
+ /* Buffer administration is done. Now it's safe to free up bp. */
+ free(bp);
+}
--- /dev/null
+#ifndef __PFS_CONST_H__
+#define __PFS_CONST_H__
+
+#define NR_INODES 256 /* # slots in "in core" inode table */
+
+/* Size of descriptor table for unix domain sockets. This should be
+ * equal to the maximum number of minor devices (currently 256).
+ */
+#define NR_FDS 256
+
+#define INODE_HASH_LOG2   7     /* base-2 logarithm of the inode hash size */
+#define INODE_HASH_SIZE ((unsigned long)1<<INODE_HASH_LOG2)
+#define INODE_HASH_MASK (((unsigned long)1<<INODE_HASH_LOG2)-1)
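+
+/* For illustration: inode numbers map onto hash slots through the mask
+ * above, e.g. (300 & INODE_HASH_MASK) == 44, so inode 300 would live in
+ * hash_inodes[44].
+ */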
+
+
+/* The type of sizeof may be (unsigned) long. Use the following macro for
+ * taking the sizes of small objects so that there are no surprises like
+ * (small) long constants being passed to routines expecting an int.
+ */
+#define usizeof(t) ((unsigned) sizeof(t))
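+
+/* For example, printf("%u", usizeof(struct buf)) needs no cast, whereas
+ * the plain sizeof result might be an (unsigned) long.
+ */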
+
+/* Miscellaneous constants */
+#define INVAL_UID ((uid_t) -1) /* Invalid user ID */
+#define INVAL_GID ((gid_t) -1) /* Invalid group ID */
+#define NORMAL 0 /* forces get_block to do disk read */
+#define NO_READ 1 /* prevents get_block from doing disk read */
+#define PREFETCH 2 /* tells get_block not to read or mark dev */
+
+#define NO_BIT ((bit_t) 0) /* returned by alloc_bit() to signal failure */
+
+#define ATIME 002 /* set if atime field needs updating */
+#define CTIME 004 /* set if ctime field needs updating */
+#define MTIME 010 /* set if mtime field needs updating */
+
+#define FS_BITMAP_CHUNKS(b) ((b)/usizeof (bitchunk_t))/* # map chunks/blk */
+#define FS_BITCHUNK_BITS (usizeof(bitchunk_t) * CHAR_BIT)
+#define FS_BITS_PER_BLOCK(b) (FS_BITMAP_CHUNKS(b) * FS_BITCHUNK_BITS)
+
+#define FS_CALL_VEC_SIZE 31
+#define DEV_CALL_VEC_SIZE 25
+
+#endif
--- /dev/null
+/*
+ * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
+ * This code handles requests generated by operations on /dev/uds
+ *
+ * The entry points into this file are...
+ *
+ * uds_open: handles the open(2) syscall on /dev/uds
+ * uds_close: handles the close(2) syscall on /dev/uds
+ * uds_select: handles the select(2) syscall on /dev/uds
+ * uds_read: handles the read(2) syscall on /dev/uds
+ * uds_write: handles the write(2) syscall on /dev/uds
+ * uds_ioctl: handles the ioctl(2) syscall on /dev/uds
+ * uds_status: handles status requests.
+ * uds_cancel: handles cancelled syscalls.
+ *
+ * Also See...
+ *
+ * table.c, uds.c, uds.h
+ *
+ * Overview
+ *
+ * The interface to unix domain sockets is similar to the
+ * interface to network sockets. There is a character
+ * device (/dev/uds) that uses STYLE_CLONE and this server
+ * is a 'driver' for that device.
+ */
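+
+/* As a rough sketch (assumed libc behavior, not implemented in this file),
+ * a call like socket(PF_UNIX, SOCK_STREAM, 0) reaches this driver as:
+ *
+ *	fd = open("/dev/uds", O_RDWR);		(handled by uds_open)
+ *	type = SOCK_STREAM;
+ *	ioctl(fd, NWIOSUDSTYPE, &type);		(handled by uds_ioctl)
+ */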
+
+#define DEBUG 0
+
+#include "inc.h"
+#include "const.h"
+#include "glo.h"
+#include "uds.h"
+
+FORWARD _PROTOTYPE( int uds_perform_read, (int minor, endpoint_t m_source,
+ size_t size, int pretend));
+FORWARD _PROTOTYPE( int uds_perform_write, (int minor, endpoint_t m_source,
+ size_t size, int pretend));
+
+PUBLIC int uds_open(message *dev_m_in, message *dev_m_out)
+{
+ message fs_m_in, fs_m_out;
+ struct ucred ucred;
+ int rc, i;
+ int minor;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_open() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT);
+#endif
+
+ /*
+ * Find a slot in the descriptor table for the new descriptor.
+ * The index of the descriptor in the table will be returned.
+ * Subsequent calls to read/write/close/ioctl/etc will use this
+ * minor number. The minor number must be different from
+ * the /dev/uds device's minor number (currently 0).
+ */
+
+ minor = -1; /* to trap error */
+
+ for (i = 1; i < NR_FDS; i++) {
+ if (uds_fd_table[i].state == UDS_FREE) {
+ minor = i;
+ break;
+ }
+ }
+
+ if (minor == -1) {
+
+ /* descriptor table full */
+ uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, ENFILE);
+ return ENFILE;
+ }
+
+ /*
+ * We found a slot in uds_fd_table, now initialize the descriptor
+ */
+
+ /* mark this one as 'in use' so that it doesn't get assigned to
+ * another socket
+ */
+ uds_fd_table[minor].state = UDS_INUSE;
+
+ /* track the system call we are performing in case it gets cancelled */
+ uds_fd_table[minor].call_nr = dev_m_in->m_type;
+ uds_fd_table[minor].ioctl = 0;
+ uds_fd_table[minor].syscall_done = 0;
+
+ /* set the socket owner */
+ uds_fd_table[minor].owner = dev_m_in->USER_ENDPT;
+ uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT;
+
+ /* setup select(2) framework */
+ uds_fd_table[minor].selecting = 0;
+ uds_fd_table[minor].select_proc = 0;
+ uds_fd_table[minor].sel_ops_in = 0;
+ uds_fd_table[minor].sel_ops_out = 0;
+ uds_fd_table[minor].status_updated = 0;
+
+ /* initialize the data pointer (pos) to the start of the PIPE */
+ uds_fd_table[minor].pos = 0;
+
+ /* the PIPE is initially empty */
+ uds_fd_table[minor].size = 0;
+
+ /* the default for a new socket is to allow reading and writing.
+ * shutdown(2) will remove one or both flags.
+ */
+ uds_fd_table[minor].mode = S_IRUSR|S_IWUSR;
+
+	/* The libc socket(2) wrapper sets the actual value later with the
+	 * NWIOSUDSTYPE ioctl().
+	 */
+ uds_fd_table[minor].type = -1;
+
+ /* Clear the backlog by setting each entry to -1 */
+ for (i = 0; i < UDS_SOMAXCONN; i++) {
+ /* initially no connections are pending */
+ uds_fd_table[minor].backlog[i] = -1;
+ }
+
+	memset(&uds_fd_table[minor].ancillary_data, '\0',
+		sizeof(struct ancillary));
+ for (i = 0; i < OPEN_MAX; i++) {
+ uds_fd_table[minor].ancillary_data.fds[i] = -1;
+ }
+
+ /* default the size to UDS_SOMAXCONN */
+ uds_fd_table[minor].backlog_size = UDS_SOMAXCONN;
+
+ /* the socket isn't listening for incoming connections until
+ * listen(2) is called
+ */
+ uds_fd_table[minor].listening = 0;
+
+ /* initially the socket is not connected to a peer */
+ uds_fd_table[minor].peer = -1;
+
+ /* there isn't a child waiting to be accept(2)'d */
+ uds_fd_table[minor].child = -1;
+
+ /* initially the socket is not bound or listening on an address */
+ memset(&(uds_fd_table[minor].addr), '\0', sizeof(struct sockaddr_un));
+ memset(&(uds_fd_table[minor].source), '\0', sizeof(struct sockaddr_un));
+ memset(&(uds_fd_table[minor].target), '\0', sizeof(struct sockaddr_un));
+
+ /* Initially the socket isn't suspended. */
+ uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED;
+
+ /* and the socket doesn't have an I/O grant initially */
+ uds_fd_table[minor].io_gr = (cp_grant_id_t) 0;
+
+ /* since there is no I/O grant it effectively has no size either */
+ uds_fd_table[minor].io_gr_size = 0;
+
+ /* The process isn't suspended so we don't flag it as revivable */
+ uds_fd_table[minor].ready_to_revive = 0;
+
+ /* get the effective user id and effective group id from the endpoint */
+ /* this is needed in the REQ_NEWNODE request to PFS. */
+ rc = getnucred(uds_fd_table[minor].endpoint, &ucred);
+ if (rc == -1) {
+ /* roll back the changes we made to the descriptor */
+ memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t));
+
+ /* likely error: invalid endpoint / proc doesn't exist */
+ uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, errno);
+ return errno;
+ }
+
+ /* Prepare Request to the FS side of PFS */
+
+ fs_m_in.m_type = REQ_NEWNODE;
+ fs_m_in.REQ_MODE = I_NAMED_PIPE;
+ fs_m_in.REQ_DEV = NO_DEV;
+ fs_m_in.REQ_UID = ucred.uid;
+ fs_m_in.REQ_GID = ucred.gid;
+
+ /* Request a new inode on the pipe file system */
+
+ rc = fs_newnode(&fs_m_in, &fs_m_out);
+ if (rc != OK) {
+ /* roll back the changes we made to the descriptor */
+ memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t));
+
+ /* likely error: get_block() failed */
+ uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, rc);
+ return rc;
+ }
+
+ /* Process the response */
+
+ uds_fd_table[minor].inode_nr = fs_m_out.RES_INODE_NR;
+
+ /* prepare the reply */
+
+ uds_fd_table[minor].syscall_done = 1;
+ uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, minor);
+ return minor;
+}
+
+PUBLIC int uds_close(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ message fs_m_in, fs_m_out;
+ int rc;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_close() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].state != UDS_INUSE) {
+ /* attempted to close a socket that hasn't been opened --
+ * something is very wrong :(
+ */
+ uds_set_reply(dev_m_out, DEV_CLOSE_REPL, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL);
+ return EINVAL;
+ }
+
+ /* no need to track the syscall in case of cancellation. close() is
+ * atomic and can't be cancelled. no need to update the endpoint here,
+ * we won't be needing it to kill the socket
+ */
+
+ /* if the socket is connected, disconnect it */
+ if (uds_fd_table[minor].peer != -1) {
+
+ /* set peer of this peer to -1 */
+ uds_fd_table[uds_fd_table[minor].peer].peer = -1;
+
+ /* error to pass to peer */
+ uds_fd_table[uds_fd_table[minor].peer].err = ECONNRESET;
+
+ /* if peer was blocked on I/O revive peer */
+ if (uds_fd_table[uds_fd_table[minor].peer].suspended) {
+ int peer = uds_fd_table[minor].peer;
+
+ uds_fd_table[peer].ready_to_revive = 1;
+ uds_unsuspend(dev_m_in->m_source, peer);
+ }
+ }
+
+ if (uds_fd_table[minor].ancillary_data.nfiledes > 0) {
+ clear_fds(minor, &(uds_fd_table[minor].ancillary_data));
+ }
+
+ /* Prepare Request to the FS side of PFS */
+
+ fs_m_in.m_type = REQ_PUTNODE;
+ fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr;
+ fs_m_in.REQ_COUNT = 1;
+
+ /* set the socket back to its original UDS_FREE state */
+ memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t));
+
+ /* Request the removal of the inode from the pipe file system */
+
+ rc = fs_putnode(&fs_m_in, &fs_m_out);
+ if (rc != OK) {
+ perror("fs_putnode");
+ /* likely error: get_block() failed */
+ return rc;
+ }
+
+ uds_set_reply(dev_m_out, DEV_CLOSE_REPL, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, OK);
+ return OK;
+}
+
+PUBLIC int uds_select(message *dev_m_in, message *dev_m_out)
+{
+ int i, bytes;
+ int minor;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_select() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].state != UDS_INUSE) {
+
+		/* attempted to select on a socket that hasn't been opened --
+ * something is very wrong :(
+ */
+
+ uds_sel_reply(dev_m_out, DEV_SEL_REPL1, minor, EINVAL);
+ return EINVAL;
+ }
+
+ /* setup select(2) framework */
+ uds_fd_table[minor].selecting = 1;
+ uds_fd_table[minor].select_proc = dev_m_in->m_source;
+
+ /* track the system call we are performing in case it gets cancelled */
+ uds_fd_table[minor].call_nr = dev_m_in->m_type;
+ uds_fd_table[minor].ioctl = 0;
+ uds_fd_table[minor].syscall_done = 0;
+
+ /* Can't update the process endpoint here, no info. */
+
+ uds_fd_table[minor].sel_ops_in = dev_m_in->USER_ENDPT;
+ uds_fd_table[minor].sel_ops_out = 0;
+
+ /* check if there is data available to read */
+ bytes = uds_perform_read(minor, dev_m_in->m_source, 1, 1);
+ if (bytes > 0) {
+
+ /* there is data in the pipe for us to read */
+ uds_fd_table[minor].sel_ops_out |= SEL_RD;
+
+ } else if (uds_fd_table[minor].listening == 1) {
+
+ /* check for pending connections */
+ for (i = 0; i < uds_fd_table[minor].backlog_size; i++) {
+ if (uds_fd_table[minor].backlog[i] != -1) {
+ uds_fd_table[minor].sel_ops_out |= SEL_RD;
+ break;
+ }
+ }
+ }
+
+ /* check if we can write without blocking */
+ bytes = uds_perform_write(minor, dev_m_in->m_source, PIPE_BUF, 1);
+ if (bytes > 0) {
+ uds_fd_table[minor].sel_ops_out |= SEL_WR;
+ }
+
+ uds_fd_table[minor].syscall_done = 1;
+ uds_sel_reply(dev_m_out, DEV_SEL_REPL1, minor,
+ uds_fd_table[minor].sel_ops_out);
+
+ return uds_fd_table[minor].sel_ops_out;
+}
+
+PRIVATE int uds_perform_read(int minor, endpoint_t m_source,
+ size_t size, int pretend)
+{
+ int rc;
+ message fs_m_in;
+ message fs_m_out;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_perform_read() call_count=%d\n", minor,
+ ++call_count);
+#endif
+
+ /* skip reads and writes of 0 (or less!) bytes */
+ if (size <= 0) {
+ return 0;
+ }
+
+ /* check if we are allowed to read */
+ if (!(uds_fd_table[minor].mode & S_IRUSR)) {
+
+ /* socket is shutdown for reading */
+ return EPIPE;
+ }
+
+ if (uds_fd_table[minor].size == 0) {
+
+ if (pretend) {
+ return SUSPEND;
+ }
+
+ /* maybe a process is blocked waiting to write? if
+ * needed revive the writer
+ */
+ if (uds_fd_table[minor].peer != -1 &&
+ uds_fd_table[uds_fd_table[minor].peer].suspended) {
+ int peer = uds_fd_table[minor].peer;
+
+ uds_fd_table[peer].ready_to_revive = 1;
+ uds_unsuspend(m_source, peer);
+ }
+
+#if DEBUG == 1
+ printf("(uds) [%d] suspending read request\n", minor);
+#endif
+
+ /* Process is reading from an empty pipe,
+ * suspend it so some bytes can be written
+ */
+ uds_fd_table[minor].suspended = UDS_SUSPENDED_READ;
+ return SUSPEND;
+ }
+
+ if (pretend) {
+
+ return (size > uds_fd_table[minor].size) ?
+ uds_fd_table[minor].size : size;
+ }
+
+
+ /* Prepare Request to the FS side of PFS */
+ fs_m_in.m_type = REQ_READ;
+ fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr;
+ fs_m_in.REQ_GRANT = uds_fd_table[minor].io_gr;
+ fs_m_in.REQ_SEEK_POS_HI = 0;
+ fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[minor].pos;
+ fs_m_in.REQ_NBYTES = (size > uds_fd_table[minor].size) ?
+ uds_fd_table[minor].size : size;
+
+ /* perform the read */
+ rc = fs_readwrite(&fs_m_in, &fs_m_out);
+ if (rc != OK) {
+ perror("fs_readwrite");
+ return rc;
+ }
+
+ /* Process the response */
+#if DEBUG == 1
+ printf("(uds) [%d] read complete\n", minor);
+#endif
+
+ /* move the position of the data pointer up to data we haven't
+ * read yet
+ */
+ uds_fd_table[minor].pos += fs_m_out.RES_NBYTES;
+
+ /* decrease the number of unread bytes */
+ uds_fd_table[minor].size -= fs_m_out.RES_NBYTES;
+
+ /* if we have 0 unread bytes, move the data pointer back to the
+ * start of the buffer
+ */
+ if (uds_fd_table[minor].size == 0) {
+ uds_fd_table[minor].pos = 0;
+ }
+
+ /* maybe a big write was waiting for us to read some data, if
+ * needed revive the writer
+ */
+ if (uds_fd_table[minor].peer != -1 &&
+ uds_fd_table[uds_fd_table[minor].peer].suspended) {
+ int peer = uds_fd_table[minor].peer;
+
+ uds_fd_table[peer].ready_to_revive = 1;
+ uds_unsuspend(m_source, peer);
+ }
+
+ /* see if peer is blocked on select() and a write is possible
+ * (from peer to minor)
+ */
+ if (uds_fd_table[minor].peer != -1 &&
+ uds_fd_table[uds_fd_table[minor].peer].selecting == 1 &&
+ (uds_fd_table[minor].size + uds_fd_table[minor].pos + 1
+ < PIPE_BUF)) {
+
+ int peer = uds_fd_table[minor].peer;
+
+ /* if the peer wants to know about write being possible
+ * and it doesn't know about it already, then let the peer know.
+ */
+ if ((uds_fd_table[peer].sel_ops_in & SEL_WR) &&
+ !(uds_fd_table[peer].sel_ops_out & SEL_WR)) {
+
+ /* a write on peer is possible now */
+ uds_fd_table[peer].sel_ops_out |= SEL_WR;
+ uds_fd_table[peer].status_updated = 1;
+ uds_unsuspend(m_source, peer);
+ }
+ }
+
+ return fs_m_out.RES_NBYTES; /* return number of bytes read */
+}
+
+PRIVATE int uds_perform_write(int minor, endpoint_t m_source,
+ size_t size, int pretend)
+{
+ int rc, peer, i;
+ message fs_m_in;
+ message fs_m_out;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_perform_write() call_count=%d\n", minor,
+ ++call_count);
+#endif
+
+ /* skip reads and writes of 0 (or less!) bytes */
+ if (size <= 0) {
+ return 0;
+ }
+
+ /* check if we are allowed to write */
+ if (!(uds_fd_table[minor].mode & S_IWUSR)) {
+
+ /* socket is shutdown for writing */
+ return EPIPE;
+ }
+
+ if (size > PIPE_BUF) {
+
+ /* message is too big to ever write to the PIPE */
+ return EMSGSIZE;
+ }
+
+ if (uds_fd_table[minor].type == SOCK_STREAM ||
+ uds_fd_table[minor].type == SOCK_SEQPACKET) {
+
+ /* if we're writing with a connection oriented socket,
+ * then it needs a peer to write to
+ */
+ if (uds_fd_table[minor].peer == -1) {
+ if (uds_fd_table[minor].err == ECONNRESET) {
+
+ uds_fd_table[minor].err = 0;
+ return ECONNRESET;
+ } else {
+ return ENOTCONN;
+ }
+ } else {
+
+ peer = uds_fd_table[minor].peer;
+ }
+
+ } else /* uds_fd_table[minor].type == SOCK_DGRAM */ {
+
+ peer = -1;
+
+ /* locate the "peer" we want to write to */
+ for (i = 0; i < NR_FDS; i++) {
+
+ /* look for a SOCK_DGRAM socket that is bound on
+ * the target address
+ */
+ if (uds_fd_table[i].type == SOCK_DGRAM &&
+ uds_fd_table[i].addr.sun_family == AF_UNIX &&
+ !strncmp(uds_fd_table[minor].target.sun_path,
+ uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) {
+
+ peer = i;
+ break;
+ }
+ }
+
+ if (peer == -1) {
+ return ENOENT;
+ }
+ }
+
+	/* check if the write would overrun the buffer. check if message
+	 * boundary preserving types (SEQPACKET and DGRAM) would write
+	 * into a buffer that isn't empty yet. check if connectionless
+	 * sockets have a target to write to.
+	 */
+ if ((uds_fd_table[peer].pos+uds_fd_table[peer].size+size > PIPE_BUF) ||
+ ((uds_fd_table[minor].type == SOCK_SEQPACKET ||
+ uds_fd_table[minor].type == SOCK_DGRAM) &&
+ uds_fd_table[peer].size > 0) || (peer == -1)) {
+
+ if (pretend) {
+ return SUSPEND;
+ }
+
+ /* if needed revive the reader */
+ if (uds_fd_table[peer].suspended) {
+ uds_fd_table[peer].ready_to_revive = 1;
+ uds_unsuspend(m_source, peer);
+ }
+
+#if DEBUG == 1
+ printf("(uds) [%d] suspending write request\n", minor);
+#endif
+
+		/* Process is writing to a full pipe,
+		 * suspend it until some bytes are read
+		 */
+ uds_fd_table[minor].suspended = UDS_SUSPENDED_WRITE;
+ return SUSPEND;
+ }
+
+ if (pretend) {
+ return size;
+ }
+
+ /* Prepare Request to the FS side of PFS */
+ fs_m_in.m_type = REQ_WRITE;
+ fs_m_in.REQ_INODE_NR = uds_fd_table[peer].inode_nr;
+ fs_m_in.REQ_GRANT = uds_fd_table[minor].io_gr;
+ fs_m_in.REQ_SEEK_POS_HI = 0;
+ fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[peer].pos +
+ uds_fd_table[peer].size;
+ fs_m_in.REQ_NBYTES = size;
+
+ /* Request the write */
+ rc = fs_readwrite(&fs_m_in, &fs_m_out);
+ if (rc != OK) {
+ perror("fs_readwrite");
+ return rc;
+ }
+
+ /* Process the response */
+#if DEBUG == 1
+ printf("(uds) [%d] write complete\n", minor);
+#endif
+ /* increase the count of unread bytes */
+ uds_fd_table[peer].size += fs_m_out.RES_NBYTES;
+
+
+ /* fill in the source address to be returned by recvfrom & recvmsg */
+ if (uds_fd_table[minor].type == SOCK_DGRAM) {
+ memcpy(&uds_fd_table[peer].source, &uds_fd_table[minor].addr,
+ sizeof(struct sockaddr_un));
+ }
+
+ /* revive peer that was waiting for us to write */
+ if (uds_fd_table[peer].suspended) {
+ uds_fd_table[peer].ready_to_revive = 1;
+ uds_unsuspend(m_source, peer);
+ }
+
+ /* see if peer is blocked on select()*/
+ if (uds_fd_table[peer].selecting == 1 && fs_m_out.RES_NBYTES > 0) {
+
+ /* if the peer wants to know about data ready to read
+ * and it doesn't know about it already, then let the peer
+ * know we have data for it.
+ */
+ if ((uds_fd_table[peer].sel_ops_in & SEL_RD) &&
+ !(uds_fd_table[peer].sel_ops_out & SEL_RD)) {
+
+ /* a read on peer is possible now */
+ uds_fd_table[peer].sel_ops_out |= SEL_RD;
+ uds_fd_table[peer].status_updated = 1;
+ uds_unsuspend(m_source, peer);
+ }
+ }
+
+ return fs_m_out.RES_NBYTES; /* return number of bytes written */
+}
+
+PUBLIC int uds_read(message *dev_m_in, message *dev_m_out)
+{
+ int bytes;
+ int minor;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_read() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT,
+ dev_m_in->POSITION);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].state != UDS_INUSE) {
+
+	/* attempted to read from a socket that hasn't been opened --
+ * something is very wrong :(
+ */
+ uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL);
+
+ return EINVAL;
+ }
+
+ /* track the system call we are performing in case it gets cancelled */
+ uds_fd_table[minor].call_nr = dev_m_in->m_type;
+ uds_fd_table[minor].ioctl = 0;
+ uds_fd_table[minor].syscall_done = 0;
+
+ /* Update the process endpoint. */
+ uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT;
+
+ /* setup select(2) framework */
+ uds_fd_table[minor].selecting = 0;
+
+ /* save I/O Grant info */
+ uds_fd_table[minor].io_gr = (cp_grant_id_t) dev_m_in->IO_GRANT;
+ uds_fd_table[minor].io_gr_size = dev_m_in->COUNT;
+
+ bytes = uds_perform_read(minor, dev_m_in->m_source,
+ uds_fd_table[minor].io_gr_size, 0);
+
+ uds_set_reply(dev_m_out, DEV_REVIVE, uds_fd_table[minor].endpoint,
+ uds_fd_table[minor].io_gr, bytes);
+
+ return bytes;
+}
+
+PUBLIC int uds_write(message *dev_m_in, message *dev_m_out)
+{
+ int bytes;
+ int minor;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_write() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT,
+ dev_m_in->POSITION);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].state != UDS_INUSE) {
+
+	/* attempted to write to a socket that hasn't been opened --
+ * something is very wrong :(
+ */
+ uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL);
+
+ return EINVAL;
+ }
+
+ /* track the system call we are performing in case it gets cancelled */
+ uds_fd_table[minor].call_nr = dev_m_in->m_type;
+ uds_fd_table[minor].ioctl = 0;
+ uds_fd_table[minor].syscall_done = 0;
+
+ /* Update the process endpoint. */
+ uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT;
+
+ /* setup select(2) framework */
+ uds_fd_table[minor].selecting = 0;
+
+ /* save I/O Grant info */
+ uds_fd_table[minor].io_gr = (cp_grant_id_t) dev_m_in->IO_GRANT;
+ uds_fd_table[minor].io_gr_size = dev_m_in->COUNT;
+
+ bytes = uds_perform_write(minor, dev_m_in->m_source,
+ uds_fd_table[minor].io_gr_size, 0);
+
+ uds_set_reply(dev_m_out, DEV_REVIVE, uds_fd_table[minor].endpoint,
+ uds_fd_table[minor].io_gr, bytes);
+
+ return bytes;
+}
+
+PUBLIC int uds_ioctl(message *dev_m_in, message *dev_m_out)
+{
+ int rc, minor;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_ioctl() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT,
+ dev_m_in->POSITION);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].state != UDS_INUSE) {
+
+	/* attempted an ioctl on a socket that hasn't been opened --
+ * something is very wrong :(
+ */
+ uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL);
+
+ return EINVAL;
+ }
+
+ /* track the system call we are performing in case it gets cancelled */
+ uds_fd_table[minor].call_nr = dev_m_in->m_type;
+ uds_fd_table[minor].ioctl = dev_m_in->COUNT;
+ uds_fd_table[minor].syscall_done = 0;
+
+ /* setup select(2) framework */
+ uds_fd_table[minor].selecting = 0;
+
+ /* update the owner endpoint - yes it's really stored in POSITION */
+ uds_fd_table[minor].owner = dev_m_in->POSITION;
+
+ switch (dev_m_in->COUNT) { /* Handle the ioctl(2) command */
+
+ case NWIOSUDSCONN:
+
+ /* connect to a listening socket -- connect() */
+ rc = do_connect(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSACCEPT:
+
+ /* accept an incoming connection -- accept() */
+ rc = do_accept(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSBLOG:
+
+ /* set the backlog_size and put the socket into the
+ * listening state -- listen()
+ */
+ rc = do_listen(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSTYPE:
+
+ /* set the type for this socket (i.e.
+ * SOCK_STREAM, SOCK_DGRAM, etc) -- socket()
+ */
+ rc = do_socket(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSADDR:
+
+ /* set the address for this socket -- bind() */
+ rc = do_bind(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSADDR:
+
+ /* get the address for this socket -- getsockname() */
+ rc = do_getsockname(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSPADDR:
+
+ /* get the address for the peer -- getpeername() */
+ rc = do_getpeername(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSSHUT:
+
+ /* shutdown a socket for reading, writing, or
+ * both -- shutdown()
+ */
+ rc = do_shutdown(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSPAIR:
+
+ /* connect two sockets -- socketpair() */
+ rc = do_socketpair(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSSOTYPE:
+
+ /* get socket type -- getsockopt(SO_TYPE) */
+ rc = do_getsockopt_sotype(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSPEERCRED:
+
+ /* get peer endpoint -- getsockopt(SO_PEERCRED) */
+ rc = do_getsockopt_peercred(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSTADDR:
+
+ /* set target address -- sendto() */
+ rc = do_sendto(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSFADDR:
+
+ /* get from address -- recvfrom() */
+ rc = do_recvfrom(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSSNDBUF:
+
+ /* get the send buffer size -- getsockopt(SO_SNDBUF) */
+ rc = do_getsockopt_sndbuf(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSSNDBUF:
+
+ /* set the send buffer size -- setsockopt(SO_SNDBUF) */
+ rc = do_setsockopt_sndbuf(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSRCVBUF:
+
+		/* get the receive buffer size -- getsockopt(SO_RCVBUF) */
+ rc = do_getsockopt_rcvbuf(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSRCVBUF:
+
+		/* set the receive buffer size -- setsockopt(SO_RCVBUF) */
+ rc = do_setsockopt_rcvbuf(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOSUDSCTRL:
+
+ /* set the control data -- sendmsg() */
+ rc = do_sendmsg(dev_m_in, dev_m_out);
+
+ break;
+
+ case NWIOGUDSCTRL:
+
+		/* get the control data -- recvmsg() */
+ rc = do_recvmsg(dev_m_in, dev_m_out);
+
+ break;
+
+ default:
+
+		/* the ioctl command is not valid for /dev/uds --
+		 * this happens a lot and is normal: many libc
+		 * functions determine the socket type with
+		 * ioctls. Any that aren't for us simply get an
+		 * EBADIOCTL response.
+		 */
+
+ rc = EBADIOCTL;
+ }
+
+ if (rc != SUSPEND)
+ uds_fd_table[minor].syscall_done = 1;
+
+ uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, rc);
+
+ return rc;
+}
+
+PUBLIC int uds_unsuspend(endpoint_t m_source, int minor)
+{
+	int r = SUSPEND, bytes;	/* only reply if a branch below sets r = OK */
+ message m_out;
+ uds_fd_t *fdp;
+
+ fdp = &uds_fd_table[minor];
+
+ if (fdp->status_updated == 1) {
+
+ /* clear the status_updated flag */
+ fdp->status_updated = 0;
+ fdp->selecting = 0;
+
+ /* prepare the response */
+ uds_sel_reply(&m_out, DEV_SEL_REPL2, minor, fdp->sel_ops_out);
+ r = OK;
+ } else if (fdp->ready_to_revive == 1) {
+
+ /* clear the ready to revive flag */
+ fdp->ready_to_revive = 0;
+
+ switch (fdp->suspended) {
+
+ case UDS_SUSPENDED_READ:
+
+ bytes = uds_perform_read(minor, m_source,
+ fdp->io_gr_size, 0);
+
+ if (bytes == SUSPEND) {
+ r = SUSPEND;
+ break;
+ }
+
+ fdp->suspended = UDS_NOT_SUSPENDED;
+
+ uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint,
+ fdp->io_gr, bytes);
+
+ r = OK;
+ break;
+
+ case UDS_SUSPENDED_WRITE:
+
+ bytes = uds_perform_write(minor, m_source,
+ fdp->io_gr_size, 0);
+
+ if (bytes == SUSPEND) {
+ r = SUSPEND;
+ break;
+ }
+
+ fdp->suspended = UDS_NOT_SUSPENDED;
+
+ uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint,
+ fdp->io_gr, bytes);
+
+ r = OK;
+ break;
+
+ case UDS_SUSPENDED_CONNECT:
+ case UDS_SUSPENDED_ACCEPT:
+
+ /* In both cases, the process
+			 * that sent the notify()
+ * already performed the connection.
+ * The only thing to do here is
+ * unblock.
+ */
+
+ fdp->suspended = UDS_NOT_SUSPENDED;
+
+ uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint,
+ fdp->io_gr, OK);
+
+ r = OK;
+ break;
+
+ default:
+ return(OK);
+ }
+
+ }
+
+ if (r == OK) reply(m_source, &m_out);
+ return(r);
+}
+
+PUBLIC int uds_cancel(message *dev_m_in, message *dev_m_out)
+{
+ int i, j;
+ int minor;
+ /* XXX: should become a noop? */
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] uds_cancel() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+ printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].state != UDS_INUSE) {
+
+		/* attempted to cancel a request on a socket that hasn't
+		 * been opened --
+ * something is very wrong :(
+ */
+ uds_set_reply(dev_m_out, DEV_NO_STATUS, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL);
+
+ return EINVAL;
+ }
+
+ /* Update the process endpoint. */
+ uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT;
+
+ /* setup select(2) framework */
+ uds_fd_table[minor].selecting = 0;
+
+ /* the system call was cancelled, so if the socket was suspended
+ * (which is likely the case), then it is not suspended anymore.
+ */
+ uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED;
+
+ /* If there is a system call and it isn't complete, roll back */
+ if (uds_fd_table[minor].call_nr && !uds_fd_table[minor].syscall_done) {
+
+
+ if (uds_fd_table[minor].call_nr == DEV_IOCTL_S) {
+
+ switch (uds_fd_table[minor].ioctl) {
+
+ case NWIOSUDSACCEPT: /* accept() */
+
+				/* partial accept() only changes the parent's
+				 * uds_fd_table[].child field
+				 */
+
+ for (i = 0; i < NR_FDS; i++) {
+ if (uds_fd_table[i].child ==
+ minor) {
+
+ uds_fd_table[i].child = -1;
+
+ }
+ }
+
+ break;
+
+ case NWIOSUDSCONN: /* connect() */
+
+ /* partial connect() sets addr
+ * and adds minor to server backlog
+ */
+
+ for (i = 0; i < NR_FDS; i++) {
+
+ /* find a socket that is in
+ * use.
+ */
+ if (uds_fd_table[i].state ==
+ UDS_INUSE) {
+
+ /* see if minor is in
+ * the backlog
+ */
+ for (j = 0; j < uds_fd_table[i].backlog_size; j++) {
+
+ if (uds_fd_table[i].backlog[j] == minor) {
+
+ /* remove from backlog */
+ uds_fd_table[i].backlog[j] = -1;
+ }
+ }
+
+ }
+ }
+
+ /* clear the address */
+ memset(&(uds_fd_table[minor].addr),
+ '\0',
+ sizeof(struct sockaddr_un));
+
+ break;
+
+ case NWIOSUDSTADDR: /* sendto() */
+ case NWIOSUDSADDR: /* bind() */
+ case NWIOGUDSADDR: /* getsockname() */
+ case NWIOGUDSPADDR: /* getpeername() */
+ case NWIOSUDSTYPE: /* socket() */
+ case NWIOSUDSBLOG: /* listen() */
+ case NWIOSUDSSHUT: /* shutdown() */
+ case NWIOSUDSPAIR: /* socketpair() */
+ case NWIOGUDSSOTYPE: /* SO_TYPE */
+ case NWIOGUDSPEERCRED: /* SO_PEERCRED */
+ default:
+ /* these are atomic, never suspend,
+ * and can't be cancelled once called
+ */
+ break;
+ }
+
+ }
+
+ /* DEV_READ_S or DEV_WRITE_S don't need to do anything
+ * when cancelled. DEV_OPEN, DEV_REOPEN, DEV_SELECT,
+ * DEV_CLOSE are atomic, never suspend, and can't
+ * be cancelled once called.
+ */
+
+ uds_fd_table[minor].syscall_done = 1;
+ }
+
+
+ uds_set_reply(dev_m_out, DEV_NO_STATUS, dev_m_in->USER_ENDPT,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, EINTR);
+
+ return EINTR;
+}
--- /dev/null
+#ifndef __PFS_FS_H__
+#define __PFS_FS_H__
+
+/* This is the master header for pfs. It includes some other files
+ * and defines the principal constants.
+ */
+#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */
+#define _MINIX 1 /* tell headers to include MINIX stuff */
+#define _SYSTEM 1 /* tell headers that this is the kernel */
+
+/* The following are so basic, all the *.c files get them automatically. */
+#include <minix/config.h> /* MUST be first */
+#include <minix/ansi.h> /* MUST be second */
+#include <sys/types.h>
+#include <minix/const.h>
+#include <minix/type.h>
+#include <minix/dmap.h>
+#include <minix/vfsif.h>
+#include <limits.h>
+#include <errno.h>
+#include <minix/syslib.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <minix/sysutil.h>
+
+#include "const.h"
+#include "proto.h"
+#include "glo.h"
+
+#endif
--- /dev/null
+#ifndef __PFS_GLO_H__
+#define __PFS_GLO_H__
+
+/* EXTERN should be extern except for the table file */
+#ifdef _TABLE
+#undef EXTERN
+#define EXTERN
+#endif
+
+#include <minix/vfsif.h>
+
+/* The following variables are used for returning results to the caller. */
+EXTERN int err_code; /* temporary storage for error number */
+
+EXTERN _PROTOTYPE (int (*fs_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* fs call table */
+EXTERN _PROTOTYPE (int (*dev_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* dev call table */
+
+EXTERN uid_t caller_uid;
+EXTERN gid_t caller_gid;
+EXTERN int req_nr;
+EXTERN int SELF_E;
+EXTERN int exitsignaled;
+EXTERN int busy;
+EXTERN int unmountdone;
+
+/* Inode map. */
+EXTERN bitchunk_t inodemap[FS_BITMAP_CHUNKS(NR_INODES)];
+
+#endif
--- /dev/null
+
+#define _SYSTEM 1 /* get OK and negative error codes */
+#define _MINIX 1 /* tell headers to include MINIX stuff */
+
+#define VERBOSE 0 /* display diagnostics */
+
+#ifdef __NBSD_LIBC
+#include <sys/ioc_net.h>
+#else
+#include <net/ioctl.h>
+#endif
+
+#include <minix/ansi.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <minix/callnr.h>
+#include <minix/config.h>
+#include <minix/dmap.h>
+#include <minix/type.h>
+#include <minix/const.h>
+#include <minix/com.h>
+#include <minix/syslib.h>
+#include <minix/sysutil.h>
+#include <minix/keymap.h>
+#include <minix/bitmap.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "proto.h"
--- /dev/null
+/* This file manages the inode table. There are procedures to allocate and
+ * deallocate inodes, acquire, erase, and release them, and read and write
+ * them from the disk.
+ *
+ * The entry points into this file are
+ * get_inode: search inode table for a given inode; if not there,
+ * read it
+ * put_inode: indicate that an inode is no longer needed in memory
+ * alloc_inode: allocate a new, unused inode
+ * wipe_inode: erase some fields of a newly allocated inode
+ * free_inode: mark an inode as available for a new file
+ * update_times: update atime, ctime, and mtime
+ * find_inode: retrieve pointer to inode in inode cache
+ *
+ */
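+
+/* A minimal sketch (assumed caller behavior) of the reference counting
+ * implemented below:
+ *
+ *	struct inode *rip = get_inode(dev, numb);	(i_count goes up)
+ *	if (rip != NULL) {
+ *		... use the inode ...
+ *		put_inode(rip);				(i_count goes down)
+ *	}
+ */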
+
+#include "fs.h"
+#include "buf.h"
+#include "inode.h"
+#include <minix/vfsif.h>
+
+FORWARD _PROTOTYPE( void addhash_inode, (struct inode * const node) );
+FORWARD _PROTOTYPE( void unhash_inode, (struct inode * const node) );
+
+
+/*===========================================================================*
+ * fs_putnode *
+ *===========================================================================*/
+PUBLIC int fs_putnode(message *fs_m_in, message *fs_m_out)
+{
+/* Find the inode specified by the request message and decrease its counter.*/
+
+ struct inode *rip;
+ int count;
+ dev_t dev;
+ ino_t inum;
+
+ rip = find_inode( (ino_t) fs_m_in->REQ_INODE_NR);
+
+ if(!rip) {
+ printf("%s:%d put_inode: inode #%ld dev: %d not found\n", __FILE__,
+ __LINE__, fs_m_in->REQ_INODE_NR, (dev_t) fs_m_in->REQ_DEV);
+ panic("fs_putnode failed");
+ }
+
+ count = fs_m_in->REQ_COUNT;
+ if (count <= 0) {
+ printf("%s:%d put_inode: bad value for count: %d\n", __FILE__,
+ __LINE__, count);
+ panic("fs_putnode failed");
+ } else if(count > rip->i_count) {
+ printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__,
+ __LINE__, count, rip->i_count);
+ panic("fs_putnode failed");
+ }
+
+ /* Decrease reference counter, but keep one reference; it will be consumed by
+ * put_inode(). */
+ rip->i_count -= count - 1;
+ dev = rip->i_dev;
+ inum = rip->i_num;
+ put_inode(rip);
+ if (rip->i_count == 0) put_block(dev, inum);
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * init_inode_cache *
+ *===========================================================================*/
+PUBLIC void init_inode_cache()
+{
+ struct inode *rip;
+ struct inodelist *rlp;
+
+ /* init free/unused list */
+ TAILQ_INIT(&unused_inodes);
+
+ /* init hash lists */
+ for (rlp = &hash_inodes[0]; rlp < &hash_inodes[INODE_HASH_SIZE]; ++rlp)
+ LIST_INIT(rlp);
+
+ /* add free inodes to unused/free list */
+ for (rip = &inode[0]; rip < &inode[NR_INODES]; ++rip) {
+ rip->i_num = NO_ENTRY;
+ TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused);
+ }
+
+  /* Reserve the first inode (bit 0) to prevent it from being allocated later.
+   * The very first alloc_bit() call is expected to return bit 0, i.e. NO_BIT.
+   */
+  if (alloc_bit() != NO_BIT) printf("PFS could not reserve NO_BIT\n");
+  busy = 0; /* Reserving this bit does not make the server 'in use/busy'. */
+}
+
+
+/*===========================================================================*
+ * addhash_inode *
+ *===========================================================================*/
+PRIVATE void addhash_inode(struct inode * const node)
+{
+ int hashi = (int) (node->i_num & INODE_HASH_MASK);
+
+ /* insert into hash table */
+ LIST_INSERT_HEAD(&hash_inodes[hashi], node, i_hash);
+}
+
+
+/*===========================================================================*
+ * unhash_inode *
+ *===========================================================================*/
+PRIVATE void unhash_inode(struct inode * const node)
+{
+ /* remove from hash table */
+ LIST_REMOVE(node, i_hash);
+}
+
+
+/*===========================================================================*
+ * get_inode *
+ *===========================================================================*/
+PUBLIC struct inode *get_inode(
+ dev_t dev, /* device on which inode resides */
+ ino_t numb /* inode number */
+)
+{
+/* Find the inode in the hash table. If it is not there, get a free inode,
+ * initialize it, and put it on the hash list.
+ */
+ register struct inode *rip;
+ int hashi;
+
+ hashi = (int) (numb & INODE_HASH_MASK);
+
+ /* Search inode in the hash table */
+ LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) {
+ if (rip->i_num == numb && rip->i_dev == dev) {
+ /* If unused, remove it from the unused/free list */
+ if (rip->i_count == 0) {
+ TAILQ_REMOVE(&unused_inodes, rip, i_unused);
+ }
+ ++rip->i_count;
+
+ return(rip);
+ }
+ }
+
+ /* Inode is not on the hash, get a free one */
+ if (TAILQ_EMPTY(&unused_inodes)) {
+ err_code = ENFILE;
+ return(NULL);
+ }
+ rip = TAILQ_FIRST(&unused_inodes);
+
+ /* If not free unhash it */
+ if (rip->i_num != NO_ENTRY) unhash_inode(rip);
+
+ /* Inode is not unused any more */
+ TAILQ_REMOVE(&unused_inodes, rip, i_unused);
+
+ /* Load the inode. */
+ rip->i_dev = dev;
+ rip->i_num = numb;
+ rip->i_count = 1;
+ rip->i_update = 0; /* all the times are initially up-to-date */
+
+ /* Add to hash */
+ addhash_inode(rip);
+
+
+ return(rip);
+}
+
+
+/*===========================================================================*
+ * find_inode *
+ *===========================================================================*/
+PUBLIC struct inode *find_inode(numb)
+ino_t numb; /* inode number */
+{
+/* Find the inode specified by its inode number.
+ */
+ struct inode *rip;
+ int hashi;
+
+ hashi = (int) (numb & INODE_HASH_MASK);
+
+ /* Search inode in the hash table */
+ LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) {
+ if (rip->i_count > 0 && rip->i_num == numb) {
+ return(rip);
+ }
+ }
+
+ return(NULL);
+}
+
+
+/*===========================================================================*
+ * put_inode *
+ *===========================================================================*/
+PUBLIC void put_inode(rip)
+struct inode *rip; /* pointer to inode to be released */
+{
+/* The caller is no longer using this inode. If no one else is using it either
+ * write it back to the disk immediately. If it has no links, truncate it and
+ * return it to the pool of available inodes.
+ */
+
+ if (rip == NULL) return; /* checking here is easier than in caller */
+
+ if (rip->i_count < 1)
+ panic("put_inode: i_count already below 1: %d", rip->i_count);
+
+ if (--rip->i_count == 0) { /* i_count == 0 means no one is using it now */
+ if (rip->i_nlinks == NO_LINK) { /* Are there links to this file? */
+ /* no links, free the inode. */
+ truncate_inode(rip, 0); /* return all the disk blocks */
+ rip->i_mode = I_NOT_ALLOC; /* clear I_TYPE field */
+ free_inode(rip);
+ } else {
+ truncate_inode(rip, (off_t) 0);
+ }
+
+ if (rip->i_nlinks == NO_LINK) {
+ /* free, put at the front of the LRU list */
+ unhash_inode(rip);
+ rip->i_num = NO_ENTRY;
+ rip->i_dev = NO_DEV;
+ rip->i_rdev = NO_DEV;
+ TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused);
+ } else {
+ /* unused, put at the back of the LRU (cache it) */
+ TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused);
+ }
+ }
+}
+
+
+/*===========================================================================*
+ * alloc_inode *
+ *===========================================================================*/
+PUBLIC struct inode *alloc_inode(dev_t dev, mode_t bits)
+{
+/* Allocate a free inode on 'dev', and return a pointer to it. */
+
+ register struct inode *rip;
+ bit_t b;
+ ino_t i_num;
+
+ b = alloc_bit();
+ if (b == NO_BIT) {
+ err_code = ENOSPC;
+ printf("PipeFS is out of inodes\n");
+ return(NULL);
+ }
+ i_num = (ino_t) b;
+
+
+ /* Try to acquire a slot in the inode table. */
+ if ((rip = get_inode(dev, i_num)) == NULL) {
+ /* No inode table slots available. Free the inode if just allocated.*/
+ if (dev == NO_DEV) free_bit(b);
+ } else {
+ /* An inode slot is available. */
+
+ rip->i_mode = bits; /* set up RWX bits */
+ rip->i_nlinks = NO_LINK; /* initial no links */
+ rip->i_uid = caller_uid; /* file's uid is owner's */
+ rip->i_gid = caller_gid; /* ditto group id */
+
+	/* Fields not cleared already are cleared in wipe_inode(). They have
+	 * been put there because truncate() needs to clear the same fields if
+	 * the file happens to be open while being truncated. It saves space
+	 * not to repeat the code.
+	 */
+ wipe_inode(rip);
+ }
+
+ return(rip);
+}
+
+
+/*===========================================================================*
+ * wipe_inode *
+ *===========================================================================*/
+PUBLIC void wipe_inode(rip)
+struct inode *rip; /* the inode to be erased */
+{
+/* Erase some fields in the inode. This function is called from alloc_inode()
+ * when a new inode is to be allocated, and from truncate(), when an existing
+ * inode is to be truncated.
+ */
+
+ rip->i_size = 0;
+ rip->i_update = ATIME | CTIME | MTIME; /* update all times later */
+}
+
+
+/*===========================================================================*
+ * free_inode *
+ *===========================================================================*/
+PUBLIC void free_inode(rip)
+struct inode *rip;
+{
+/* Return an inode to the pool of unallocated inodes. */
+
+ bit_t b;
+
+ if (rip->i_num <= (ino_t) 0 || rip->i_num >= (ino_t) NR_INODES) return;
+ b = (bit_t) rip->i_num;
+ free_bit(b);
+}
+
+
+/*===========================================================================*
+ * update_times *
+ *===========================================================================*/
+PUBLIC void update_times(rip)
+struct inode *rip; /* pointer to inode to be read/written */
+{
+/* Various system calls are required by the standard to update atime, ctime,
+ * or mtime. Since updating a time requires sending a message to the clock
+ * task--an expensive business--the times are marked for update by setting
+ * bits in i_update. When a stat, fstat, or sync is done, or an inode is
+ * released, update_times() may be called to actually fill in the times.
+ */
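+
+/* For example (assumed caller behavior), a read would merely set
+ * rip->i_update |= ATIME and leave filling in rip->i_atime to this
+ * function.
+ */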
+
+ time_t cur_time;
+
+ cur_time = clock_time();
+ if (rip->i_update & ATIME) rip->i_atime = cur_time;
+ if (rip->i_update & CTIME) rip->i_ctime = cur_time;
+ if (rip->i_update & MTIME) rip->i_mtime = cur_time;
+ rip->i_update = 0; /* they are all up-to-date now */
+}
--- /dev/null
+#ifndef __PFS_INODE_H__
+#define __PFS_INODE_H__
+
+/* Inode table. This table holds inodes that are currently in use.
+ */
+
+#include <sys/queue.h>
+
+EXTERN struct inode {
+ mode_t i_mode; /* file type, protection, etc. */
+ nlink_t i_nlinks; /* how many links to this file */
+ uid_t i_uid; /* user id of the file's owner */
+ gid_t i_gid; /* group number */
+ off_t i_size; /* current file size in bytes */
+ time_t i_atime; /* time of last access (V2 only) */
+ time_t i_mtime; /* when was file data last changed */
+ time_t i_ctime; /* when was inode itself changed (V2 only)*/
+
+ /* The following items are not present on the disk. */
+ dev_t i_dev; /* which device is the inode on */
+ dev_t i_rdev; /* which special device is the inode on */
+ ino_t i_num; /* inode number on its (minor) device */
+ int i_count; /* # times inode used; 0 means slot is free */
+ char i_update; /* the ATIME, CTIME, and MTIME bits are here */
+
+ LIST_ENTRY(inode) i_hash; /* hash list */
+ TAILQ_ENTRY(inode) i_unused; /* free and unused list */
+
+
+} inode[NR_INODES];
+
+/* list of unused/free inodes */
+EXTERN TAILQ_HEAD(unused_inodes_t, inode) unused_inodes;
+
+/* inode hashtable */
+EXTERN LIST_HEAD(inodelist, inode) hash_inodes[INODE_HASH_SIZE];
+
+
+#endif
--- /dev/null
+#include "fs.h"
+#include "buf.h"
+#include "inode.h"
+#include <minix/vfsif.h>
+
+/*===========================================================================*
+ * fs_ftrunc *
+ *===========================================================================*/
+PUBLIC int fs_ftrunc(message *fs_m_in, message *fs_m_out)
+{
+ struct inode *rip;
+ off_t start, end;
+ ino_t inumb;
+
+ inumb = (ino_t) fs_m_in->REQ_INODE_NR;
+
+ if( (rip = find_inode(inumb)) == NULL) return(EINVAL);
+
+ start = fs_m_in->REQ_TRC_START_LO;
+ end = fs_m_in->REQ_TRC_END_LO;
+
+ return truncate_inode(rip, start);
+}
+
+
+/*===========================================================================*
+ * truncate_inode *
+ *===========================================================================*/
+PUBLIC int truncate_inode(rip, newsize)
+register struct inode *rip; /* pointer to inode to be truncated */
+off_t newsize; /* inode must become this size */
+{
+/* Set inode to a certain size, freeing any zones no longer referenced
+ * and updating the size in the inode. If the inode is extended, the
+ * extra space is a hole that reads as zeroes.
+ *
+ * Nothing special has to happen to file pointers if inode is opened in
+ * O_APPEND mode, as this is different per fd and is checked when
+ * writing is done.
+ */
+
+  /* Only truncation to zero is supported for pipes. */
+  if(newsize != 0) return(EINVAL);
+  rip->i_size = newsize;
+
+  /* Clear the remaining size-related fields. */
+  wipe_inode(rip);
+
+ return(OK);
+}
--- /dev/null
+#include "fs.h"
+#include <assert.h>
+#include <signal.h>
+#include <minix/dmap.h>
+#include <minix/driver.h>
+#include <minix/endpoint.h>
+#include <minix/vfsif.h>
+#include "buf.h"
+#include "inode.h"
+#include "uds.h"
+
+FORWARD _PROTOTYPE(void get_work, (message *m_in) );
+
+/* SEF functions and variables. */
+FORWARD _PROTOTYPE( void sef_local_startup, (void) );
+FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
+FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
+
+/*===========================================================================*
+ * main *
+ *===========================================================================*/
+PUBLIC int main(int argc, char *argv[])
+{
+/* This is the main routine of this service. The main loop consists of
+ * three major activities: getting new work, processing the work, and
+ * sending the reply. The loop exits only once the file system has been
+ * unmounted and a termination signal was received, or when a panic occurs.
+ */
+ int ind, do_reply, transid;
+ message pfs_m_in;
+ message pfs_m_out;
+
+ /* SEF local startup. */
+ env_setargs(argc, argv);
+ sef_local_startup();
+
+ printf("Started APFS\n");
+ while(!unmountdone || !exitsignaled) {
+ endpoint_t src;
+
+ do_reply = 1;
+ /* Wait for request message. */
+ get_work(&pfs_m_in);
+
+ transid = TRNS_GET_ID(pfs_m_in.m_type);
+ pfs_m_in.m_type = TRNS_DEL_ID(pfs_m_in.m_type);
+ if (pfs_m_in.m_type == 0) {
+ assert(!IS_VFS_FS_TRANSID(transid));
+ pfs_m_in.m_type = transid;
+ transid = 0;
+ } else
+ assert(IS_VFS_FS_TRANSID(transid) || transid == 0);
+
+ src = pfs_m_in.m_source;
+ caller_uid = INVAL_UID; /* To trap errors */
+ caller_gid = INVAL_GID;
+ req_nr = pfs_m_in.m_type;
+
+ if (IS_DEV_RQ(req_nr)) {
+ ind = req_nr - DEV_RQ_BASE;
+ if (ind < 0 || ind >= DEV_CALL_VEC_SIZE) {
+ printf("pfs: bad DEV request %d\n", req_nr);
+ pfs_m_out.m_type = EINVAL;
+ } else {
+ int result;
+ result = (*dev_call_vec[ind])(&pfs_m_in, &pfs_m_out);
+ if (pfs_m_out.REP_STATUS == SUSPEND ||
+ result == SUSPEND) {
+ /* Nothing to tell, so not replying */
+ do_reply = 0;
+ }
+ }
+ } else if (IS_VFS_RQ(req_nr)) {
+ ind = req_nr - VFS_BASE;
+ if (ind < 0 || ind >= FS_CALL_VEC_SIZE) {
+ printf("pfs: bad FS request %d\n", req_nr);
+ pfs_m_out.m_type = EINVAL;
+ } else {
+ pfs_m_out.m_type =
+ (*fs_call_vec[ind])(&pfs_m_in, &pfs_m_out);
+ }
+ } else {
+ printf("pfs: bad request %d\n", req_nr);
+ pfs_m_out.m_type = EINVAL;
+ }
+
+ if (do_reply) {
+ if (IS_VFS_RQ(req_nr) && IS_VFS_FS_TRANSID(transid)) {
+ pfs_m_out.m_type = TRNS_ADD_ID(pfs_m_out.m_type,
+ transid);
+ }
+ reply(src, &pfs_m_out);
+ }
+ }
+ return(OK);
+}
+
+/*===========================================================================*
+ * sef_local_startup *
+ *===========================================================================*/
+PRIVATE void sef_local_startup()
+{
+ /* Register init callbacks. */
+ sef_setcb_init_fresh(sef_cb_init_fresh);
+ sef_setcb_init_restart(sef_cb_init_fail);
+
+ /* No live update support for now. */
+
+ /* Register signal callbacks. */
+ sef_setcb_signal_handler(sef_cb_signal_handler);
+
+ /* Let SEF perform startup. */
+ sef_startup();
+}
+
+/*===========================================================================*
+ * sef_cb_init_fresh *
+ *===========================================================================*/
+PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
+{
+/* Initialize the pipe file server. */
+ int i;
+
+ /* Initialize main loop parameters. */
+ exitsignaled = 0; /* No exit request seen yet. */
+  busy = 0;		/* Server is not 'busy' (i.e., no inodes in use). */
+
+ /* Init inode table */
+ for (i = 0; i < NR_INODES; ++i) {
+ inode[i].i_count = 0;
+ }
+
+ init_inode_cache();
+ uds_init();
+
+ SELF_E = getprocnr();
+ buf_pool();
+
+ driver_announce();
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * sef_cb_signal_handler *
+ *===========================================================================*/
+PRIVATE void sef_cb_signal_handler(int signo)
+{
+ /* Only check for termination signal, ignore anything else. */
+ if (signo != SIGTERM) return;
+
+
+ exitsignaled = 1;
+}
+
+/*===========================================================================*
+ * get_work *
+ *===========================================================================*/
+PRIVATE void get_work(m_in)
+message *m_in; /* pointer to message */
+{
+ int r, srcok = 0, status;
+ endpoint_t src;
+
+ do {
+ /* wait for a message */
+ if ((r = sef_receive_status(ANY, m_in, &status)) != OK)
+ panic("sef_receive_status failed: %d", r);
+ src = m_in->m_source;
+
+ if(src == VFS_PROC_NR) {
+ srcok = 1; /* Normal FS request. */
+ } else
+ printf("PFS: unexpected source %d\n", src);
+ } while(!srcok);
+}
+
+
+/*===========================================================================*
+ * reply *
+ *===========================================================================*/
+PUBLIC void reply(who, m_out)
+endpoint_t who;
+message *m_out; /* report result */
+{
+ if (OK != send(who, m_out)) /* send the message */
+ printf("PFS(%d) was unable to send reply\n", SELF_E);
+}
--- /dev/null
+#include "fs.h"
+
+
+/*===========================================================================*
+ * fs_sync *
+ *===========================================================================*/
+PUBLIC int fs_sync(message *fs_m_in, message *fs_m_out)
+{
+/* Perform the sync() system call. No-op on this FS. */
+
+ return(OK); /* sync() can't fail */
+}
--- /dev/null
+#include "fs.h"
+#include "glo.h"
+
+
+/*===========================================================================*
+ * fs_unmount *
+ *===========================================================================*/
+PUBLIC int fs_unmount(message *fs_m_in, message *fs_m_out)
+{
+/* Unmount Pipe File Server. */
+
+ if (busy) return(EBUSY); /* can't umount a busy file system */
+
+ /* Finish off the unmount. */
+ unmountdone = TRUE;
+
+ return(OK);
+}
--- /dev/null
+#include "fs.h"
+#include <sys/stat.h>
+#include "buf.h"
+#include "inode.h"
+#include <minix/vfsif.h>
+
+
+/*===========================================================================*
+ * fs_newnode *
+ *===========================================================================*/
+PUBLIC int fs_newnode(message *fs_m_in, message *fs_m_out)
+{
+ register int r = OK;
+ mode_t bits;
+ struct inode *rip;
+ dev_t dev;
+
+ caller_uid = (uid_t) fs_m_in->REQ_UID;
+ caller_gid = (gid_t) fs_m_in->REQ_GID;
+ bits = (mode_t) fs_m_in->REQ_MODE;
+ dev = (dev_t) fs_m_in->REQ_DEV;
+
+ /* Try to allocate the inode */
+ if( (rip = alloc_inode(dev, bits) ) == NULL) return(err_code);
+
+ switch (bits & S_IFMT) {
+ case S_IFBLK:
+ case S_IFCHR:
+ rip->i_rdev = dev; /* Major/minor dev numbers */
+ break;
+ case S_IFIFO:
+ if ((get_block(dev, rip->i_num)) == NULL)
+ r = EIO;
+ break;
+ default:
+ r = EIO; /* Unsupported file type */
+ }
+
+ if (r != OK) {
+ free_inode(rip);
+ } else {
+ /* Fill in the fields of the response message */
+ fs_m_out->RES_INODE_NR = rip->i_num;
+ fs_m_out->RES_MODE = rip->i_mode;
+ fs_m_out->RES_FILE_SIZE_LO = rip->i_size;
+ fs_m_out->RES_UID = rip->i_uid;
+ fs_m_out->RES_GID = rip->i_gid;
+ fs_m_out->RES_DEV = dev;
+ }
+
+ return(r);
+}
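+
+/* Illustrative note (not part of the server code): for S_IFIFO nodes the
+ * backing buffer is reserved up front via get_block(), so an allocation
+ * failure is reported here (as EIO) and the inode is freed, rather than
+ * surfacing on the first read(2) or write(2) of the new pipe.
+ */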
--- /dev/null
+#ifndef __PFS_PROTO_H__
+#define __PFS_PROTO_H__
+
+/* Function prototypes. */
+
+/* Structs used in prototypes must be declared as such first. */
+struct buf;
+struct inode;
+struct sockaddr_un;
+struct ancillary;
+
+/* buffer.c */
+_PROTOTYPE( struct buf *get_block, (dev_t dev, ino_t inum) );
+_PROTOTYPE( void put_block, (dev_t dev, ino_t inum) );
+
+/* cache.c */
+_PROTOTYPE( void buf_pool, (void) );
+
+/* inode.c */
+_PROTOTYPE( struct inode *alloc_inode, (dev_t dev, mode_t mode) );
+_PROTOTYPE( void dup_inode, (struct inode *ip) );
+_PROTOTYPE( struct inode *find_inode, (ino_t numb) );
+_PROTOTYPE( void free_inode, (struct inode *rip) );
+_PROTOTYPE( int fs_putnode, (message *fs_m_in, message *fs_m_out) );
+_PROTOTYPE( void init_inode_cache, (void) );
+_PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb) );
+_PROTOTYPE( void put_inode, (struct inode *rip) );
+_PROTOTYPE( void update_times, (struct inode *rip) );
+_PROTOTYPE( void wipe_inode, (struct inode *rip) );
+
+/* link.c */
+_PROTOTYPE( int fs_ftrunc, (message *fs_m_in, message *fs_m_out) );
+_PROTOTYPE( int truncate_inode, (struct inode *rip, off_t newsize) );
+
+/* main.c */
+_PROTOTYPE( void reply, (endpoint_t who, message *m_out) );
+
+/* misc.c */
+_PROTOTYPE( int fs_sync, (message *fs_m_in, message *fs_m_out) );
+
+/* mount.c */
+_PROTOTYPE( int fs_unmount, (message *fs_m_in, message *fs_m_out) );
+
+/* open.c */
+_PROTOTYPE( int fs_newnode, (message *fs_m_in, message *fs_m_out) );
+
+/* read.c */
+_PROTOTYPE( int fs_readwrite, (message *fs_m_in, message *fs_m_out) );
+
+/* utility.c */
+_PROTOTYPE( time_t clock_time, (void) );
+_PROTOTYPE( int no_sys, (message *pfs_m_in, message *pfs_m_out) );
+
+/* stadir.c */
+_PROTOTYPE( int fs_stat, (message *fs_m_in, message *fs_m_out) );
+
+/* super.c */
+_PROTOTYPE( bit_t alloc_bit, (void) );
+_PROTOTYPE( void free_bit, (bit_t bit_returned) );
+
+/* dev_uds.c */
+_PROTOTYPE( int uds_open, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int uds_close, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int uds_read, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int uds_write, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int uds_ioctl, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int uds_select, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int uds_unsuspend, (endpoint_t m_source, int minor) );
+_PROTOTYPE( int uds_cancel, (message *dev_m_in, message *dev_m_out) );
+
+/* uds.c */
+_PROTOTYPE( void uds_init, (void) );
+_PROTOTYPE( int do_accept, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_connect, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_listen, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_socket, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_bind, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_getsockname, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_getpeername, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_shutdown, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_socketpair, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_getsockopt_sotype,
+ (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_getsockopt_peercred,
+ (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_getsockopt_sndbuf,
+ (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_setsockopt_sndbuf,
+ (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_getsockopt_rcvbuf,
+ (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_setsockopt_rcvbuf,
+ (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_sendto, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_recvfrom, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_sendmsg, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int do_recvmsg, (message *dev_m_in, message *dev_m_out) );
+_PROTOTYPE( int perform_connection,
+ (message *dev_m_in, message *dev_m_out,
+ struct sockaddr_un *addr, int minorx,
+ int minory) );
+_PROTOTYPE( int clear_fds, (int minor, struct ancillary *data) );
+#endif
--- /dev/null
+#include "fs.h"
+#include "buf.h"
+#include <minix/com.h>
+#include "inode.h"
+
+
+/*===========================================================================*
+ * fs_readwrite *
+ *===========================================================================*/
+PUBLIC int fs_readwrite(message *fs_m_in, message *fs_m_out)
+{
+ int r, rw_flag;
+ struct buf *bp;
+ cp_grant_id_t gid;
+ off_t position, f_size;
+ unsigned int nrbytes, cum_io;
+ mode_t mode_word;
+ struct inode *rip;
+ ino_t inumb;
+
+ r = OK;
+ cum_io = 0;
+ inumb = (ino_t) fs_m_in->REQ_INODE_NR;
+
+ /* Find the inode referred */
+ if ((rip = find_inode(inumb)) == NULL) return(EINVAL);
+
+ mode_word = rip->i_mode & I_TYPE;
+ if (mode_word != I_NAMED_PIPE) return(EIO);
+ f_size = rip->i_size;
+
+ /* Get the values from the request message */
+ rw_flag = (fs_m_in->m_type == REQ_READ ? READING : WRITING);
+ gid = (cp_grant_id_t) fs_m_in->REQ_GRANT;
+ position = fs_m_in->REQ_SEEK_POS_LO;
+ nrbytes = (unsigned) fs_m_in->REQ_NBYTES;
+
+ /* We can't read beyond the max file position */
+ if (nrbytes > MAX_FILE_POS) return(EFBIG);
+
+ if (rw_flag == WRITING) {
+ /* Check in advance to see if file will grow too big. */
+ /* Casting nrbytes to signed is safe, because it's guaranteed not to
+ be beyond max signed value (i.e., MAX_FILE_POS). */
+ if (position > PIPE_BUF - (signed) nrbytes) return(EFBIG);
+ }
+
+ /* Mark inode in use */
+ if ((get_inode(rip->i_dev, rip->i_num)) == NULL) return(err_code);
+ if ((bp = get_block(rip->i_dev, rip->i_num)) == NULL) return(err_code);
+
+ if (rw_flag == READING) {
+ /* Copy a chunk from the block buffer to user space. */
+ r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) 0,
+ (vir_bytes) (bp->b_data+position), (size_t) nrbytes, D);
+ } else {
+ /* Copy a chunk from user space to the block buffer. */
+ r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) 0,
+ (vir_bytes) (bp->b_data+position), (size_t) nrbytes, D);
+ }
+
+ if (r == OK) {
+ position += (signed) nrbytes; /* Update position */
+ cum_io += nrbytes;
+ }
+
+ fs_m_out->RES_SEEK_POS_LO = position; /* It might change later and the VFS
+ has to know this value */
+
+ /* On write, update file size and access time. */
+ if (rw_flag == WRITING) {
+ if (position > f_size) rip->i_size = position;
+ } else {
+ if(position >= rip->i_size) {
+ /* All data in the pipe is read, so reset pipe pointers */
+ rip->i_size = 0; /* no data left */
+ position = 0; /* reset reader(s) */
+ }
+ }
+
+ bp->b_bytes = position;
+ if (rw_flag == READING) rip->i_update |= ATIME;
+ if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
+ fs_m_out->RES_NBYTES = (size_t) cum_io;
+ put_inode(rip);
+ put_block(rip->i_dev, rip->i_num);
+
+ return(r);
+}
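+
+/* Worked example (illustrative): on an empty pipe (i_size == 0) a writer
+ * stores 10 bytes at position 0, so i_size becomes 10. A reader that
+ * consumes all 10 bytes ends with position >= i_size, so both the size
+ * and the reported position are reset to 0, and the next write starts
+ * at the head of the buffer again.
+ */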
--- /dev/null
+#include "fs.h"
+#include "inode.h"
+#include <string.h>
+#include <sys/stat.h>
+
+
+/*===========================================================================*
+ * stat_inode *
+ *===========================================================================*/
+PRIVATE int stat_inode(
+ register struct inode *rip, /* pointer to inode to stat */
+ endpoint_t who_e, /* Caller endpoint */
+ cp_grant_id_t gid /* grant for the stat buf */
+)
+{
+/* Common code for stat and fstat system calls. */
+ mode_t type;
+ struct stat statbuf;
+ u32_t blocks; /* in units of 512 bytes (S_BLKSIZE) */
+ int r, s;
+
+ type = rip->i_mode & I_TYPE;
+ s = (type == I_CHAR_SPECIAL || type == I_BLOCK_SPECIAL);
+
+ /* Update the atime, ctime, and mtime fields in the inode, if need be. */
+ if (rip->i_update) update_times(rip);
+
+ blocks = rip->i_size / S_BLKSIZE;
+ if (rip->i_size % S_BLKSIZE != 0)
+ blocks += 1;
+
+ memset(&statbuf, 0, sizeof(struct stat));
+
+ statbuf.st_dev = rip->i_dev;
+ statbuf.st_ino = rip->i_num;
+ statbuf.st_mode = rip->i_mode;
+ statbuf.st_nlink = rip->i_nlinks;
+ statbuf.st_uid = rip->i_uid;
+ statbuf.st_gid = (short int) rip->i_gid;
+ statbuf.st_rdev = (dev_t) (s ? rip->i_rdev : NO_DEV);
+ statbuf.st_size = rip->i_size;
+ if (!s) statbuf.st_mode &= ~I_REGULAR;/* wipe out I_REGULAR bit for pipes */
+ statbuf.st_atime = rip->i_atime;
+ statbuf.st_mtime = rip->i_mtime;
+ statbuf.st_ctime = rip->i_ctime;
+ statbuf.st_blksize = PIPE_BUF;
+ statbuf.st_blocks = blocks;
+
+ /* Copy the struct to user space. */
+ r = sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &statbuf,
+ (size_t) sizeof(statbuf), D);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * fs_stat *
+ *===========================================================================*/
+PUBLIC int fs_stat(message *fs_m_in, message *fs_m_out)
+{
+ register int r; /* return value */
+ register struct inode *rip; /* target inode */
+
+ if( (rip = find_inode(fs_m_in->REQ_INODE_NR)) == NULL) return(EINVAL);
+ get_inode(rip->i_dev, rip->i_num); /* mark inode in use */
+ r = stat_inode(rip, fs_m_in->m_source, (cp_grant_id_t) fs_m_in->REQ_GRANT);
+ put_inode(rip); /* release the inode */
+ return(r);
+}
--- /dev/null
+/* This file manages the super block table and the related data structures,
+ * namely, the bit maps that keep track of which zones and which inodes are
+ * allocated and which are free. When a new inode or zone is needed, the
+ * appropriate bit map is searched for a free entry.
+ *
+ * The entry points into this file are
+ * alloc_bit: somebody wants to allocate a zone or inode; find one
+ * free_bit: indicate that a zone or inode is available for allocation
+ */
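+
+/* Worked example (illustrative; assumes FS_BITCHUNK_BITS == 32): bit 70
+ * maps to chunk 70 / 32 = 2 at offset 70 % 32 = 6, so free_bit(70)
+ * clears mask (1 << 6) in inodemap[2]. alloc_bit() computes the
+ * inverse, b = (wptr - &inodemap[0]) * FS_BITCHUNK_BITS + i, when it
+ * hands out a free bit.
+ */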
+
+#include "fs.h"
+#include "buf.h"
+#include "inode.h"
+#include "const.h"
+
+
+/*===========================================================================*
+ * alloc_bit *
+ *===========================================================================*/
+PUBLIC bit_t alloc_bit(void)
+{
+/* Allocate a bit from a bit map and return its bit number. */
+ bitchunk_t *wptr, *wlim;
+ bit_t b;
+ unsigned int i, bcount;
+
+ bcount = FS_BITMAP_CHUNKS(NR_INODES); /* Inode map has this many chunks. */
+ wlim = &inodemap[bcount]; /* Point to last chunk in inodemap. */
+
+ for (wptr = &inodemap[0]; wptr < wlim; wptr++) {
+ /* Does this word contain a free bit? */
+ if (*wptr == (bitchunk_t) ~0) continue; /* No. Go to next word */
+
+ /* Find and allocate the free bit. */
+ for (i = 0; (*wptr & (1 << i)) != 0; ++i) {}
+
+ /* Get inode number */
+ b = (bit_t) ((wptr - &inodemap[0]) * FS_BITCHUNK_BITS + i);
+
+ /* Don't allocate bits beyond end of map. */
+ if (b >= NR_INODES) break;
+
+ /* Allocate and return bit number. */
+ *wptr |= 1 << i;
+
+ /* Mark server 'busy' */
+ busy++;
+ return(b);
+ }
+
+ return(NO_BIT); /* no bit could be allocated */
+}
+
+
+/*===========================================================================*
+ * free_bit *
+ *===========================================================================*/
+PUBLIC void free_bit(bit_returned)
+bit_t bit_returned; /* number of bit to insert into the inode map*/
+{
+ bitchunk_t *k, mask;
+ bit_t bit;
+ unsigned word;
+
+ /* Get word offset and bit within offset */
+ word = (unsigned) (bit_returned / (bit_t) FS_BITCHUNK_BITS);
+ bit = bit_returned % (bit_t) FS_BITCHUNK_BITS;
+
+ /* Unset bit */
+ k = &inodemap[word];
+ mask = (unsigned) 1 << bit;
+ *k &= ~mask;
+
+ busy--; /* One inode less in use. */
+}
--- /dev/null
+
+/* This file contains the table used to map system call numbers onto the
+ * routines that perform them.
+ */
+
+#define _TABLE
+
+#include "fs.h"
+#include "inode.h"
+#include "buf.h"
+#include "uds.h"
+
+/* File System Handlers (pfs) */
+PUBLIC _PROTOTYPE (int (*fs_call_vec[]),
+ (message *fs_m_in, message *fs_m_out) ) = {
+
+ no_sys, /* 0 not used */
+ no_sys, /* 1 */
+ fs_putnode, /* 2 */
+ no_sys, /* 3 */
+ fs_ftrunc, /* 4 */
+ no_sys, /* 5 */
+ no_sys, /* 6 */
+ no_sys, /* 7 */
+ fs_stat, /* 8 */
+ no_sys, /* 9 */
+ no_sys, /* 10 */
+ no_sys, /* 11 */
+ no_sys, /* 12 */
+ no_sys, /* 13 */
+ no_sys, /* 14 */
+ fs_unmount, /* 15 */
+ fs_sync, /* 16 */
+ no_sys, /* 17 */
+ no_sys, /* 18 */
+ fs_readwrite, /* 19 */
+ fs_readwrite, /* 20 */
+ no_sys, /* 21 */
+ no_sys, /* 22 */
+ no_sys, /* 23 */
+ no_sys, /* 24 */
+ no_sys, /* 25 */
+ no_sys, /* 26 */
+ no_sys, /* 27 */
+ no_sys, /* 28 */
+ fs_newnode, /* 29 */
+ no_sys, /* 30 */
+ no_sys, /* 31 */
+ no_sys, /* 32 */
+};
+
+/* Device Handlers (/dev/uds) */
+PUBLIC _PROTOTYPE (int (*dev_call_vec[]),
+ (message *dev_m_in, message *dev_m_out) ) = {
+
+ uds_cancel, /* 0 CANCEL */
+ no_sys, /* 1 */
+ no_sys, /* 2 */
+ no_sys, /* 3 */
+ no_sys, /* 4 */
+ no_sys, /* 5 */
+ uds_open, /* 6 DEV_OPEN */
+ uds_close, /* 7 DEV_CLOSE */
+ no_sys, /* 8 */
+ no_sys, /* 9 */
+ no_sys, /* 10 TTY_SETPGRP */
+ no_sys, /* 11 TTY_EXIT */
+ uds_select, /* 12 DEV_SELECT */
+ no_sys, /* 13 DEV_STATUS */
+ uds_open, /* 14 DEV_REOPEN */
+ no_sys, /* 15 */
+ no_sys, /* 16 */
+ no_sys, /* 17 */
+ no_sys, /* 18 */
+ no_sys, /* 19 */
+ uds_read, /* 20 DEV_READ_S */
+ uds_write, /* 21 DEV_WRITE_S */
+ no_sys, /* 22 DEV_SCATTER_S */
+ no_sys, /* 23 DEV_GATHER_S */
+ uds_ioctl, /* 24 DEV_IOCTL_S */
+ no_sys, /* 25 DEV_MMAP_S */
+};
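+
+/* Dispatch sketch (illustrative; the real loop lives in main.c): VFS
+ * requests index fs_call_vec and /dev/uds messages index dev_call_vec,
+ * roughly as follows (req_nr and dev_nr are placeholder names), assuming
+ * the call number has already been normalized and bounds-checked against
+ * the table size:
+ *
+ *	r = (*fs_call_vec[req_nr])(&fs_m_in, &fs_m_out);
+ *	r = (*dev_call_vec[dev_nr])(&dev_m_in, &dev_m_out);
+ */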
--- /dev/null
+/*
+ * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
+ * This code handles ioctl(2) commands to implement the socket API.
+ * Some helper functions are also present.
+ *
+ * The entry points into this file are...
+ *
+ * uds_init: initialize the descriptor table.
+ * do_accept: handles the accept(2) syscall.
+ * do_connect: handles the connect(2) syscall.
+ * do_listen: handles the listen(2) syscall.
+ * do_socket: handles the socket(2) syscall.
+ * do_bind: handles the bind(2) syscall.
+ * do_getsockname: handles the getsockname(2) syscall.
+ * do_getpeername: handles the getpeername(2) syscall.
+ * do_shutdown: handles the shutdown(2) syscall.
+ * do_socketpair: handles the socketpair(2) syscall.
+ * do_getsockopt_sotype: handles the getsockopt(2) syscall.
+ * do_getsockopt_peercred: handles the getsockopt(2) syscall.
+ * do_getsockopt_sndbuf: handles the getsockopt(2) syscall.
+ * do_setsockopt_sndbuf: handles the setsockopt(2) syscall.
+ * do_getsockopt_rcvbuf: handles the getsockopt(2) syscall.
+ * do_setsockopt_rcvbuf: handles the setsockopt(2) syscall.
+ * do_sendto: handles the sendto(2) syscall.
+ * do_recvfrom: handles the recvfrom(2) syscall.
+ * do_sendmsg: handles the sendmsg(2) syscall.
+ * do_recvmsg: handles the recvmsg(2) syscall.
+ * perform_connection: performs the connection of two descriptors.
+ * clear_fds: calls put_filp for undelivered FDs.
+ *
+ * Also see...
+ *
+ * table.c, dev_uds.c, uds.h
+ */
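+
+/* Illustrative call path (a sketch; the device half lives in dev_uds.c):
+ * a process calls e.g. connect(2), libc turns it into an ioctl on
+ * /dev/uds, VFS forwards the request to PFS, uds_ioctl() decodes it and
+ * calls do_connect() below, which returns OK, an error, or SUSPEND to
+ * block the caller until the server side accept(2)s.
+ */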
+
+#define DEBUG 0
+
+#include "inc.h"
+#include "const.h"
+#include "glo.h"
+#include "uds.h"
+
+/* File Descriptor Table */
+uds_fd_t uds_fd_table[NR_FDS];
+
+/* initialize the descriptor table */
+PUBLIC void uds_init(void)
+{
+ /*
+ * Setting everything to NULL implicitly sets the
+ * state to UDS_FREE.
+ */
+ memset(uds_fd_table, '\0', sizeof(uds_fd_t) * NR_FDS);
+}
+
+/* check the permissions of a socket file */
+PRIVATE int check_perms(int minor, struct sockaddr_un *addr)
+{
+ int rc;
+ message vfs_m;
+ cp_grant_id_t grant_id;
+
+ grant_id = cpf_grant_direct(VFS_PROC_NR, (vir_bytes) addr->sun_path,
+ UNIX_PATH_MAX, CPF_READ | CPF_WRITE);
+
+ /* ask the VFS to verify the permissions */
+ memset(&vfs_m, '\0', sizeof(message));
+
+ vfs_m.m_type = PFS_REQ_CHECK_PERMS;
+ vfs_m.USER_ENDPT = uds_fd_table[minor].owner;
+ vfs_m.IO_GRANT = (char *) grant_id;
+ vfs_m.COUNT = UNIX_PATH_MAX;
+
+ rc = sendrec(VFS_PROC_NR, &vfs_m);
+ cpf_revoke(grant_id);
+ if (OK != rc) {
+ printf("(uds) sendrec error... req_nr: %d err: %d\n",
+ vfs_m.m_type, rc);
+
+ return EIO;
+ }
+
+#if DEBUG == 1
+ printf("(uds) VFS reply => %d\n", vfs_m.m_type);
+ printf("(uds) Canonical Path => %s\n", addr->sun_path);
+#endif
+
+ return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
+}
+
+PRIVATE filp_id_t verify_fd(endpoint_t ep, int fd)
+{
+ int rc;
+ message vfs_m;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) verify_fd(%d,%d) call_count=%d\n", ep, fd,
+ ++call_count);
+#endif
+
+ memset(&vfs_m, '\0', sizeof(message));
+
+ vfs_m.m_type = PFS_REQ_VERIFY_FD;
+ vfs_m.USER_ENDPT = ep;
+ vfs_m.COUNT = fd;
+
+ rc = sendrec(VFS_PROC_NR, &vfs_m);
+ if (OK != rc) {
+ printf("(uds) sendrec error... req_nr: %d err: %d\n",
+ vfs_m.m_type, rc);
+ return NULL;
+ }
+
+#if DEBUG == 1
+ printf("(uds) VFS reply => %d\n", vfs_m.m_type);
+#endif
+
+ return vfs_m.ADDRESS;
+}
+
+PRIVATE int set_filp(filp_id_t sfilp)
+{
+ int rc;
+ message vfs_m;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) set_filp(0x%x) call_count=%d\n", sfilp, ++call_count);
+#endif
+
+ memset(&vfs_m, '\0', sizeof(message));
+
+ vfs_m.m_type = PFS_REQ_SET_FILP;
+ vfs_m.ADDRESS = sfilp;
+
+ rc = sendrec(VFS_PROC_NR, &vfs_m);
+ if (OK != rc) {
+ printf("(uds) sendrec error... req_nr: %d err: %d\n",
+ vfs_m.m_type, rc);
+ return EIO;
+ }
+
+#if DEBUG == 1
+ printf("(uds) VFS reply => %d\n", vfs_m.m_type);
+#endif
+ return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
+}
+
+PRIVATE int copy_filp(endpoint_t to_ep, filp_id_t cfilp)
+{
+ int rc;
+ message vfs_m;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) copy_filp(%d, 0x%x) call_count=%d\n",to_ep, cfilp,
+ ++call_count);
+#endif
+
+ memset(&vfs_m, '\0', sizeof(message));
+
+ vfs_m.m_type = PFS_REQ_COPY_FILP;
+ vfs_m.USER_ENDPT = to_ep;
+ vfs_m.ADDRESS = cfilp;
+
+ rc = sendrec(VFS_PROC_NR, &vfs_m);
+ if (OK != rc) {
+ printf("(uds) sendrec error... req_nr: %d err: %d\n",
+ vfs_m.m_type, rc);
+ return EIO;
+ }
+
+#if DEBUG == 1
+ printf("(uds) VFS reply => %d\n", vfs_m.m_type);
+#endif
+ return vfs_m.m_type;
+}
+
+PRIVATE int put_filp(filp_id_t pfilp)
+{
+ int rc;
+ message vfs_m;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) put_filp(0x%x) call_count=%d\n", pfilp, ++call_count);
+#endif
+
+ memset(&vfs_m, '\0', sizeof(message));
+
+ vfs_m.m_type = PFS_REQ_PUT_FILP;
+ vfs_m.ADDRESS = pfilp;
+
+ rc = sendrec(VFS_PROC_NR, &vfs_m);
+ if (OK != rc) {
+ printf("(uds) sendrec error... req_nr: %d err: %d\n",
+ vfs_m.m_type, rc);
+ return EIO;
+ }
+
+#if DEBUG == 1
+ printf("(uds) VFS reply => %d\n", vfs_m.m_type);
+#endif
+ return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
+}
+
+PRIVATE int cancel_fd(endpoint_t ep, int fd)
+{
+ int rc;
+ message vfs_m;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) cancel_fd(%d,%d) call_count=%d\n", ep, fd, ++call_count);
+#endif
+
+ memset(&vfs_m, '\0', sizeof(message));
+
+ vfs_m.m_type = PFS_REQ_CANCEL_FD;
+ vfs_m.USER_ENDPT = ep;
+ vfs_m.COUNT = fd;
+
+ rc = sendrec(VFS_PROC_NR, &vfs_m);
+ if (OK != rc) {
+ printf("(uds) sendrec error... req_nr: %d err: %d\n",
+ vfs_m.m_type, rc);
+ return EIO;
+ }
+
+#if DEBUG == 1
+ printf("(uds) VFS reply => %d\n", vfs_m.m_type);
+#endif
+ return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
+}
+
+PUBLIC int perform_connection(message *dev_m_in, message *dev_m_out,
+ struct sockaddr_un *addr, int minorx, int minory)
+{
+ /* There are several places where a connection is established:
+ * accept(2), connect(2), uds_status(), and socketpair(2). This
+ * helper makes sure it is done the same way in each place, with
+ * the same validation checks.
+ */
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] perform_connection() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ /* only connection oriented types are acceptable and only like
+ * types can connect to each other
+ */
+ if ((uds_fd_table[minorx].type != SOCK_SEQPACKET &&
+ uds_fd_table[minorx].type != SOCK_STREAM) ||
+ uds_fd_table[minorx].type != uds_fd_table[minory].type) {
+
+ /* sockets are not in a valid state */
+ return EINVAL;
+ }
+
+ /* connect the pair of sockets */
+ uds_fd_table[minorx].peer = minory;
+ uds_fd_table[minory].peer = minorx;
+
+ /* Set the address of both sockets */
+ memcpy(&(uds_fd_table[minorx].addr), addr, sizeof(struct sockaddr_un));
+ memcpy(&(uds_fd_table[minory].addr), addr, sizeof(struct sockaddr_un));
+
+ return OK;
+}
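+
+/* Post-condition (illustrative): after perform_connection(..., x, y)
+ * returns OK, uds_fd_table[x].peer == y, uds_fd_table[y].peer == x, and
+ * both sockets carry the same bound address.
+ */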
+
+
+PUBLIC int do_accept(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int minorparent; /* minor number of parent (server) */
+ int minorpeer;
+ int rc, i;
+ struct sockaddr_un addr;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_accept() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ /* Somewhat weird logic is used in this function, so here's an
+ * overview... The minor number is the server's client socket
+ * (the socket to be returned by accept()). The data waiting
+ * for us in the IO Grant is the address that the server is
+ * listening on. This function uses the address to find the
+ * server's descriptor. From there we can perform the
+ * connection or suspend and wait for a connect().
+ */
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].type != -1) {
+ /* this IOCTL must be called on a 'fresh' socket */
+ return EINVAL;
+ }
+
+ /* Get the server's address */
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
+ D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ /* locate server socket */
+ rc = -1; /* to trap error */
+
+ for (i = 0; i < NR_FDS; i++) {
+
+ if (uds_fd_table[i].addr.sun_family == AF_UNIX &&
+ !strncmp(addr.sun_path,
+ uds_fd_table[i].addr.sun_path,
+ UNIX_PATH_MAX) &&
+ uds_fd_table[i].listening == 1) {
+
+ rc = 0;
+ break;
+ }
+ }
+
+ if (rc == -1) {
+ /* there is no server listening on addr. Maybe someone
+ * screwed up the ioctl()?
+ */
+ return EINVAL;
+ }
+
+ minorparent = i; /* parent */
+
+ /* we are the parent's child */
+ uds_fd_table[minorparent].child = minor;
+
+ /* the peer has the same type as the parent. we need to be that
+ * type too.
+ */
+ uds_fd_table[minor].type = uds_fd_table[minorparent].type;
+
+ /* locate peer to accept in the parent's backlog */
+ minorpeer = -1; /* to trap error */
+ for (i = 0; i < uds_fd_table[minorparent].backlog_size; i++) {
+ if (uds_fd_table[minorparent].backlog[i] != -1) {
+ minorpeer = uds_fd_table[minorparent].backlog[i];
+ uds_fd_table[minorparent].backlog[i] = -1;
+ rc = 0;
+ break;
+ }
+ }
+
+ if (minorpeer == -1) {
+
+#if DEBUG == 1
+ printf("(uds) [%d] {do_accept} suspend\n", minor);
+#endif
+
+ /* there are no peers in the backlog, suspend and wait
+ * for some to show up
+ */
+ uds_fd_table[minor].suspended = UDS_SUSPENDED_ACCEPT;
+
+ return SUSPEND;
+ }
+
+#if DEBUG == 1
+ printf("(uds) [%d] connecting to %d -- parent is %d\n", minor,
+ minorpeer, minorparent);
+#endif
+
+ rc = perform_connection(dev_m_in, dev_m_out, &addr, minor, minorpeer);
+ if (rc != OK) {
+#if DEBUG == 1
+ printf("(uds) [%d] {do_accept} connection not performed\n",
+ minor);
+#endif
+ return rc;
+ }
+
+ uds_fd_table[minorparent].child = -1;
+
+ /* if peer is blocked on connect() revive peer */
+ if (uds_fd_table[minorpeer].suspended) {
+#if DEBUG == 1
+ printf("(uds) [%d] {do_accept} revive %d\n", minor,
+ minorpeer);
+#endif
+ uds_fd_table[minorpeer].ready_to_revive = 1;
+ uds_unsuspend(dev_m_in->m_source, minorpeer);
+ }
+
+ return OK;
+}
+
+PUBLIC int do_connect(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ struct sockaddr_un addr;
+ int rc, i, j;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_connect() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ /* only connection oriented sockets can connect */
+ if (uds_fd_table[minor].type != SOCK_STREAM &&
+ uds_fd_table[minor].type != SOCK_SEQPACKET) {
+ return EINVAL;
+ }
+
+ if (uds_fd_table[minor].peer != -1) {
+ /* socket is already connected */
+ return EISCONN;
+ }
+
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &addr,
+ sizeof(struct sockaddr_un), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ rc = check_perms(minor, &addr);
+ if (rc != OK) {
+ /* permission denied, socket file doesn't exist, etc. */
+ return rc;
+ }
+
+ /* look for a socket of the same type that is listening on the
+ * address we want to connect to
+ */
+ for (i = 0; i < NR_FDS; i++) {
+
+ if (uds_fd_table[minor].type == uds_fd_table[i].type &&
+ uds_fd_table[i].listening &&
+ uds_fd_table[i].addr.sun_family == AF_UNIX &&
+ !strncmp(addr.sun_path, uds_fd_table[i].addr.sun_path,
+ UNIX_PATH_MAX)) {
+
+ if (uds_fd_table[i].child != -1) {
+
+ /* the server is blocked on accept(2) --
+ * perform connection to the child
+ */
+
+ rc = perform_connection(dev_m_in, dev_m_out,
+ &addr, minor, uds_fd_table[i].child);
+
+ if (rc == OK) {
+
+ uds_fd_table[i].child = -1;
+
+#if DEBUG == 1
+ printf("(uds) [%d] {do_connect} revive %d\n", minor, i);
+#endif
+
+ /* wake the parent (server) */
+ uds_fd_table[i].ready_to_revive = 1;
+ uds_unsuspend(dev_m_in->m_source, i);
+ }
+
+ return rc;
+
+ } else {
+
+#if DEBUG == 1
+ printf("(uds) [%d] adding to %d's backlog\n",
+ minor, i);
+#endif
+
+ /* tell the server we're waiting to be served */
+
+ /* look for a free slot in the backlog */
+ rc = -1; /* to trap error */
+ for (j = 0; j < uds_fd_table[i].backlog_size;
+ j++) {
+
+ if (uds_fd_table[i].backlog[j] == -1) {
+
+ uds_fd_table[i].backlog[j] =
+ minor;
+
+ rc = 0;
+ break;
+ }
+ }
+
+ if (rc == -1) {
+
+ /* backlog is full */
+ break;
+ }
+
+ /* see if the server is blocked on select() */
+ if (uds_fd_table[i].selecting == 1) {
+
+ /* if the server wants to know
+ * about data ready to read and
+ * it doesn't know about it
+ * already, then let the server
+ * know we have data for it.
+ */
+ if ((uds_fd_table[i].sel_ops_in &
+ SEL_RD) &&
+ !(uds_fd_table[i].sel_ops_out &
+ SEL_RD)) {
+
+ uds_fd_table[i].sel_ops_out |=
+ SEL_RD;
+ uds_fd_table[i].status_updated
+ = 1;
+
+ uds_unsuspend(
+ dev_m_in->m_source, i);
+ }
+ }
+
+ /* we found our server */
+ uds_fd_table[minor].peer = i;
+
+ /* set the address */
+ memcpy(&(uds_fd_table[minor].addr), &addr,
+ sizeof(struct sockaddr_un));
+
+ break;
+ }
+ }
+ }
+
+ if (uds_fd_table[minor].peer == -1) {
+ /* could not find another open socket listening on the
+ * specified address with room in the backlog
+ */
+ return ECONNREFUSED;
+ }
+
+#if DEBUG == 1
+ printf("(uds) [%d] {do_connect} suspend\n", minor);
+#endif
+
+ /* suspend until the server side completes the connection with accept()
+ */
+
+ uds_fd_table[minor].suspended = UDS_SUSPENDED_CONNECT;
+
+ return SUSPEND;
+}
+
+PUBLIC int do_listen(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ int backlog_size;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_listen() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ /* ensure the socket has a type and is bound */
+ if (uds_fd_table[minor].type == -1 ||
+ uds_fd_table[minor].addr.sun_family != AF_UNIX) {
+
+ /* probably trying to call listen() before bind() */
+ return EINVAL;
+ }
+
+ /* the two supported types for listen(2) are SOCK_STREAM and
+ * SOCK_SEQPACKET
+ */
+ if (uds_fd_table[minor].type != SOCK_STREAM &&
+ uds_fd_table[minor].type != SOCK_SEQPACKET) {
+
+ /* probably trying to call listen() with a SOCK_DGRAM */
+ return EOPNOTSUPP;
+ }
+
+ /* The POSIX standard doesn't say what to do if listen() has
+ * already been called, and there is no errno for that case, so
+ * we silently allow it; however, once listen() has been called
+ * we never let the backlog shrink.
+ */
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &backlog_size, sizeof(int), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ if (uds_fd_table[minor].listening == 0) {
+
+ /* See if backlog_size is between 0 and UDS_SOMAXCONN */
+ if (backlog_size >= 0 && backlog_size < UDS_SOMAXCONN) {
+
+ /* use the user provided backlog_size */
+ uds_fd_table[minor].backlog_size = backlog_size;
+
+ } else {
+
+ /* the user gave an invalid size, use
+ * UDS_SOMAXCONN instead
+ */
+ uds_fd_table[minor].backlog_size = UDS_SOMAXCONN;
+ }
+ } else {
+
+ /* See if the user is trying to expand the backlog_size */
+ if (backlog_size > uds_fd_table[minor].backlog_size &&
+ backlog_size < UDS_SOMAXCONN) {
+
+ /* expand backlog_size */
+ uds_fd_table[minor].backlog_size = backlog_size;
+ }
+
+ /* Don't let the user shrink the backlog_size (we might
+ * have clients waiting in those slots).
+ */
+ }
+
+ /* perform listen(2) */
+ uds_fd_table[minor].listening = 1;
+
+ return OK;
+}
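+
+/* Example (illustrative): listen(fd, 5) on a fresh socket sets the
+ * backlog to 5. A later listen(fd, 2) is silently ignored because the
+ * backlog never shrinks (its slots may hold waiting clients), while
+ * listen(fd, 20) expands it, subject to the UDS_SOMAXCONN bound.
+ */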
+
+PUBLIC int do_socket(message *dev_m_in, message *dev_m_out)
+{
+ int rc;
+ int minor;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_socket() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ /* see if this socket already has a type */
+ if (uds_fd_table[minor].type != -1) {
+ /* socket type can only be set once */
+ return EINVAL;
+ }
+
+ /* get the requested type */
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
+ sizeof(int), D);
+
+ if (rc != OK) {
+
+ /* something went wrong and we couldn't get the type */
+ return EIO;
+ }
+
+ /* validate the type */
+ switch (uds_fd_table[minor].type) {
+ case SOCK_STREAM:
+ case SOCK_DGRAM:
+ case SOCK_SEQPACKET:
+
+ /* the type is one of the 3 valid socket types */
+ return OK;
+
+ default:
+
+ /* if the type isn't one of the 3 valid socket
+ * types, then it must be invalid.
+ */
+
+ /* set the type back to '-1' (no type set) */
+ uds_fd_table[minor].type = -1;
+
+ return EINVAL;
+ }
+}
+
+PUBLIC int do_bind(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ struct sockaddr_un addr;
+ int rc, i;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_bind() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if ((uds_fd_table[minor].type == -1) ||
+ (uds_fd_table[minor].addr.sun_family == AF_UNIX &&
+ uds_fd_table[minor].type != SOCK_DGRAM)) {
+
+ /* the type hasn't been set by do_socket() yet OR attempting
+ * to re-bind() a non-SOCK_DGRAM socket
+ */
+ return EINVAL;
+ }
+
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
+ D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ /* do some basic sanity checks on the address */
+ if (addr.sun_family != AF_UNIX) {
+
+ /* bad family */
+ return EAFNOSUPPORT;
+ }
+
+ if (addr.sun_path[0] == '\0') {
+
+ /* bad address */
+ return ENOENT;
+ }
+
+ rc = check_perms(minor, &addr);
+ if (rc != OK) {
+ /* permission denied, socket file doesn't exist, etc. */
+ return rc;
+ }
+
+ /* make sure the address isn't already in use by another socket. */
+ for (i = 0; i < NR_FDS; i++) {
+ if ((uds_fd_table[i].addr.sun_family == AF_UNIX) &&
+ !strncmp(addr.sun_path,
+ uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) {
+
+ /* another socket is bound to this sun_path */
+ return EADDRINUSE;
+ }
+ }
+
+ /* looks good, perform the bind() */
+ memcpy(&(uds_fd_table[minor].addr), &addr, sizeof(struct sockaddr_un));
+
+ return OK;
+}
+
+PUBLIC int do_getsockname(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_getsockname() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ /* Unconditionally send the address we have assigned to this socket.
+ * The POSIX standard doesn't say what to do if the address
+ * hasn't been set. If the address isn't currently set, then
+ * the user will get zero bytes. Note: libc depends on this
+ * behavior.
+ */
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].addr),
+ sizeof(struct sockaddr_un), D);
+
+ return rc ? EIO : OK;
+}
+
+PUBLIC int do_getpeername(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_getpeername() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ /* check that the socket is connected with a valid peer */
+ if (uds_fd_table[minor].peer != -1) {
+ int peer_minor;
+
+ peer_minor = uds_fd_table[minor].peer;
+
+ /* copy the address from the peer */
+ rc = sys_safecopyto(VFS_PROC_NR,
+ (cp_grant_id_t) dev_m_in->IO_GRANT, (vir_bytes) 0,
+ (vir_bytes) &(uds_fd_table[peer_minor].addr),
+ sizeof(struct sockaddr_un), D);
+
+ return rc ? EIO : OK;
+ } else {
+ if (uds_fd_table[minor].err == ECONNRESET) {
+ uds_fd_table[minor].err = 0;
+
+ return ECONNRESET;
+ } else {
+ return ENOTCONN;
+ }
+ }
+}
+
+PUBLIC int do_shutdown(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc, how;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_shutdown() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].type != SOCK_STREAM &&
+ uds_fd_table[minor].type != SOCK_SEQPACKET) {
+
+ /* socket must be a connection oriented socket */
+ return EINVAL;
+ }
+
+ if (uds_fd_table[minor].peer == -1) {
+ /* shutdown(2) is only valid for connected sockets */
+ if (uds_fd_table[minor].err == ECONNRESET) {
+ return ECONNRESET;
+ } else {
+ return ENOTCONN;
+ }
+ }
+
+ /* get the 'how' parameter from the process */
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &how, sizeof(int), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ switch (how) {
+ case SHUT_RD:
+ /* take away read permission */
+ uds_fd_table[minor].mode &= ~S_IRUSR;
+ break;
+
+ case SHUT_WR:
+ /* take away write permission */
+ uds_fd_table[minor].mode &= ~S_IWUSR;
+ break;
+
+ case SHUT_RDWR:
+ /* completely shutdown */
+ uds_fd_table[minor].mode = 0;
+ break;
+
+ default:
+ /* the 'how' parameter is invalid */
+ return EINVAL;
+ }
+
+ return OK;
+}
+
+PUBLIC int do_socketpair(message *dev_m_in, message *dev_m_out)
+{
+ int rc;
+ dev_t minorin;
+ int minorx, minory;
+ struct sockaddr_un addr;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_socketpair() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ /* first ioctl param is the first socket */
+ minorx = uds_minor(dev_m_in);
+
+ /* third ioctl param is the minor number of the second socket */
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &minorin, sizeof(dev_t), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ minory = (minor(minorin) & BYTE);
+
+#if DEBUG == 1
+ printf("socketpair() %d - %d\n", minorx, minory);
+#endif
+
+ /* security check - both sockets must have the same endpoint (owner) */
+ if (uds_fd_table[minorx].owner != uds_fd_table[minory].owner) {
+
+ /* we won't allow you to magically connect your socket to
+ * someone else's socket
+ */
+ return EPERM;
+ }
+
+ addr.sun_family = AF_UNIX;
+ addr.sun_path[0] = 'X';
+ addr.sun_path[1] = '\0';
+
+ uds_fd_table[minorx].syscall_done = 1;
+ return perform_connection(dev_m_in, dev_m_out, &addr, minorx, minory);
+}
+
+PUBLIC int do_getsockopt_sotype(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_getsockopt_sotype() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].type == -1) {
+
+ /* the type hasn't been set yet. instead of returning an
+ * invalid type, we fail with EINVAL
+ */
+ return EINVAL;
+ }
+
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
+ sizeof(int), D);
+
+ return rc ? EIO : OK;
+}
+
+PUBLIC int do_getsockopt_peercred(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int peer_minor;
+ int rc;
+ struct ucred cred;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_getsockopt_peercred() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].peer == -1) {
+
+ if (uds_fd_table[minor].err == ECONNRESET) {
+ uds_fd_table[minor].err = 0;
+
+ return ECONNRESET;
+ } else {
+ return ENOTCONN;
+ }
+ }
+
+ peer_minor = uds_fd_table[minor].peer;
+
+ /* obtain the peer's credentials */
+ rc = getnucred(uds_fd_table[peer_minor].owner, &cred);
+ if (rc == -1) {
+ /* likely error: invalid endpoint / proc doesn't exist */
+ return errno;
+ }
+
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &cred, sizeof(struct ucred), D);
+
+ return rc ? EIO : OK;
+}
+
+int do_getsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ size_t sndbuf = PIPE_BUF;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_getsockopt_sndbuf() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &(sndbuf),
+ sizeof(size_t), D);
+
+ return rc ? EIO : OK;
+}
+
+int do_setsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ size_t sndbuf;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &sndbuf,
+ sizeof(size_t), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ if (sndbuf > PIPE_BUF) {
+ /* The send buffer is limited to 32K at the moment. */
+ return ENOSYS;
+ }
+
+ /* There is no way to reduce the send buffer; should this
+ * call fail for smaller buffers instead?
+ */
+ return OK;
+}
+
+int do_getsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ size_t rcvbuf = PIPE_BUF;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_getsockopt_rcvbuf() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &(rcvbuf),
+ sizeof(size_t), D);
+
+ return rc ? EIO : OK;
+}
+
+int do_setsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ size_t rcvbuf;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &rcvbuf,
+ sizeof(size_t), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ if (rcvbuf > PIPE_BUF) {
+ /* The receive buffer is limited to 32K at the moment. */
+ return ENOSYS;
+ }
+
+ /* There is no way to reduce the receive buffer; should this
+ * call fail for smaller buffers instead?
+ */
+ return OK;
+}
+
+
+PUBLIC int do_sendto(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ struct sockaddr_un addr;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_sendto() call_count=%d\n", uds_minor(dev_m_in),
+ ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ if (uds_fd_table[minor].type != SOCK_DGRAM) {
+ /* This IOCTL is only for SOCK_DGRAM sockets */
+ return EINVAL;
+ }
+
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
+ D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ /* do some basic sanity checks on the address */
+ if (addr.sun_family != AF_UNIX || addr.sun_path[0] == '\0') {
+ /* bad address */
+ return EINVAL;
+ }
+
+ rc = check_perms(minor, &addr);
+ if (rc != OK) {
+ return rc;
+ }
+
+ memcpy(&(uds_fd_table[minor].target), &addr,
+ sizeof(struct sockaddr_un));
+
+ return OK;
+}
+
+PUBLIC int do_recvfrom(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_recvfrom() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].source),
+ sizeof(struct sockaddr_un), D);
+
+ return rc ? EIO : OK;
+}
+
+int msg_control_read(struct msg_control *msg_ctrl, struct ancillary *data,
+ int minor)
+{
+ int rc;
+ struct msghdr msghdr;
+ struct cmsghdr *cmsg = NULL;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] msg_control_read() call_count=%d\n", minor,
+ ++call_count);
+#endif
+
+ data->nfiledes = 0;
+
+ memset(&msghdr, '\0', sizeof(struct msghdr));
+ msghdr.msg_control = msg_ctrl->msg_control;
+ msghdr.msg_controllen = msg_ctrl->msg_controllen;
+
+ for(cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+
+ int i;
+ int nfds =
+ MIN((cmsg->cmsg_len-CMSG_LEN(0))/sizeof(int),
+ OPEN_MAX);
+
+ for (i = 0; i < nfds; i++) {
+ if (data->nfiledes == OPEN_MAX) {
+ return EOVERFLOW;
+ }
+
+ data->fds[data->nfiledes] =
+ ((int *) CMSG_DATA(cmsg))[i];
+#if DEBUG == 1
+ printf("(uds) [%d] fd[%d]=%d\n", minor,
+ data->nfiledes, data->fds[data->nfiledes]);
+#endif
+ data->nfiledes++;
+ }
+ }
+ }
+
+ /* obtain this socket's credentials */
+ rc = getnucred(uds_fd_table[minor].owner, &(data->cred));
+ if (rc == -1) {
+ return errno;
+ }
+#if DEBUG == 1
+ printf("(uds) [%d] cred={%d,%d,%d}\n", minor,
+ data->cred.pid, data->cred.uid,
+ data->cred.gid);
+#endif
+ return OK;
+}
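+
+/* For reference, the control data parsed above is normally built by the
+ * sending process with the standard CMSG macros; a minimal userland
+ * sketch (not server code; fd is the descriptor being passed):
+ *
+ *	struct msghdr msg;
+ *	char buf[CMSG_SPACE(sizeof(int))];
+ *	struct cmsghdr *cm;
+ *
+ *	memset(&msg, 0, sizeof(msg));
+ *	msg.msg_control = buf;
+ *	msg.msg_controllen = sizeof(buf);
+ *	cm = CMSG_FIRSTHDR(&msg);
+ *	cm->cmsg_level = SOL_SOCKET;
+ *	cm->cmsg_type = SCM_RIGHTS;
+ *	cm->cmsg_len = CMSG_LEN(sizeof(int));
+ *	memcpy(CMSG_DATA(cm), &fd, sizeof(int));
+ */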
+
+PRIVATE int send_fds(int minor, struct ancillary *data)
+{
+ int rc, i, j;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] send_fds() call_count=%d\n", minor, ++call_count);
+#endif
+
+ /* verify the file descriptors and get their filps. */
+ for (i = 0; i < data->nfiledes; i++) {
+ data->filps[i] = verify_fd(uds_fd_table[minor].owner,
+ data->fds[i]);
+
+ if (data->filps[i] == NULL) {
+ return EINVAL;
+ }
+ }
+
+ /* set them as in-flight */
+ for (i = 0; i < data->nfiledes; i++) {
+ rc = set_filp(data->filps[i]);
+ if (rc != OK) {
+ /* revert set_filp() calls */
+ for (j = i; j >= 0; j--) {
+ put_filp(data->filps[j]);
+ }
+ return rc;
+ }
+ }
+
+ return OK;
+}
+
+PUBLIC int clear_fds(int minor, struct ancillary *data)
+{
+/* This function calls put_filp() for all of the FDs in data.
+ * This is used when a Unix Domain Socket is closed and there
+ * exists references to file descriptors that haven't been received
+ * with recvmsg().
+ */
+ int i;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] recv_fds() call_count=%d\n", minor,
+ ++call_count);
+#endif
+
+ for (i = 0; i < data->nfiledes; i++) {
+ put_filp(data->filps[i]);
+#if DEBUG == 1
+ printf("(uds) clear_fds() => %d\n", data->fds[i]);
+#endif
+ data->fds[i] = -1;
+ data->filps[i] = NULL;
+ }
+
+ data->nfiledes = 0;
+
+ return OK;
+}
+
+PRIVATE int recv_fds(int minor, struct ancillary *data,
+ struct msg_control *msg_ctrl)
+{
+ int rc, i, j;
+ struct msghdr msghdr;
+ struct cmsghdr *cmsg;
+ endpoint_t to_ep;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] recv_fds() call_count=%d\n", minor,
+ ++call_count);
+#endif
+
+ msghdr.msg_control = msg_ctrl->msg_control;
+ msghdr.msg_controllen = msg_ctrl->msg_controllen;
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (cmsg == NULL) {
+ /* No room for any control message at all. do_recvmsg()
+ * returns EOVERFLOW before calling us when descriptors
+ * are in flight, so nothing can be pending here.
+ */
+ return OK;
+ }
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int) * data->nfiledes);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+
+ to_ep = uds_fd_table[minor].owner;
+
+ /* copy to the target endpoint */
+ for (i = 0; i < data->nfiledes; i++) {
+ rc = copy_filp(to_ep, data->filps[i]);
+ if (rc < 0) {
+ /* revert set_filp() calls */
+ for (j = 0; j < data->nfiledes; j++) {
+ put_filp(data->filps[j]);
+ }
+ /* revert copy_filp() calls */
+ for (j = i; j >= 0; j--) {
+ cancel_fd(to_ep, data->fds[j]);
+ }
+ return rc;
+ }
+ data->fds[i] = rc; /* data->fds[i] now has the new FD */
+ }
+
+ for (i = 0; i < data->nfiledes; i++) {
+ put_filp(data->filps[i]);
+#if DEBUG == 1
+ printf("(uds) recv_fds() => %d\n", data->fds[i]);
+#endif
+ ((int *)CMSG_DATA(cmsg))[i] = data->fds[i];
+ data->fds[i] = -1;
+ data->filps[i] = NULL;
+ }
+
+ data->nfiledes = 0;
+
+ return OK;
+}
+
+PRIVATE int recv_cred(int minor, struct ancillary *data,
+ struct msg_control *msg_ctrl)
+{
+ struct msghdr msghdr;
+ struct cmsghdr *cmsg;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] recv_cred() call_count=%d\n", minor,
+ ++call_count);
+#endif
+
+ msghdr.msg_control = msg_ctrl->msg_control;
+ msghdr.msg_controllen = msg_ctrl->msg_controllen;
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (cmsg->cmsg_len > 0) {
+ cmsg = CMSG_NXTHDR(&msghdr, cmsg);
+ }
+
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ memcpy(CMSG_DATA(cmsg), &(data->cred), sizeof(struct ucred));
+
+ return OK;
+}
+
+PUBLIC int do_sendmsg(message *dev_m_in, message *dev_m_out)
+{
+ int minor, peer, rc, i;
+ struct msg_control msg_ctrl;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_sendmsg() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+ memset(&msg_ctrl, '\0', sizeof(struct msg_control));
+
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &msg_ctrl,
+ sizeof(struct msg_control), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ /* locate peer */
+ peer = -1;
+ if (uds_fd_table[minor].type == SOCK_DGRAM) {
+ if (uds_fd_table[minor].target.sun_path[0] == '\0' ||
+ uds_fd_table[minor].target.sun_family != AF_UNIX) {
+
+ return EDESTADDRREQ;
+ }
+
+ for (i = 0; i < NR_FDS; i++) {
+
+ /* look for a SOCK_DGRAM socket that is bound on
+ * the target address
+ */
+ if (uds_fd_table[i].type == SOCK_DGRAM &&
+ uds_fd_table[i].addr.sun_family == AF_UNIX &&
+ !strncmp(uds_fd_table[minor].target.sun_path,
+ uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)){
+
+ peer = i;
+ break;
+ }
+ }
+
+ if (peer == -1) {
+ return ENOENT;
+ }
+ } else {
+ peer = uds_fd_table[minor].peer;
+ if (peer == -1) {
+ return ENOTCONN;
+ }
+ }
+
+#if DEBUG == 1
+ printf("(uds) [%d] sendmsg() -- peer=%d\n", minor, peer);
+#endif
+ /* Note: it's possible that there are already some file
+ * descriptors in ancillary_data if the peer hasn't called
+ * recvmsg() yet. That's okay; the receiver will get the
+ * current file descriptors plus the new ones.
+ */
+ rc = msg_control_read(&msg_ctrl, &uds_fd_table[peer].ancillary_data,
+ minor);
+ if (rc != OK) {
+ return rc;
+ }
+
+ return send_fds(minor, &uds_fd_table[peer].ancillary_data);
+}
+
+PUBLIC int do_recvmsg(message *dev_m_in, message *dev_m_out)
+{
+ int minor;
+ int rc;
+ struct msg_control msg_ctrl;
+ socklen_t controllen_avail = 0;
+ socklen_t controllen_needed = 0;
+ socklen_t controllen_desired = 0;
+
+#if DEBUG == 1
+ static int call_count = 0;
+ printf("(uds) [%d] do_sendmsg() call_count=%d\n",
+ uds_minor(dev_m_in), ++call_count);
+#endif
+
+ minor = uds_minor(dev_m_in);
+
+#if DEBUG == 1
+ printf("(uds) [%d] CREDENTIALS {pid:%d,uid:%d,gid:%d}\n", minor,
+ uds_fd_table[minor].ancillary_data.cred.pid,
+ uds_fd_table[minor].ancillary_data.cred.uid,
+ uds_fd_table[minor].ancillary_data.cred.gid);
+#endif
+
+ memset(&msg_ctrl, '\0', sizeof(struct msg_control));
+
+ /* get the msg_control from the user, it will include the
+ * amount of space the user has allocated for control data.
+ */
+ rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &msg_ctrl,
+ sizeof(struct msg_control), D);
+
+ if (rc != OK) {
+ return EIO;
+ }
+
+ controllen_avail = MIN(msg_ctrl.msg_controllen, MSG_CONTROL_MAX);
+
+ if (uds_fd_table[minor].ancillary_data.nfiledes > 0) {
+ controllen_needed = CMSG_LEN(sizeof(int) *
+ (uds_fd_table[minor].ancillary_data.nfiledes));
+ }
+
+ /* if there is room we also include credentials */
+ controllen_desired = controllen_needed +
+ CMSG_LEN(sizeof(struct ucred));
+
+ if (controllen_needed > controllen_avail) {
+ return EOVERFLOW;
+ }
+
+ rc = recv_fds(minor, &uds_fd_table[minor].ancillary_data, &msg_ctrl);
+ if (rc != OK) {
+ return rc;
+ }
+
+ if (controllen_desired <= controllen_avail) {
+ rc = recv_cred(minor, &uds_fd_table[minor].ancillary_data,
+ &msg_ctrl);
+ if (rc != OK) {
+ return rc;
+ }
+ }
+
+ /* send the user the control data */
+ rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+ (vir_bytes) 0, (vir_bytes) &msg_ctrl,
+ sizeof(struct msg_control), D);
+
+ return rc ? EIO : OK;
+}
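+
+/* Sizing example (illustrative): with two descriptors in flight,
+ * controllen_needed is CMSG_LEN(2 * sizeof(int)); credentials are only
+ * appended when the caller supplied at least that much space plus
+ * CMSG_LEN(sizeof(struct ucred)) bytes of control room.
+ */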
--- /dev/null
+#ifndef __PFS_UDS_H__
+#define __PFS_UDS_H__
+
+/*
+ * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
+ *
+ * Also See...
+ *
+ * dev_uds.c, table.c, uds.c
+ */
+
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/ucred.h>
+#include <sys/un.h>
+
+#include <minix/endpoint.h>
+
+/* max connection backlog for incoming connections */
+#define UDS_SOMAXCONN 64
+
+typedef void* filp_id_t;
+
+/* ancillary data to be sent */
+struct ancillary {
+ filp_id_t filps[OPEN_MAX];
+ int fds[OPEN_MAX];
+ int nfiledes;
+ struct ucred cred;
+};
+
+/*
+ * Internal State Information for a socket descriptor.
+ */
+struct uds_fd {
+
+/* Flags */
+
+ enum UDS_STATE {
+ /* This file descriptor is UDS_FREE and can be allocated. */
+ UDS_FREE = 0,
+
+ /* OR it is UDS_INUSE and can't be allocated. */
+ UDS_INUSE = 1
+
+ /* state is set to UDS_INUSE in uds_open(). state is set to
+ * UDS_FREE in uds_init() and uds_close(). state should be
+ * checked prior to all operations.
+ */
+ } state;
+
+/* Owner Info */
+
+ /* Socket Owner */
+ endpoint_t owner;
+
+ /* endpoint for suspend/resume */
+ endpoint_t endpoint;
+
+/* Pipe Housekeeping */
+
+ /* inode number on PFS -- each descriptor is backed by 1
+ * PIPE which is allocated in uds_open() and freed in
+ * uds_close(). Data is sent/written to a peer's PIPE.
+ * Data is recv/read from this PIPE.
+ */
+ ino_t inode_nr;
+
+
+ /* position in the PIPE where the data starts */
+ off_t pos;
+
+ /* size of data in the PIPE */
+ size_t size;
+
+ /* control read/write, set by uds_open() and shutdown(2).
+ * Can be set to S_IRUSR|S_IWUSR, S_IRUSR, S_IWUSR, or 0
+ * for read and write, read only, write only, or neither.
+ * default is S_IRUSR|S_IWUSR.
+ */
+ mode_t mode;
+
+/* Socket Info */
+
+
+ /* socket type - SOCK_STREAM, SOCK_DGRAM, or SOCK_SEQPACKET
+ * Set by uds_ioctl(NWIOSUDSTYPE). It defaults to -1 in
+ * uds_open(). Any action on a socket with type -1 besides
+ * uds_ioctl(NWIOSUDSTYPE) and uds_close() will result in
+ * an error.
+ */
+ int type;
+
+ /* queue of pending connections for server sockets.
+ * connect(2) inserts and accept(2) removes from the queue
+ */
+ int backlog[UDS_SOMAXCONN];
+
+ /* requested connection backlog size. Set by listen(2)
+ * Bounds (0 <= backlog_size <= UDS_SOMAXCONN)
+ * Defaults to UDS_SOMAXCONN which is defined above.
+ */
+ unsigned char backlog_size;
+
+ /* index of peer in uds_fd_table for connected sockets.
+ * -1 is used to mean no peer. Assumptions: peer != -1 means
+ * connected.
+ */
+ int peer;
+
+ /* index of child (client sd returned by accept(2))
+ * -1 is used to mean no child.
+ */
+ int child;
+
+ /* address -- the address the socket is bound to.
+ * Assumptions: addr.sun_family == AF_UNIX means it is bound.
+ */
+ struct sockaddr_un addr;
+
+ /* target -- where DGRAMs are sent to on the next uds_write(). */
+ struct sockaddr_un target;
+
+ /* source -- address where DGRAMs are from. used to fill in the
+ * from address in recvfrom(2) and recvmsg(2).
+ */
+ struct sockaddr_un source;
+
+ /* Flag (1 or 0) - listening for incoming connections.
+ * Default to 0. Set to 1 by do_listen()
+ */
+ int listening;
+
+ /* stores file pointers and credentials being sent between
+ * processes with sendmsg(2) and recvmsg(2).
+ */
+ struct ancillary ancillary_data;
+
+ /* Holds an errno. This is set when a connected socket is
+ * closed and we need to pass ECONNRESET on to a suspended
+ * peer.
+ */
+ int err;
+
+/* Suspend/Revive Housekeeping */
+
+
+ /* SUSPEND State Flags */
+ enum UDS_SUSPENDED {
+
+ /* Socket isn't blocked. */
+ UDS_NOT_SUSPENDED = 0,
+
+ /* Socket is blocked on read(2) waiting for data to read. */
+ UDS_SUSPENDED_READ = 1,
+
+ /* Socket is blocked on write(2) for space to write data. */
+ UDS_SUSPENDED_WRITE = 2,
+
+ /* Socket is blocked on connect(2) waiting for the server. */
+ UDS_SUSPENDED_CONNECT = 4,
+
+ /* Socket is blocked on accept(2) waiting for clients. */
+ UDS_SUSPENDED_ACCEPT = 8
+ } suspended;
+
+ /* Flag (1 or 0) - the event this socket was waiting for is ready.
+ * If 1, then uds_status() will attempt the operation that
+ * the socket was blocked on.
+ */
+ int ready_to_revive;
+
+ /* i/o grant, saved for later use by suspended procs */
+ cp_grant_id_t io_gr;
+
+ /* size of i/o grant, saved for later use by suspended procs */
+ size_t io_gr_size;
+
+ /* Save the call number so that uds_cancel() can unwind the
+ * call properly.
+ */
+ int call_nr;
+
+ /* Save the IOCTL so uds_cancel() knows what got cancelled. */
+ int ioctl;
+
+ /* Flag (1 or 0) - the system call completed.
+ * A doc I read said DEV_CANCEL might be called even though
+ * the operation is finished. We use this variable to
+ * determine if we should rollback the changes or not.
+ */
+ int syscall_done;
+
+/* select() */
+
+ /* Flag (1 or 0) - the process blocked on select(2). When
+ * selecting is 1 and I/O happens on this socket, then
+ * select_proc should be notified.
+ */
+ int selecting;
+
+ /* when a select is in progress, we notify() this endpoint
+ * of new data.
+ */
+ endpoint_t select_proc;
+
+ /* Options (SEL_RD, SEL_WR, SEL_ERR) that are requested. */
+ int sel_ops_in;
+
+ /* Options that are available for this socket. */
+ int sel_ops_out;
+
+ /* Flag (1 or 0) to be set to one before calling notify().
+ * uds_status() will use the flag to locate this descriptor.
+ */
+ int status_updated;
+};
+
+typedef struct uds_fd uds_fd_t;
+
+/* File Descriptor Table -- Defined in uds.c */
+EXTERN uds_fd_t uds_fd_table[NR_FDS];
+
+/*
+ * Take message m and get the index in uds_fd_table.
+ */
+#define uds_minor(m) (minor((dev_t) m->DEVICE) & BYTE)
+
+/*
+ * Fill in a reply message.
+ */
+#define uds_set_reply(msg,type,endpoint,io_gr,status) \
+ do { \
+ (msg)->m_type = type; \
+ (msg)->REP_ENDPT = endpoint; \
+ (msg)->REP_IO_GRANT = io_gr; \
+ (msg)->REP_STATUS = status; \
+ } while (0)
+
+#define uds_sel_reply(msg,type,minor,ops) \
+ do { \
+ (msg)->m_type = type; \
+ (msg)->DEV_MINOR = minor; \
+ (msg)->DEV_SEL_OPS = ops; \
+ } while (0)
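+
+/* Example use (illustrative; the real callers are in dev_uds.c):
+ *
+ *	uds_set_reply(dev_m_out, reply_type, endpt, grant_id, OK);
+ *
+ * fills in the four fields of a device reply in one place; reply_type,
+ * endpt and grant_id are placeholder names.
+ */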
+
+#endif
--- /dev/null
+#include "fs.h"
+
+
+/*===========================================================================*
+ * no_sys *
+ *===========================================================================*/
+PUBLIC int no_sys(message *pfs_m_in, message *pfs_m_out)
+{
+/* Somebody has used an illegal system call number */
+ printf("no_sys: invalid call 0x%x to pfs\n", req_nr);
+ return(EINVAL);
+}
+
+
+/*===========================================================================*
+ * clock_time *
+ *===========================================================================*/
+PUBLIC time_t clock_time()
+{
+/* This routine returns the time in seconds since 1.1.1970. MINIX is an
+ * astrophysically naive system that assumes the earth rotates at a constant
+ * rate and that such things as leap seconds do not exist.
+ */
+
+ int r;
+ clock_t uptime; /* Uptime in ticks */
+ time_t boottime;
+
+ if ((r = getuptime2(&uptime, &boottime)) != OK)
+ panic("clock_time: getuptme2 failed: %d", r);
+
+ return( (time_t) (boottime + (uptime/sys_hz())));
+}
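+
+/* Worked example (illustrative numbers): with sys_hz() == 60 and
+ * getuptime2() yielding uptime == 600 ticks and boottime == 1000000000,
+ * clock_time() returns 1000000000 + 600/60 = 1000000010 seconds since the
+ * epoch.
+ */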
--- /dev/null
+# Makefile for Virtual File System (VFS)
+.include <bsd.own.mk>
+
+PROG= vfs
+SRCS= main.c open.c read.c write.c pipe.c dmap.c \
+ path.c device.c mount.c link.c exec.c \
+ filedes.c stadir.c protect.c time.c \
+ lock.c misc.c utility.c select.c table.c \
+ vnode.c vmnt.c request.c fscall.c \
+ tll.c comm.c worker.c
+
+.if ${MKCOVERAGE} != "no"
+SRCS+= gcov.c
+CPPFLAGS+= -DUSE_COVERAGE
+.endif
+
+DPADD+= ${LIBSYS} ${LIBTIMERS} ${LIBEXEC}
+LDADD+= -lsys -ltimers -lexec -lmthread
+
+MAN=
+
+BINDIR?= /usr/sbin
+INSTALLFLAGS+= -S 16k
+
+.include <minix.bootprog.mk>
--- /dev/null
+#include "fs.h"
+#include "glo.h"
+#include "vmnt.h"
+#include "fproc.h"
+#include <minix/vfsif.h>
+#include <assert.h>
+
+FORWARD _PROTOTYPE( int sendmsg, (struct vmnt *vmp, struct fproc *rfp) );
+FORWARD _PROTOTYPE( int queuemsg, (struct vmnt *vmp) );
+
+/*===========================================================================*
+ * sendmsg *
+ *===========================================================================*/
+PRIVATE int sendmsg(vmp, rfp)
+struct vmnt *vmp;
+struct fproc *rfp;
+{
+/* This is the low level function that sends requests to FS processes.
+ */
+ int r, transid;
+
+ if (vmp->m_fs_e == rfp->fp_endpoint) return(EDEADLK);
+ vmp->m_comm.c_cur_reqs++; /* One more request awaiting a reply */
+
+ transid = rfp->fp_wtid + VFS_TRANSID;
+ rfp->fp_sendrec->m_type = TRNS_ADD_ID(rfp->fp_sendrec->m_type, transid);
+ if ((r = asynsend3(vmp->m_fs_e, rfp->fp_sendrec, AMF_NOREPLY)) != OK) {
+ printf("VFS: sendmsg: error sending message. "
+ "FS_e: %d req_nr: %d err: %d\n", vmp->m_fs_e,
+ rfp->fp_sendrec->m_type, r);
+ util_stacktrace();
+ return(r);
+ }
+
+ return(r);
+}
+
+/*===========================================================================*
+ * send_work *
+ *===========================================================================*/
+PUBLIC void send_work(void)
+{
+/* Try to send out as many requests as possible */
+ struct vmnt *vmp;
+
+ if (sending == 0) return;
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++)
+ fs_sendmore(vmp);
+}
+
+/*===========================================================================*
+ * fs_sendmore *
+ *===========================================================================*/
+PUBLIC void fs_sendmore(struct vmnt *vmp)
+{
+ struct worker_thread *worker;
+
+ /* Can we send more requests? */
+ if (vmp->m_fs_e == NONE) return;
+ if ((worker = vmp->m_comm.c_req_queue) == NULL) /* No process is queued */
+ return;
+ if (vmp->m_comm.c_cur_reqs >= vmp->m_comm.c_max_reqs)/*No room to send more*/
+ return;
+ if (vmp->m_flags & VMNT_BACKCALL) /* Hold off for now */
+ return;
+
+ vmp->m_comm.c_req_queue = worker->w_next; /* Remove head */
+ worker->w_next = NULL;
+ sending--;
+ assert(sending >= 0);
+ sendmsg(vmp, worker->w_job.j_fp);
+}
+
+/*===========================================================================*
+ * fs_sendrec *
+ *===========================================================================*/
+PUBLIC int fs_sendrec(endpoint_t fs_e, message *reqmp)
+{
+ struct vmnt *vmp;
+ int r;
+
+ if ((vmp = find_vmnt(fs_e)) == NULL)
+ panic("Trying to talk to non-existent FS");
+
+ if (!force_sync) {
+ fp->fp_sendrec = reqmp; /* Where to store request and reply */
+
+ /* Find out whether we can send right away or have to enqueue */
+ if ( !(vmp->m_flags & VMNT_BACKCALL) &&
+ vmp->m_comm.c_cur_reqs < vmp->m_comm.c_max_reqs) {
+ /* There's still room to send more and no proc is queued */
+ r = sendmsg(vmp, fp);
+ } else {
+ r = queuemsg(vmp);
+ }
+ self->w_next = NULL; /* End of list */
+
+ if (r != OK) return(r);
+
+ worker_wait(); /* Yield execution until we've received the reply. */
+ } else if (force_sync == 1) {
+ int r;
+ if (OK != (r = sendrec(fs_e, reqmp))) {
+ printf("VFS: sendrec failed: %d\n", r);
+ util_stacktrace();
+ return(r);
+ }
+ } else if (force_sync == 2) {
+ int r, status;
+ if (OK != (r = asynsend(fs_e, reqmp)) ||
+ OK != (r = receive(fs_e, reqmp, &status))) {
+ printf("VFS: asynrec failed: %d\n", r);
+ util_stacktrace();
+ return(r);
+ }
+ } else if (force_sync == 3) {
+ int r, status;
+ if (OK != (r = send(fs_e, reqmp)) ||
+ OK != (r = receive(fs_e, reqmp, &status))) {
+ printf("VFS: sendreceive failed: %d\n", r);
+ util_stacktrace();
+ return(r);
+ }
+ }
+
+ if (reqmp->m_type == -EENTERMOUNT || reqmp->m_type == -ELEAVEMOUNT ||
+ reqmp->m_type == -ESYMLINK) {
+ reqmp->m_type = -reqmp->m_type;
+ } else if (force_sync != 0 && reqmp->m_type > 0) {
+ /* XXX: Keep this as long as we're interested in having support
+ * for synchronous communication. */
+ nested_fs_call(reqmp);
+ return fs_sendrec(fs_e, reqmp);
+ }
+
+ return(reqmp->m_type);
+}
+
+/*===========================================================================*
+ * queuemsg *
+ *===========================================================================*/
+PRIVATE int queuemsg(struct vmnt *vmp)
+{
+/* Put request on queue for vmnt */
+
+ struct worker_thread *queue;
+
+ if (vmp->m_comm.c_req_queue == NULL) {
+ vmp->m_comm.c_req_queue = self;
+ } else {
+ /* Walk the list ... */
+ queue = vmp->m_comm.c_req_queue;
+ while (queue->w_next != NULL) queue = queue->w_next;
+
+ /* ... and append this worker */
+ queue->w_next = self;
+ }
+
+ self->w_next = NULL; /* End of list */
+ sending++;
+
+ return(OK);
+}
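+
+/* Illustrative flow of the machinery above: with c_max_reqs == 1, a second
+ * worker entering fs_sendrec() while a request is outstanding takes the
+ * queuemsg() path and parks itself on vmp->m_comm.c_req_queue. Once the
+ * reply arrives and the request slot is freed (handled elsewhere),
+ * send_work()/fs_sendmore() pop the worker and pass its fp_sendrec message
+ * to sendmsg(), which tags it with the worker's transaction id
+ * (fp_wtid + VFS_TRANSID) before asynsend3() delivers it to the FS.
+ */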
--- /dev/null
+#ifndef __VFS_COMM_H__
+#define __VFS_COMM_H__
+
+/* VFS<->FS communication */
+
+typedef struct {
+ int c_max_reqs; /* Max requests an FS can handle simultaneously */
+ int c_cur_reqs; /* Number of requests the FS is currently handling */
+ struct worker_thread *c_req_queue;/* Queue of procs waiting to send a message */
+} comm_t;
+
+#endif
--- /dev/null
+#ifndef __VFS_CONST_H__
+#define __VFS_CONST_H__
+
+/* Tables sizes */
+#define NR_FILPS 512 /* # slots in filp table */
+#define NR_LOCKS 8 /* # slots in the file locking table */
+#define NR_MNTS 16 /* # slots in mount table */
+#define NR_VNODES 512 /* # slots in vnode table */
+#define NR_WTHREADS 8 /* # slots in worker thread table */
+
+#define NR_NONEDEVS NR_MNTS /* # slots in nonedev bitmap */
+
+/* Miscellaneous constants */
+#define SU_UID ((uid_t) 0) /* super_user's uid_t */
+#define SYS_UID ((uid_t) 0) /* uid_t for system processes and INIT */
+#define SYS_GID ((gid_t) 0) /* gid_t for system processes and INIT */
+
+#define FP_BLOCKED_ON_NONE 0 /* not blocked */
+#define FP_BLOCKED_ON_PIPE 1 /* susp'd on pipe */
+#define FP_BLOCKED_ON_LOCK 2 /* susp'd on lock */
+#define FP_BLOCKED_ON_POPEN 3 /* susp'd on pipe open */
+#define FP_BLOCKED_ON_SELECT 4 /* susp'd on select */
+#define FP_BLOCKED_ON_DOPEN 5 /* susp'd on device open */
+#define FP_BLOCKED_ON_OTHER 6 /* blocked on other process, check
+ fp_task to find out */
+
+/* test if the process is blocked on something */
+#define fp_is_blocked(fp) ((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE)
+
+#define DUP_MASK 0100 /* mask to distinguish dup2 from dup */
+
+#define LOOK_UP 0 /* tells search_dir to lookup string */
+#define ENTER 1 /* tells search_dir to make dir entry */
+#define DELETE 2 /* tells search_dir to delete entry */
+#define IS_EMPTY 3 /* tells search_dir to ret. OK or ENOTEMPTY */
+
+#define SYMLOOP 16
+
+#define LABEL_MAX 16 /* maximum label size (including '\0'). Should
+ * not be smaller than 16 or bigger than
+ * M3_LONG_STRING.
+ */
+
+/* Args to dev_io */
+#define VFS_DEV_READ 2001
+#define VFS_DEV_WRITE 2002
+#define VFS_DEV_IOCTL 2005
+#define VFS_DEV_SELECT 2006
+
+#endif
--- /dev/null
+/* When a needed block is not in the cache, it must be fetched from the disk.
+ * Special character files also require I/O. The routines for these are here.
+ *
+ * The entry points in this file are:
+ * dev_open: FS opens a device
+ * dev_close: FS closes a device
+ * dev_io: FS does a read or write on a device
+ * dev_status: FS processes callback request alert
+ * gen_opcl: generic call to a task to perform an open/close
+ * gen_io: generic call to a task to perform an I/O operation
+ * no_dev: open/close processing for devices that don't exist
+ * no_dev_io: i/o processing for devices that don't exist
+ * tty_opcl: perform tty-specific processing for open/close
+ * ctty_opcl: perform controlling-tty-specific processing for open/close
+ * ctty_io: perform controlling-tty-specific processing for I/O
+ * pm_setsid: perform VFS's side of setsid system call
+ * do_ioctl: perform the IOCTL system call
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/endpoint.h>
+#include <minix/ioctl.h>
+#include <minix/u64.h>
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+#include "param.h"
+
+FORWARD _PROTOTYPE( void restart_reopen, (int major) );
+FORWARD _PROTOTYPE( int safe_io_conversion, (endpoint_t, cp_grant_id_t *,
+ int *,
+ endpoint_t *, void **,
+ size_t, u32_t *) );
+
+PRIVATE int dummyproc;
+
+
+/*===========================================================================*
+ * dev_open *
+ *===========================================================================*/
+PUBLIC int dev_open(
+ dev_t dev, /* device to open */
+ endpoint_t proc_e, /* process to open for */
+ int flags /* mode bits and flags */
+)
+{
+ int major, r;
+
+ /* Determine the major device number and call the device class specific
+ * open/close routine. (This is the only routine that must check the
+ * device number for being in range. All others can trust this check.)
+ */
+ major = major(dev);
+ if (major < 0 || major >= NR_DEVICES) major = 0;
+ if (dmap[major].dmap_driver == NONE) return(ENXIO);
+ r = (*dmap[major].dmap_opcl)(DEV_OPEN, dev, proc_e, flags);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * dev_reopen *
+ *===========================================================================*/
+PUBLIC int dev_reopen(
+ dev_t dev, /* device to open */
+ int filp_no, /* filp to reopen for */
+ int flags /* mode bits and flags */
+)
+{
+/* Reopen a device after a failing device driver */
+
+ int major, r;
+ struct dmap *dp;
+
+ /* Determine the major device number and call the device class specific
+ * open/close routine. (This is the only routine that must check the device
+ * number for being in range. All others can trust this check.)
+ */
+
+ major = major(dev);
+ if (major < 0 || major >= NR_DEVICES) major = 0;
+ dp = &dmap[major];
+ if (dp->dmap_driver == NONE) return(ENXIO);
+ r = (*dp->dmap_opcl)(DEV_REOPEN, dev, filp_no, flags);
+ if (r == SUSPEND) r = OK;
+ return(r);
+}
+
+
+/*===========================================================================*
+ * dev_close *
+ *===========================================================================*/
+PUBLIC int dev_close(
+ dev_t dev, /* device to close */
+ int filp_no
+)
+{
+/* Close a device */
+ int r, major;
+
+ /* See if driver is roughly valid. */
+ major = major(dev);
+ if (major < 0 || major >= NR_DEVICES) return(ENXIO);
+ if (dmap[major].dmap_driver == NONE) return(ENXIO);
+ r = (*dmap[major].dmap_opcl)(DEV_CLOSE, dev, filp_no, 0);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * find_suspended_ep *
+ *===========================================================================*/
+PUBLIC endpoint_t find_suspended_ep(endpoint_t driver, cp_grant_id_t g)
+{
+/* A process is suspended on a driver for which VFS issued a grant. Find out
+ * which process it was.
+ */
+ struct fproc *rfp;
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if(rfp->fp_pid == PID_FREE)
+ continue;
+
+ if(rfp->fp_blocked_on == FP_BLOCKED_ON_OTHER &&
+ rfp->fp_task == driver && rfp->fp_grant == g)
+ return(rfp->fp_endpoint);
+ }
+
+ return(NONE);
+}
+
+
+/*===========================================================================*
+ * dev_status *
+ *===========================================================================*/
+PUBLIC void dev_status(message *m)
+{
+/* A device sent us a notification it has something for us. Retrieve it. */
+
+ message st;
+ int major, get_more = 1;
+ endpoint_t endpt;
+
+ for (major = 0; major < NR_DEVICES; major++)
+ if (dmap_driver_match(m->m_source, major))
+ break; /* 'major' is the device that sent the message */
+
+ if (major >= NR_DEVICES) /* Device endpoint not found; nothing to do */
+ return;
+
+ if (dmap[major].dmap_style == STYLE_DEVA ||
+ dmap[major].dmap_style == STYLE_CLONE_A) {
+ printf("VFS: not doing dev_status for async driver %d\n", m->m_source);
+ return;
+ }
+
+ /* Continuously send DEV_STATUS messages until the device has nothing to
+ * say to us anymore. */
+ do {
+ int r;
+ st.m_type = DEV_STATUS;
+ r = sendrec(m->m_source, &st);
+ if (r == OK && st.REP_STATUS == ERESTART) r = EDEADEPT;
+ if (r != OK) {
+ printf("VFS: DEV_STATUS failed to %d: %d\n", m->m_source, r);
+ if (r == EDEADSRCDST || r == EDEADEPT) return;
+ panic("VFS: couldn't sendrec for DEV_STATUS: %d", r);
+ }
+
+ switch(st.m_type) {
+ case DEV_REVIVE:
+ /* We've got results for a read/write/ioctl call to a
+ * synchronous character driver */
+ endpt = st.REP_ENDPT;
+ if (endpt == VFS_PROC_NR) {
+ endpt = find_suspended_ep(m->m_source,st.REP_IO_GRANT);
+ if(endpt == NONE) {
+ printf("VFS: proc with grant %d from %d not found\n",
+ st.REP_IO_GRANT, st.m_source);
+ continue;
+ }
+ }
+ revive(endpt, st.REP_STATUS);
+ break;
+ case DEV_IO_READY:
+ /* Reply to a select request: driver is ready for I/O */
+ select_reply2(st.m_source, st.DEV_MINOR, st.DEV_SEL_OPS);
+ break;
+ default:
+ printf("VFS: unrecognized reply %d to DEV_STATUS\n",st.m_type);
+ /* Fall through. */
+ case DEV_NO_STATUS:
+ get_more = 0;
+ break;
+ }
+ } while(get_more);
+}
+
+/*===========================================================================*
+ * safe_io_conversion *
+ *===========================================================================*/
+PRIVATE int safe_io_conversion(driver, gid, op, io_ept, buf, bytes, pos_lo)
+endpoint_t driver;
+cp_grant_id_t *gid;
+int *op;
+endpoint_t *io_ept;
+void **buf;
+size_t bytes;
+u32_t *pos_lo;
+{
+/* Convert operation to the 'safe' variant (i.e., grant based) if applicable.
+ * If no copying of data is involved, there is also no need to convert. */
+
+ int access = 0;
+ size_t size;
+
+ *gid = GRANT_INVALID; /* Grant to buffer */
+
+ switch(*op) {
+ case VFS_DEV_READ:
+ case VFS_DEV_WRITE:
+ /* Change to safe op. */
+ *op = (*op == VFS_DEV_READ) ? DEV_READ_S : DEV_WRITE_S;
+ *gid = cpf_grant_magic(driver, *io_ept, (vir_bytes) *buf, bytes,
+ *op == DEV_READ_S ? CPF_WRITE : CPF_READ);
+ if (*gid < 0)
+ panic("VFS: cpf_grant_magic of READ/WRITE buffer failed");
+ break;
+ case VFS_DEV_IOCTL:
+ *pos_lo = *io_ept; /* Old endpoint in POSITION field. */
+ *op = DEV_IOCTL_S;
+ if(_MINIX_IOCTL_IOR(m_in.REQUEST)) access |= CPF_WRITE;
+ if(_MINIX_IOCTL_IOW(m_in.REQUEST)) access |= CPF_READ;
+ if(_MINIX_IOCTL_BIG(m_in.REQUEST))
+ size = _MINIX_IOCTL_SIZE_BIG(m_in.REQUEST);
+ else
+ size = _MINIX_IOCTL_SIZE(m_in.REQUEST);
+
+ /* Grant access to the buffer even if no I/O happens with the ioctl, in
+ * order to disambiguate requests with DEV_IOCTL_S.
+ */
+ *gid = cpf_grant_magic(driver, *io_ept, (vir_bytes) *buf, size, access);
+ if (*gid < 0)
+ panic("VFS: cpf_grant_magic IOCTL buffer failed");
+
+ break;
+ case VFS_DEV_SELECT:
+ *op = DEV_SELECT;
+ break;
+ default:
+ panic("VFS: unknown operation %d for safe I/O conversion", *op);
+ }
+
+ /* If we have converted to a safe operation, I/O endpoint becomes VFS if it
+ * wasn't already.
+ */
+ if(GRANT_VALID(*gid)) {
+ *io_ept = VFS_PROC_NR;
+ return(1);
+ }
+
+ /* Not converted to a safe operation (because there is no copying involved in
+ * this operation).
+ */
+ return(0);
+}
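+
+/* Illustrative ioctl case for the conversion above: an _IOR-type request
+ * means the driver writes back to the caller, so 'access' becomes CPF_WRITE
+ * and the grant size is decoded from the request word by
+ * _MINIX_IOCTL_SIZE() (or _MINIX_IOCTL_SIZE_BIG() for big ioctls); an
+ * _IOW-type request reverses the direction and yields CPF_READ.
+ */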
+
+/*===========================================================================*
+ * dev_io *
+ *===========================================================================*/
+PUBLIC int dev_io(
+ int op, /* DEV_READ, DEV_WRITE, DEV_IOCTL, etc. */
+ dev_t dev, /* major-minor device number */
+ int proc_e, /* in whose address space is buf? */
+ void *buf, /* virtual address of the buffer */
+ u64_t pos, /* byte position */
+ size_t bytes, /* how many bytes to transfer */
+ int flags, /* special flags, like O_NONBLOCK */
+ int suspend_reopen /* Just suspend the process */
+)
+{
+/* Read from or write to a device. The parameter 'dev' tells which one. */
+ struct dmap *dp;
+ u32_t pos_lo, pos_high;
+ message dev_mess;
+ cp_grant_id_t gid = GRANT_INVALID;
+ int safe, minor_dev, major_dev;
+ void *buf_used;
+ endpoint_t ioproc;
+
+ pos_lo = ex64lo(pos);
+ pos_high = ex64hi(pos);
+ major_dev = major(dev);
+ minor_dev = minor(dev);
+
+ /* Determine task dmap. */
+ dp = &dmap[major_dev];
+
+ /* See if driver is roughly valid. */
+ if (dp->dmap_driver == NONE) {
+ printf("VFS: dev_io: no driver for major %d\n", major_dev);
+ return(ENXIO);
+ }
+
+ if (suspend_reopen) {
+ /* Suspend user. */
+ fp->fp_grant = GRANT_INVALID;
+ fp->fp_ioproc = NONE;
+ wait_for(dp->dmap_driver);
+ fp->fp_flags |= FP_SUSP_REOPEN;
+ return(SUSPEND);
+ }
+
+ if(isokendpt(dp->dmap_driver, &dummyproc) != OK) {
+ printf("VFS: dev_io: old driver for major %x (%d)\n", major_dev,
+ dp->dmap_driver);
+ return(ENXIO);
+ }
+
+ /* By default, these are right. */
+ dev_mess.USER_ENDPT = proc_e;
+ dev_mess.ADDRESS = buf;
+
+ /* Convert DEV_* to DEV_*_S variants. */
+ buf_used = buf;
+ safe = safe_io_conversion(dp->dmap_driver, &gid, &op,
+ (endpoint_t *) &dev_mess.USER_ENDPT, &buf_used,
+ bytes, &pos_lo);
+
+ /* If the safe conversion was done, set the IO_GRANT to
+ * the grant id.
+ */
+ if(safe) dev_mess.IO_GRANT = (char *) gid;
+
+ /* Set up the rest of the message passed to task. */
+ dev_mess.m_type = op;
+ dev_mess.DEVICE = minor_dev;
+ dev_mess.POSITION = pos_lo;
+ dev_mess.COUNT = bytes;
+ dev_mess.HIGHPOS = pos_high;
+
+ /* This will be used if the i/o is suspended. */
+ ioproc = dev_mess.USER_ENDPT;
+
+ /* Call the task. */
+ (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+
+ if(dp->dmap_driver == NONE) {
+ /* Driver has vanished. */
+ printf("VFS: driver gone?!\n");
+ if(safe) cpf_revoke(gid);
+ return(EIO);
+ }
+
+ /* Task has completed. See if call completed. */
+ if (dev_mess.REP_STATUS == SUSPEND) {
+ if ((flags & O_NONBLOCK) && !(dp->dmap_style == STYLE_DEVA ||
+ dp->dmap_style == STYLE_CLONE_A)) {
+ /* Not supposed to block. */
+ dev_mess.m_type = CANCEL;
+ dev_mess.USER_ENDPT = ioproc;
+ dev_mess.IO_GRANT = (char *) gid;
+
+ /* This R_BIT/W_BIT check taken from suspend()/unpause()
+ * logic. Mode is expected in the COUNT field.
+ */
+ dev_mess.COUNT = 0;
+ if (call_nr == READ) dev_mess.COUNT = R_BIT;
+ else if (call_nr == WRITE) dev_mess.COUNT = W_BIT;
+ dev_mess.DEVICE = minor_dev;
+ (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+ if (dev_mess.REP_STATUS == EINTR) dev_mess.REP_STATUS = EAGAIN;
+ } else {
+ /* select() will do suspending itself. */
+ if(op != DEV_SELECT) {
+ /* Suspend user. */
+ wait_for(dp->dmap_driver);
+ }
+ assert(!GRANT_VALID(fp->fp_grant));
+ fp->fp_grant = gid; /* revoke this when unsuspended. */
+ fp->fp_ioproc = ioproc;
+
+ if (flags & O_NONBLOCK) {
+ /* Not supposed to block, send cancel message */
+ dev_mess.m_type = CANCEL;
+ dev_mess.USER_ENDPT = ioproc;
+ dev_mess.IO_GRANT = (char *) gid;
+
+ /* This R_BIT/W_BIT check taken from suspend()/unpause()
+ * logic. Mode is expected in the COUNT field.
+ */
+ dev_mess.COUNT = 0;
+ if(call_nr == READ) dev_mess.COUNT = R_BIT;
+ else if(call_nr == WRITE) dev_mess.COUNT = W_BIT;
+ dev_mess.DEVICE = minor_dev;
+ (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+
+ /* Should do something about EINTR -> EAGAIN mapping */
+ }
+ return(SUSPEND);
+ }
+ }
+
+ /* No suspend, or cancelled suspend, so I/O is over and can be cleaned up. */
+ if(safe) cpf_revoke(gid);
+
+ return(dev_mess.REP_STATUS);
+}
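+
+/* Illustrative call (caller-side variables assumed): a read on a character
+ * special file would reach the driver roughly as
+ *
+ *	r = dev_io(VFS_DEV_READ, vp->v_sdev, who_e, buf, pos, nbytes,
+ *		f->filp_flags, suspend_reopen);
+ *
+ * safe_io_conversion() then rewrites VFS_DEV_READ to DEV_READ_S and
+ * replaces 'buf' by a magic grant, so the driver copies straight into the
+ * caller's address space via VFS's grant.
+ */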
+
+/*===========================================================================*
+ * gen_opcl *
+ *===========================================================================*/
+PUBLIC int gen_opcl(
+ int op, /* operation, DEV_OPEN or DEV_CLOSE */
+ dev_t dev, /* device to open or close */
+ endpoint_t proc_e, /* process to open/close for */
+ int flags /* mode bits and flags */
+)
+{
+/* Called from the dmap struct on opens & closes of special files. */
+ int r, minor_dev, major_dev;
+ struct dmap *dp;
+ message dev_mess;
+
+ /* Determine task dmap. */
+ major_dev = major(dev);
+ minor_dev = minor(dev);
+ if (major_dev < 0 || major_dev >= NR_DEVICES) return(ENXIO);
+ dp = &dmap[major_dev];
+ if (dp->dmap_driver == NONE) {
+ printf("VFS: gen_opcl: no driver for major %d\n", major_dev);
+ return(ENXIO);
+ }
+
+ dev_mess.m_type = op;
+ dev_mess.DEVICE = minor_dev;
+ dev_mess.USER_ENDPT = proc_e;
+ dev_mess.COUNT = flags;
+
+ /* Call the task. */
+ r = (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+ if (r != OK) return(r);
+
+ return(dev_mess.REP_STATUS);
+}
+
+/*===========================================================================*
+ * tty_opcl *
+ *===========================================================================*/
+PUBLIC int tty_opcl(
+ int op, /* operation, DEV_OPEN or DEV_CLOSE */
+ dev_t dev, /* device to open or close */
+ endpoint_t proc_e, /* process to open/close for */
+ int flags /* mode bits and flags */
+)
+{
+/* This procedure is called from the dmap struct on tty open/close. */
+
+ int r;
+ register struct fproc *rfp;
+
+ /* Add O_NOCTTY to the flags if this process is not a session leader, or
+ * if it already has a controlling tty, or if it is someone else's
+ * controlling tty.
+ */
+ if (!(fp->fp_flags & FP_SESLDR) || fp->fp_tty != 0) {
+ flags |= O_NOCTTY;
+ } else {
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if(rfp->fp_pid == PID_FREE) continue;
+ if (rfp->fp_tty == dev) flags |= O_NOCTTY;
+ }
+ }
+
+ r = gen_opcl(op, dev, proc_e, flags);
+
+ /* Did this call make the tty the controlling tty? */
+ if (r == 1) {
+ fp->fp_tty = dev;
+ r = OK;
+ }
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * ctty_opcl *
+ *===========================================================================*/
+PUBLIC int ctty_opcl(
+ int op, /* operation, DEV_OPEN or DEV_CLOSE */
+ dev_t dev, /* device to open or close */
+ endpoint_t proc_e, /* process to open/close for */
+ int flags /* mode bits and flags */
+)
+{
+/* This procedure is called from the dmap struct on opening or closing
+ * /dev/tty, the magic device that translates to the controlling tty.
+ */
+
+ return(fp->fp_tty == 0 ? ENXIO : OK);
+}
+
+
+/*===========================================================================*
+ * pm_setsid *
+ *===========================================================================*/
+PUBLIC void pm_setsid(proc_e)
+int proc_e;
+{
+/* Perform the VFS side of the SETSID call, i.e. get rid of the controlling
+ * terminal of a process, and make the process a session leader.
+ */
+ register struct fproc *rfp;
+ int slot;
+
+ /* Make the process a session leader with no controlling tty. */
+ okendpt(proc_e, &slot);
+ rfp = &fproc[slot];
+ rfp->fp_flags |= FP_SESLDR;
+ rfp->fp_tty = 0;
+}
+
+
+/*===========================================================================*
+ * do_ioctl *
+ *===========================================================================*/
+PUBLIC int do_ioctl()
+{
+/* Perform the ioctl(ls_fd, request, argx) system call (uses m2 fmt). */
+
+ int r = OK, suspend_reopen;
+ struct filp *f;
+ register struct vnode *vp;
+ dev_t dev;
+
+ if ((f = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+ vp = f->filp_vno; /* get vnode pointer */
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL &&
+ (vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) {
+ r = ENOTTY;
+ }
+
+ if (r == OK) {
+ suspend_reopen = (f->filp_state != FS_NORMAL);
+ dev = (dev_t) vp->v_sdev;
+
+ r = dev_io(VFS_DEV_IOCTL, dev, who_e, m_in.ADDRESS, cvu64(0),
+ m_in.REQUEST, f->filp_flags, suspend_reopen);
+ }
+
+ unlock_filp(f);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * gen_io *
+ *===========================================================================*/
+PUBLIC int gen_io(task_nr, mess_ptr)
+endpoint_t task_nr; /* which task to call */
+message *mess_ptr; /* pointer to message for task */
+{
+/* All file system I/O ultimately comes down to I/O on major/minor device
+ * pairs. These lead to calls on the following routines via the dmap table.
+ */
+
+ int r, proc_e;
+
+ proc_e = mess_ptr->USER_ENDPT;
+
+ r = sendrec(task_nr, mess_ptr);
+ if (r == OK && mess_ptr->REP_STATUS == ERESTART) r = EDEADEPT;
+ if (r != OK) {
+ if (r == EDEADSRCDST || r == EDEADEPT) {
+ printf("VFS: dead driver %d\n", task_nr);
+ dmap_unmap_by_endpt(task_nr);
+ return(r);
+ } else if (r == ELOCKED) {
+ printf("VFS: ELOCKED talking to %d\n", task_nr);
+ return(r);
+ }
+ panic("call_task: can't send/receive: %d", r);
+ }
+
+ /* Did the process we did the sendrec() for get a result? */
+ if (mess_ptr->REP_ENDPT != proc_e) {
+ printf("VFS: strange device reply from %d, type = %d, "
+ "proc = %d (not %d) (2) ignored\n", mess_ptr->m_source,
+ mess_ptr->m_type, proc_e, mess_ptr->REP_ENDPT);
+
+ return(EIO);
+ }
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * asyn_io *
+ *===========================================================================*/
+PUBLIC int asyn_io(task_nr, mess_ptr)
+int task_nr; /* which task to call */
+message *mess_ptr; /* pointer to message for task */
+{
+/* All file system I/O ultimately comes down to I/O on major/minor device
+ * pairs. These lead to calls on the following routines via the dmap table.
+ */
+
+ int r;
+
+ fp->fp_sendrec = mess_ptr; /* Remember where result should be stored */
+ r = asynsend3(task_nr, mess_ptr, AMF_NOREPLY);
+
+ if (r != OK) panic("VFS: asynsend in asyn_io failed: %d", r);
+
+ /* Fake a SUSPEND */
+ mess_ptr->REP_STATUS = SUSPEND;
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * ctty_io *
+ *===========================================================================*/
+PUBLIC int ctty_io(task_nr, mess_ptr)
+int task_nr; /* not used - for compatibility with dmap_t */
+message *mess_ptr; /* pointer to message for task */
+{
+/* This routine is only called for one device, namely /dev/tty. Its job
+ * is to change the message to use the controlling terminal, instead of the
+ * major/minor pair for /dev/tty itself.
+ */
+
+ struct dmap *dp;
+
+ if (fp->fp_tty == 0) {
+ /* No controlling tty present anymore, return an I/O error. */
+ mess_ptr->REP_STATUS = EIO;
+ } else {
+ /* Substitute the controlling terminal device. */
+ dp = &dmap[major(fp->fp_tty)];
+ mess_ptr->DEVICE = minor(fp->fp_tty);
+
+ if (dp->dmap_driver == NONE) {
+ printf("FS: ctty_io: no driver for dev\n");
+ return(EIO);
+ }
+
+ if (isokendpt(dp->dmap_driver, &dummyproc) != OK) {
+ printf("VFS: ctty_io: old driver %d\n", dp->dmap_driver);
+ return(EIO);
+ }
+
+ (*dp->dmap_io)(dp->dmap_driver, mess_ptr);
+ }
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * no_dev *
+ *===========================================================================*/
+PUBLIC int no_dev(
+ int UNUSED(op), /* operation, DEV_OPEN or DEV_CLOSE */
+ dev_t UNUSED(dev), /* device to open or close */
+ int UNUSED(proc), /* process to open/close for */
+ int UNUSED(flags) /* mode bits and flags */
+)
+{
+/* Called when opening a nonexistent device. */
+ return(ENODEV);
+}
+
+/*===========================================================================*
+ * no_dev_io *
+ *===========================================================================*/
+PUBLIC int no_dev_io(int proc, message *m)
+{
+/* Called when doing i/o on a nonexistent device. */
+ printf("VFS: I/O on unmapped device number\n");
+ return(EIO);
+}
+
+
+/*===========================================================================*
+ * clone_opcl *
+ *===========================================================================*/
+PUBLIC int clone_opcl(
+ int op, /* operation, DEV_OPEN or DEV_CLOSE */
+ dev_t dev, /* device to open or close */
+ int proc_e, /* process to open/close for */
+ int flags /* mode bits and flags */
+)
+{
+/* Some devices need special processing upon open. Such a device is "cloned",
+ * i.e. on a successful open it is replaced by a new device with a new unique
+ * minor device number. This new device number identifies a new object (such
+ * as a new network connection) that has been allocated within a task.
+ */
+ struct dmap *dp;
+ int r, minor_dev, major_dev;
+ message dev_mess;
+
+ /* Determine task dmap. */
+ minor_dev = minor(dev);
+ major_dev = major(dev);
+ if (major_dev < 0 || major_dev >= NR_DEVICES) return(ENXIO);
+ dp = &dmap[major_dev];
+ if (dp->dmap_driver == NONE) {
+ printf("VFS clone_opcl: no driver for major %d\n", major_dev);
+ return(ENXIO);
+ }
+
+ dev_mess.m_type = op;
+ dev_mess.DEVICE = minor_dev;
+ dev_mess.USER_ENDPT = proc_e;
+ dev_mess.COUNT = flags;
+
+ if(isokendpt(dp->dmap_driver, &dummyproc) != OK) {
+ printf("VFS clone_opcl: bad driver endpoint for major %d (%d)\n",
+ major_dev, dp->dmap_driver);
+ return(ENXIO);
+ }
+
+ /* Call the task. */
+ r = (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+ if (r != OK) return(r);
+
+ if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) {
+ /* Wait for reply when driver is asynchronous */
+ worker_wait();
+ }
+
+ if (op == DEV_OPEN && dev_mess.REP_STATUS >= 0) {
+ if (dev_mess.REP_STATUS != minor_dev) {
+ struct vnode *vp;
+ struct node_details res;
+
+ /* A new minor device number has been returned.
+ * Request PFS to create a temporary device file to hold it.
+ */
+
+ /* Device number of the new device. */
+ dev = (dev & ~(BYTE << MINOR)) | (dev_mess.REP_STATUS << MINOR);
+
+ /* Issue request */
+ r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid,
+ ALL_MODES | I_CHAR_SPECIAL, dev, &res);
+ if (r != OK) {
+ (void) clone_opcl(DEV_CLOSE, dev, proc_e, 0);
+ return r;
+ }
+
+ /* Drop old node and use the new values */
+ vp = fp->fp_filp[m_in.fd]->filp_vno;
+
+ unlock_vnode(vp);
+ put_vnode(vp);
+ if ((vp = get_free_vnode()) == NULL)
+ return(err_code);
+
+ lock_vnode(vp, VNODE_OPCL);
+
+ vp->v_fs_e = res.fs_e;
+ vp->v_vmnt = NULL;
+ vp->v_dev = NO_DEV;
+ vp->v_fs_e = res.fs_e;
+ vp->v_inode_nr = res.inode_nr;
+ vp->v_mode = res.fmode;
+ vp->v_sdev = dev;
+ vp->v_fs_count = 1;
+ vp->v_ref_count = 1;
+ fp->fp_filp[m_in.fd]->filp_vno = vp;
+ }
+ dev_mess.REP_STATUS = OK;
+ }
+ return(dev_mess.REP_STATUS);
+}
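+
+/* Illustrative scenario for the code above (device name assumed): opening a
+ * clone-style network device can make the driver return a fresh minor in
+ * REP_STATUS. The open code then rewrites 'dev' with that minor and asks
+ * PFS, via req_newnode(), for a temporary character-special node, so every
+ * open() of the same special file gets its own private device object.
+ */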
+
+
+/*===========================================================================*
+ * dev_up *
+ *===========================================================================*/
+PUBLIC void dev_up(int maj)
+{
+ /* A new device driver has been mapped in. This function
+ * checks if any filesystems are mounted on it, and if so,
+ * dev_open()s them so the filesystem can be reused.
+ */
+ int r, new_driver_e, needs_reopen, fd_nr, found;
+ struct filp *rfilp;
+ struct vmnt *vmp;
+ struct fproc *rfp;
+ struct vnode *vp;
+
+ /* First deal with block devices. We need to consider both mounted file
+ * systems and open block-special files.
+ */
+ if (maj < 0 || maj >= NR_DEVICES) panic("VFS: out-of-bound major");
+ new_driver_e = dmap[maj].dmap_driver;
+
+ /* Tell each affected mounted file system about the new endpoint. This code
+ * is currently useless, as driver endpoints do not change across restarts.
+ */
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
+ int minor_dev, major_dev;
+ major_dev = major(vmp->m_dev);
+ minor_dev = minor(vmp->m_dev);
+ if (major_dev != maj) continue;
+
+ /* Send the new driver endpoint to the mounted file system. */
+ if (OK != req_newdriver(vmp->m_fs_e, vmp->m_dev, new_driver_e))
+ printf("VFS dev_up: error sending new driver endpoint."
+ " FS_e: %d req_nr: %d\n", vmp->m_fs_e, REQ_NEW_DRIVER);
+ }
+
+ /* For each block-special file that was previously opened on the affected
+ * device, we need to reopen it on the new driver.
+ */
+ found = 0;
+ for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) {
+ if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue;
+ if (major(vp->v_sdev) != maj) continue;
+ if (!S_ISBLK(vp->v_mode)) continue;
+
+ /* Reopen the device on the driver, once per filp. */
+ if ((r = dev_open(vp->v_sdev, VFS_PROC_NR, rfilp->filp_mode)) != OK)
+ printf("VFS: mounted dev %d/%d re-open failed: %d.\n",
+ maj, minor(vp->v_sdev), r);
+
+ found = 1;
+ }
+
+ /* If any block-special file was open for this major at all, also inform the
+ * root file system about the new endpoint of the driver. We do this even if
+ * the block-special file is linked to another mounted file system, merely
+ * because it is more work to check for that case.
+ */
+ if (found) {
+ if (OK != req_newdriver(ROOT_FS_E, makedev(maj, 0), new_driver_e))
+ printf("VFSdev_up: error sending new driver endpoint."
+ " FS_e: %d req_nr: %d\n", ROOT_FS_E, REQ_NEW_DRIVER);
+ }
+
+ /* The rest of the code deals with character-special files. To start with,
+ * look for processes that are suspended in an OPEN call. Set FP_SUSP_REOPEN
+ * to indicate that this process was suspended before the call to dev_up.
+ */
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if(rfp->fp_pid == PID_FREE) continue;
+ if(rfp->fp_blocked_on != FP_BLOCKED_ON_DOPEN) continue;
+
+ printf("VFS: dev_up: found process in FP_BLOCKED_ON_DOPEN, fd %d\n",
+ rfp->fp_block_fd);
+ fd_nr = rfp->fp_block_fd;
+ rfilp = rfp->fp_filp[fd_nr];
+ vp = rfilp->filp_vno;
+ if (!vp) panic("VFS: restart_reopen: no vp");
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+ if (major(vp->v_sdev) != maj) continue;
+
+ rfp->fp_flags |= FP_SUSP_REOPEN;
+ }
+
+ needs_reopen= FALSE;
+ for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) {
+ if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue;
+ if (major(vp->v_sdev) != maj) continue;
+ if (!S_ISCHR(vp->v_mode)) continue;
+
+ rfilp->filp_state = FS_NEEDS_REOPEN;
+ needs_reopen = TRUE;
+ }
+
+ if (needs_reopen)
+ restart_reopen(maj);
+
+}
+
+/*===========================================================================*
+ * open_reply *
+ *===========================================================================*/
+PUBLIC void open_reply(void)
+{
+ struct fproc *rfp;
+ endpoint_t proc_e;
+ int slot;
+
+ proc_e = m_in.REP_ENDPT;
+ if (isokendpt(proc_e, &slot) != OK) return;
+ rfp = &fproc[slot];
+ *rfp->fp_sendrec = m_in;
+ worker_signal(worker_get(rfp->fp_wtid)); /* Continue open */
+}
+
+/*===========================================================================*
+ * restart_reopen *
+ *===========================================================================*/
+PRIVATE void restart_reopen(maj)
+int maj;
+{
+ int n, r, minor_dev, major_dev, fd_nr;
+ endpoint_t driver_e;
+ struct vnode *vp;
+ struct filp *rfilp;
+ struct fproc *rfp;
+
+ if (maj < 0 || maj >= NR_DEVICES) panic("VFS: out-of-bound major");
+ for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) {
+ if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue;
+ if (rfilp->filp_state != FS_NEEDS_REOPEN) continue;
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+
+ major_dev = major(vp->v_sdev);
+ minor_dev = minor(vp->v_sdev);
+ if (major_dev != maj) continue;
+
+ if (!(rfilp->filp_flags & O_REOPEN)) {
+ /* File descriptor is to be closed when driver restarts. */
+ n = invalidate(rfilp);
+ if (n != rfilp->filp_count) {
+ printf("VFS: warning: invalidate/count "
+ "discrepancy (%d, %d)\n", n, rfilp->filp_count);
+ }
+ rfilp->filp_count = 0;
+ continue;
+ }
+
+ r = dev_reopen(vp->v_sdev, rfilp-filp, vp->v_mode & (R_BIT|W_BIT));
+ if (r == OK) return;
+
+ /* Device could not be reopened. Invalidate all filps on that device.*/
+ n = invalidate(rfilp);
+ if (n != rfilp->filp_count) {
+ printf("VFS: warning: invalidate/count "
+ "discrepancy (%d, %d)\n", n, rfilp->filp_count);
+ }
+ rfilp->filp_count = 0;
+ printf("VFS: file on dev %d/%d re-open failed: %d; "
+ "invalidated %d fd's.\n", major_dev, minor_dev, r, n);
+ }
+
+ /* Nothing more to re-open. Restart suspended processes */
+ driver_e = dmap[maj].dmap_driver;
+
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if(rfp->fp_pid == PID_FREE) continue;
+ if(rfp->fp_blocked_on == FP_BLOCKED_ON_OTHER &&
+ rfp->fp_task == driver_e && (rfp->fp_flags & FP_SUSP_REOPEN)) {
+ rfp->fp_flags &= ~FP_SUSP_REOPEN;
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+ reply(rfp->fp_endpoint, ERESTART);
+ }
+ }
+
+ /* Look for processes that are suspended in an OPEN call */
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if (rfp->fp_pid == PID_FREE) continue;
+ if (rfp->fp_blocked_on != FP_BLOCKED_ON_DOPEN ||
+ !(rfp->fp_flags & FP_SUSP_REOPEN)) continue;
+
+ printf("VFS: restart_reopen: found process in FP_BLOCKED_ON_DOPEN, fd %d\n",
+ rfp->fp_block_fd);
+ fd_nr = rfp->fp_block_fd;
+ rfilp = rfp->fp_filp[fd_nr];
+
+ if (!rfilp) {
+ /* Open failed, and automatic reopen was not requested */
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+ FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+ reply(rfp->fp_endpoint, EIO);
+ continue;
+ }
+
+ vp = rfilp->filp_vno;
+ if (!vp) panic("VFS: restart_reopen: no vp");
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+ if (major(vp->v_sdev) != maj) continue;
+
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+ reply(rfp->fp_endpoint, fd_nr);
+ }
+}
+
+
+/*===========================================================================*
+ * reopen_reply *
+ *===========================================================================*/
+PUBLIC void reopen_reply()
+{
+ endpoint_t driver_e;
+ int filp_no, status, maj;
+ struct filp *rfilp;
+ struct vnode *vp;
+ struct dmap *dp;
+
+ driver_e = m_in.m_source;
+ filp_no = m_in.REP_ENDPT;
+ status = m_in.REP_STATUS;
+
+ if (filp_no < 0 || filp_no >= NR_FILPS) {
+ printf("VFS: reopen_reply: bad filp number %d from driver %d\n",
+ filp_no, driver_e);
+ return;
+ }
+
+ rfilp = &filp[filp_no];
+ if (rfilp->filp_count < 1) {
+ printf("VFS: reopen_reply: filp number %d not inuse (from driver %d)\n",
+ filp_no, driver_e);
+ return;
+ }
+
+ vp = rfilp->filp_vno;
+ if (!vp) {
+ printf("VFS: reopen_reply: no vnode for filp number %d (from driver "
+ "%d)\n", filp_no, driver_e);
+ return;
+ }
+
+ if (rfilp->filp_state != FS_NEEDS_REOPEN) {
+ printf("VFS: reopen_reply: bad state %d for filp number %d"
+ " (from driver %d)\n", rfilp->filp_state, filp_no, driver_e);
+ return;
+ }
+
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) {
+ printf("VFS: reopen_reply: bad mode 0%o for filp number %d"
+ " (from driver %d)\n", vp->v_mode, filp_no, driver_e);
+ return;
+ }
+
+ maj = major(vp->v_sdev);
+ dp = &dmap[maj];
+ if (dp->dmap_driver != driver_e) {
+ printf("VFS: reopen_reply: bad major %d for filp number %d "
+ "(from driver %d, current driver is %d)\n", maj, filp_no,
+ driver_e, dp->dmap_driver);
+ return;
+ }
+
+ if (status == OK) {
+ rfilp->filp_state= FS_NORMAL;
+ } else {
+ printf("VFS: reopen_reply: should handle error status\n");
+ return;
+ }
+
+ restart_reopen(maj);
+}
--- /dev/null
+/* This file contains the table with device <-> driver mappings. It also
+ * contains some routines to dynamically add and/or remove device drivers
+ * or change mappings.
+ */
+
+#include "fs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <minix/com.h>
+#include <minix/ds.h>
+#include "fproc.h"
+#include "dmap.h"
+#include "param.h"
+
+/* The order of the entries in the table determines the mapping between major
+ * device numbers and device drivers. Character and block devices
+ * can be intermixed at random. The ordering determines the device numbers in
+ * /dev. Note that the major device numbers used in /dev are NOT the same as
+ * the process numbers of the device drivers. See <minix/dmap.h> for mappings.
+ */
+
+struct dmap dmap[NR_DEVICES];
+
+#define DT_EMPTY { no_dev, no_dev_io, NONE, "", 0, STYLE_NDEV, NULL }
+
+/*===========================================================================*
+ * do_mapdriver *
+ *===========================================================================*/
+PUBLIC int do_mapdriver()
+{
+/* Create a device->driver mapping. RS will tell us which major is driven by
+ * this driver, what type of device it is (regular, TTY, asynchronous, clone,
+ * etc), and its label. This label is registered with DS, and allows us to
+ * retrieve the driver's endpoint.
+ */
+ int r, flags, major;
+ endpoint_t endpoint;
+ vir_bytes label_vir;
+ size_t label_len;
+ char label[LABEL_MAX];
+
+ /* Only RS can map drivers. */
+ if (who_e != RS_PROC_NR) return(EPERM);
+
+ /* Get the label */
+ label_vir = (vir_bytes) m_in.md_label;
+ label_len = (size_t) m_in.md_label_len;
+
+ if (label_len+1 > sizeof(label)) { /* Can we store this label? */
+ printf("VFS: do_mapdriver: label too long\n");
+ return(EINVAL);
+ }
+ r = sys_vircopy(who_e, D, label_vir, SELF, D, (vir_bytes) label, label_len);
+ if (r != OK) {
+ printf("VFS: do_mapdriver: sys_vircopy failed: %d\n", r);
+ return(EINVAL);
+ }
+ label[label_len] = '\0'; /* Terminate label */
+
+ /* Now we know how the driver is called, fetch its endpoint */
+ r = ds_retrieve_label_endpt(label, &endpoint);
+ if (r != OK) {
+ printf("VFS: do_mapdriver: label '%s' unknown\n", label);
+ return(EINVAL);
+ }
+
+ /* Try to update device mapping. */
+ major = m_in.md_major;
+ flags = m_in.md_flags;
+
+ return map_driver(label, major, endpoint, m_in.md_style, flags);
+}
+
+/*===========================================================================*
+ * map_driver *
+ *===========================================================================*/
+PUBLIC int map_driver(label, major, proc_nr_e, style, flags)
+const char *label; /* name of the driver */
+int major; /* major number of the device */
+endpoint_t proc_nr_e; /* process number of the driver */
+int style; /* style of the device */
+int flags; /* device flags */
+{
+/* Add a new device driver mapping in the dmap table. If the proc_nr is set to
+ * NONE, we're supposed to unmap it.
+ */
+
+ int slot;
+ size_t len;
+ struct dmap *dp;
+
+ /* Get pointer to device entry in the dmap table. */
+ if (major < 0 || major >= NR_DEVICES) return(ENODEV);
+ dp = &dmap[major];
+
+ /* Check if we're supposed to unmap it. */
+ if(proc_nr_e == NONE) {
+ dp->dmap_opcl = no_dev;
+ dp->dmap_io = no_dev_io;
+ dp->dmap_driver = NONE;
+ dp->dmap_flags = flags;
+ return(OK);
+ }
+
+ /* Check process number of new driver if it was alive before mapping */
+ if (! (flags & DRV_FORCED)) {
+ if (isokendpt(proc_nr_e, &slot) != OK)
+ return(EINVAL);
+ }
+
+ if (label != NULL) {
+ len = strlen(label);
+ if (len+1 > sizeof(dp->dmap_label))
+ panic("VFS: map_driver: label too long: %d", len);
+ strcpy(dp->dmap_label, label);
+ }
+
+ /* Store driver I/O routines based on type of device */
+ switch (style) {
+ case STYLE_DEV:
+ dp->dmap_opcl = gen_opcl;
+ dp->dmap_io = gen_io;
+ break;
+ case STYLE_DEVA:
+ dp->dmap_opcl = gen_opcl;
+ dp->dmap_io = asyn_io;
+ break;
+ case STYLE_TTY:
+ dp->dmap_opcl = tty_opcl;
+ dp->dmap_io = gen_io;
+ break;
+ case STYLE_CTTY:
+ dp->dmap_opcl = ctty_opcl;
+ dp->dmap_io = ctty_io;
+ break;
+ case STYLE_CLONE:
+ dp->dmap_opcl = clone_opcl;
+ dp->dmap_io = gen_io;
+ break;
+ case STYLE_CLONE_A:
+ dp->dmap_opcl = clone_opcl;
+ dp->dmap_io = asyn_io;
+ break;
+ default:
+ return(EINVAL);
+ }
+
+ dp->dmap_driver = proc_nr_e;
+ dp->dmap_flags = flags;
+ dp->dmap_style = style;
+
+ return(OK);
+}
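+
+/* Illustrative call (endpoint variable assumed; TTY_MAJOR comes from
+ * <minix/dmap.h>): mapping a synchronous tty driver could look like
+ *
+ *	r = map_driver("tty", TTY_MAJOR, tty_endpt, STYLE_TTY, 0);
+ *
+ * which installs tty_opcl/gen_io for that major, per the switch above.
+ */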
+
+/*===========================================================================*
+ * dmap_unmap_by_endpt *
+ *===========================================================================*/
+PUBLIC void dmap_unmap_by_endpt(endpoint_t proc_e)
+{
+/* Lookup driver in dmap table by endpoint and unmap it */
+ int major, r;
+
+ for (major = 0; major < NR_DEVICES; major++) {
+ if (dmap_driver_match(proc_e, major)) {
+ /* Found driver; overwrite it with a NULL entry */
+ if ((r = map_driver(NULL, major, NONE, 0, 0)) != OK) {
+ printf("VFS: unmapping driver %d for major %d failed:"
+ " %d\n", proc_e, major, r);
+ }
+ }
+ }
+}
+
+/*===========================================================================*
+ * map_service *
+ *===========================================================================*/
+PUBLIC int map_service(struct rprocpub *rpub)
+{
+/* Map a new service by storing its device driver properties. */
+ int r;
+
+ /* Not a driver, nothing more to do. */
+ if(rpub->dev_nr == NO_DEV) return(OK);
+
+ /* Map driver. */
+ r = map_driver(rpub->label, rpub->dev_nr, rpub->endpoint, rpub->dev_style,
+ rpub->dev_flags);
+ if(r != OK) return(r);
+
+ /* If driver has two major numbers associated, also map the other one. */
+ if(rpub->dev_style2 != STYLE_NDEV) {
+ r = map_driver(rpub->label, rpub->dev_nr+1, rpub->endpoint,
+ rpub->dev_style2, rpub->dev_flags);
+ if(r != OK) return(r);
+ }
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * init_dmap *
+ *===========================================================================*/
+PUBLIC void init_dmap()
+{
+/* Initialize the table with empty device <-> driver mappings. */
+ int i;
+ struct dmap dmap_default = DT_EMPTY;
+
+ for (i = 0; i < NR_DEVICES; i++)
+ dmap[i] = dmap_default;
+}
+
+/*===========================================================================*
+ * dmap_driver_match *
+ *===========================================================================*/
+PUBLIC int dmap_driver_match(endpoint_t proc, int major)
+{
+ if (major < 0 || major >= NR_DEVICES) return(0);
+ if (dmap[major].dmap_driver != NONE && dmap[major].dmap_driver == proc)
+ return(1);
+
+ return(0);
+}
+
+/*===========================================================================*
+ * dmap_endpt_up *
+ *===========================================================================*/
+PUBLIC void dmap_endpt_up(endpoint_t proc_e)
+{
+/* A device driver with endpoint proc_e has been restarted. Go tell everyone
+ * that might be blocking on it that this device is 'up'.
+ */
+
+ int major;
+ for (major = 0; major < NR_DEVICES; major++)
+ if (dmap_driver_match(proc_e, major))
+ dev_up(major);
+
+}
+
+/*===========================================================================*
+ * get_dmap *
+ *===========================================================================*/
+PUBLIC struct dmap *get_dmap(endpoint_t proc_e)
+{
+/* See if 'proc_e' endpoint belongs to a valid dmap entry. If so, return a
+ * pointer */
+
+ int major;
+ for (major = 0; major < NR_DEVICES; major++)
+ if (dmap_driver_match(proc_e, major))
+ return(&dmap[major]);
+
+ return(NULL);
+}
--- /dev/null
+#ifndef __VFS_DMAP_H__
+#define __VFS_DMAP_H__
+
+/*
+dmap.h
+*/
+
+/*===========================================================================*
+ * Device <-> Driver Table *
+ *===========================================================================*/
+
+/* Device table. This table is indexed by major device number. It provides
+ * the link between major device numbers and the routines that process them.
+ * The table can be updated dynamically. The field 'dmap_flags' describes an
+ * entry's current status and determines what control options are possible.
+ */
+
+extern struct dmap {
+ int _PROTOTYPE ((*dmap_opcl), (int, dev_t, int, int) );
+ int _PROTOTYPE ((*dmap_io), (int, message *) );
+ endpoint_t dmap_driver;
+ char dmap_label[LABEL_MAX];
+ int dmap_flags;
+ int dmap_style;
+ struct filp *dmap_sel_filp;
+} dmap[];
+
+#endif
--- /dev/null
+/* This file handles the EXEC system call. It performs the work as follows:
+ * - see if the permissions allow the file to be executed
+ * - read the header and extract the sizes
+ * - fetch the initial args and environment from the user space
+ * - allocate the memory for the new process
+ * - copy the initial stack from PM to the process
+ * - read in the text and data segments and copy to the process
+ * - take care of setuid and setgid bits
+ * - fix up 'mproc' table
+ * - tell kernel about EXEC
+ * - save offset to initial argc (for ps)
+ *
+ * The entry points into this file are:
+ * pm_exec: perform the EXEC system call
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <minix/callnr.h>
+#include <minix/endpoint.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include <a.out.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/param.h>
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include "vnode.h"
+#include <minix/vfsif.h>
+#include <assert.h>
+#include <libexec.h>
+#include "exec.h"
+
+FORWARD _PROTOTYPE( void lock_exec, (void) );
+FORWARD _PROTOTYPE( void unlock_exec, (void) );
+FORWARD _PROTOTYPE( int exec_newmem, (int proc_e, vir_bytes text_addr, vir_bytes text_bytes,
+ vir_bytes data_addr, vir_bytes data_bytes,
+ vir_bytes tot_bytes, vir_bytes frame_len, int sep_id,
+ int is_elf, dev_t st_dev, ino_t st_ino, time_t ctime,
+ char *progname, int new_uid, int new_gid,
+ vir_bytes *stack_topp, int *load_textp,
+ int *allow_setuidp) );
+FORWARD _PROTOTYPE( int is_script, (const char *exec_hdr, size_t exec_len));
+FORWARD _PROTOTYPE( int patch_stack, (struct vnode *vp, char stack[ARG_MAX],
+ vir_bytes *stk_bytes, char path[PATH_MAX+1]) );
+FORWARD _PROTOTYPE( int insert_arg, (char stack[ARG_MAX], vir_bytes *stk_bytes,
+ char *arg, int replace) );
+FORWARD _PROTOTYPE( void patch_ptr, (char stack[ARG_MAX], vir_bytes base));
+FORWARD _PROTOTYPE( void clo_exec, (struct fproc *rfp) );
+FORWARD _PROTOTYPE( int read_seg, (struct vnode *vp, off_t off, int proc_e,
+ int seg, vir_bytes seg_addr,
+ phys_bytes seg_bytes) );
+FORWARD _PROTOTYPE( int load_aout, (struct exec_info *execi) );
+FORWARD _PROTOTYPE( int load_elf, (struct exec_info *execi) );
+FORWARD _PROTOTYPE( int map_header, (char **exec_hdr,
+ const struct vnode *vp) );
+
+#define PTRSIZE sizeof(char *) /* Size of pointers in argv[] and envp[]. */
+
+/* Array of loaders for different object file formats */
+struct exec_loaders {
+ int (*load_object)(struct exec_info *);
+};
+
+PRIVATE const struct exec_loaders exec_loaders[] = {
+ { load_aout },
+ { load_elf },
+ { NULL }
+};
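+
+/* Note on the array above: pm_exec() below tries the loaders in order, so
+ * an a.out image is claimed by load_aout and an ELF image falls through to
+ * load_elf; if neither accepts the header, the exec fails with ENOEXEC.
+ */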
+
+PRIVATE char hdr[PAGE_SIZE]; /* Assume that header is not larger than a page */
+
+/*===========================================================================*
+ * lock_exec *
+ *===========================================================================*/
+PRIVATE void lock_exec(void)
+{
+ message org_m_in;
+ struct fproc *org_fp;
+ struct worker_thread *org_self;
+
+ /* First try to get it right off the bat */
+ if (mutex_trylock(&exec_lock) == 0)
+ return;
+
+ org_m_in = m_in;
+ org_fp = fp;
+ org_self = self;
+
+ if (mutex_lock(&exec_lock) != 0)
+ panic("Could not obtain lock on exec");
+
+ m_in = org_m_in;
+ fp = org_fp;
+ self = org_self;
+}
+
+/*===========================================================================*
+ * unlock_exec *
+ *===========================================================================*/
+PRIVATE void unlock_exec(void)
+{
+ if (mutex_unlock(&exec_lock) != 0)
+ panic("Could not release lock on exec");
+}
+
+/*===========================================================================*
+ * pm_exec *
+ *===========================================================================*/
+PUBLIC int pm_exec(int proc_e, char *path, vir_bytes path_len, char *frame,
+ vir_bytes frame_len, vir_bytes *pc)
+{
+/* Perform the execve(name, argv, envp) call. The user library builds a
+ * complete stack image, including pointers, args, environ, etc. The stack
+ * is copied to a buffer inside VFS, and then to the new core image.
+ */
+ int r, r1, round, slot;
+ vir_bytes vsp;
+ struct fproc *rfp;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char *cp;
+ static char mbuf[ARG_MAX]; /* buffer for stack and zeroes */
+ struct exec_info execi;
+ int i;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lock_exec();
+
+ okendpt(proc_e, &slot);
+ rfp = fp = &fproc[slot];
+ vp = NULL;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Get the exec file name. */
+ if ((r = fetch_name(path, path_len, 0, fullpath)) != OK)
+ goto pm_execfinal;
+
+ /* Fetch the stack from the user before destroying the old core image. */
+ if (frame_len > ARG_MAX) {
+ printf("VFS: pm_exec: stack too big\n");
+ r = ENOMEM; /* stack too big */
+ goto pm_execfinal;
+ }
+ r = sys_datacopy(proc_e, (vir_bytes) frame, SELF, (vir_bytes) mbuf,
+ (phys_bytes) frame_len);
+ if (r != OK) { /* can't fetch stack (e.g. bad virtual addr) */
+ printf("VFS: pm_exec: sys_datacopy failed\n");
+ goto pm_execfinal;
+ }
+
+ /* The default is to keep the original user and group IDs */
+ execi.new_uid = rfp->fp_effuid;
+ execi.new_gid = rfp->fp_effgid;
+
+ for (round = 0; round < 2; round++) {
+ /* round = 0 (first attempt), or 1 (interpreted script) */
+ /* Save the name of the program */
+ (cp = strrchr(fullpath, '/')) ? cp++ : (cp = fullpath);
+
+ strncpy(execi.progname, cp, PROC_NAME_LEN-1);
+ execi.progname[PROC_NAME_LEN-1] = '\0';
+
+ /* Open executable */
+ if ((vp = eat_path(&resolve, fp)) == NULL) {
+ r = err_code;
+ goto pm_execfinal;
+ }
+ execi.vp = vp;
+ unlock_vmnt(vmp);
+
+ if ((vp->v_mode & I_TYPE) != I_REGULAR)
+ r = ENOEXEC;
+ else if ((r1 = forbidden(vp, X_BIT)) != OK)
+ r = r1;
+ else
+ r = req_stat(vp->v_fs_e, vp->v_inode_nr, VFS_PROC_NR,
+ (char *) &(execi.sb), 0, 0);
+ if (r != OK) goto pm_execfinal;
+
+ if (round == 0) {
+ /* Deal with setuid/setgid executables */
+ if (vp->v_mode & I_SET_UID_BIT) execi.new_uid = vp->v_uid;
+ if (vp->v_mode & I_SET_GID_BIT) execi.new_gid = vp->v_gid;
+ }
+
+ r = map_header(&execi.hdr, execi.vp);
+ if (r != OK) goto pm_execfinal;
+
+ if (!is_script(execi.hdr, execi.vp->v_size) || round != 0)
+ break;
+
+ /* Get fresh copy of the file name. */
+ if ((r = fetch_name(path, path_len, 0, fullpath)) != OK)
+ printf("VFS pm_exec: 2nd fetch_name failed\n");
+ else if ((r = patch_stack(vp, mbuf, &frame_len, fullpath)) != OK)
+ printf("VFS pm_exec: patch_stack failed\n");
+
+ unlock_vnode(vp);
+ put_vnode(vp);
+ vp = NULL;
+ if (r != OK) goto pm_execfinal;
+ }
+
+ execi.proc_e = proc_e;
+ execi.frame_len = frame_len;
+
+ for (i = 0; exec_loaders[i].load_object != NULL; i++) {
+ r = (*exec_loaders[i].load_object)(&execi);
+ /* Loaded successfully, so no need to try other loaders */
+ if (r == OK) break;
+ }
+
+ if (r != OK) { /* No exec loader could load the object */
+ r = ENOEXEC;
+ goto pm_execfinal;
+ }
+
+ /* Save off PC */
+ *pc = execi.pc;
+
+ /* Patch up stack and copy it from VFS to new core image. */
+ vsp = execi.stack_top;
+ vsp -= frame_len;
+ patch_ptr(mbuf, vsp);
+ if ((r = sys_datacopy(SELF, (vir_bytes) mbuf, proc_e, (vir_bytes) vsp,
+ (phys_bytes)frame_len)) != OK) {
+ printf("VFS: datacopy failed (%d) trying to copy to %lu\n", r, vsp);
+ goto pm_execfinal;
+ }
+
+ if (r != OK) goto pm_execfinal;
+ clo_exec(rfp);
+
+ if (execi.allow_setuid) {
+ rfp->fp_effuid = execi.new_uid;
+ rfp->fp_effgid = execi.new_gid;
+ }
+
+pm_execfinal:
+ if (vp != NULL) {
+ unlock_vnode(vp);
+ put_vnode(vp);
+ }
+ unlock_exec();
+ return(r);
+}
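+
+/* Worked example of the frame relocation in pm_exec() (illustrative
+ * numbers): with execi.stack_top == 0xF0000000 and frame_len == 0x200, vsp
+ * becomes 0xEFFFFE00. patch_ptr() then adds vsp to every argv[]/envp[]
+ * pointer inside mbuf, since the user library built the frame with
+ * pointers relative to address 0.
+ */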
+
+/*===========================================================================*
+ * load_aout *
+ *===========================================================================*/
+PRIVATE int load_aout(struct exec_info *execi)
+{
+ int r;
+ struct vnode *vp;
+ int proc_e;
+ off_t off;
+ int hdrlen;
+ int sep_id;
+ vir_bytes text_bytes, data_bytes, bss_bytes;
+ phys_bytes tot_bytes; /* total space for program, including gap */
+
+ assert(execi != NULL);
+ assert(execi->hdr != NULL);
+ assert(execi->vp != NULL);
+
+ proc_e = execi->proc_e;
+ vp = execi->vp;
+
+ /* Read the file header and extract the segment sizes. */
+ r = read_header_aout(execi->hdr, execi->vp->v_size, &sep_id,
+ &text_bytes, &data_bytes, &bss_bytes,
+ &tot_bytes, &execi->pc, &hdrlen);
+ if (r != OK) return(r);
+
+ r = exec_newmem(proc_e, 0 /* text_addr */, text_bytes,
+ 0 /* data_addr */, data_bytes + bss_bytes, tot_bytes,
+ execi->frame_len, sep_id, 0 /* is_elf */, vp->v_dev, vp->v_inode_nr,
+ execi->sb.st_ctime,
+ execi->progname, execi->new_uid, execi->new_gid,
+ &execi->stack_top, &execi->load_text, &execi->allow_setuid);
+
+ if (r != OK) {
+ printf("VFS: load_aout: exec_newmem failed: %d\n", r);
+ return(r);
+ }
+
+ off = hdrlen;
+
+ /* Read in text and data segments. */
+ if (execi->load_text)
+ r = read_seg(vp, off, proc_e, T, 0, text_bytes);
+ off += text_bytes;
+ if (r == OK)
+ r = read_seg(vp, off, proc_e, D, 0, data_bytes);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * load_elf *
+ *===========================================================================*/
+PRIVATE int load_elf(struct exec_info *execi)
+{
+ int r;
+ struct vnode *vp;
+ int proc_e;
+ phys_bytes tot_bytes; /* total space for program, including gap */
+ vir_bytes text_vaddr, text_paddr, text_filebytes, text_membytes;
+ vir_bytes data_vaddr, data_paddr, data_filebytes, data_membytes;
+ off_t text_offset, data_offset;
+ int sep_id, is_elf;
+
+ assert(execi != NULL);
+ assert(execi->hdr != NULL);
+ assert(execi->vp != NULL);
+
+ proc_e = execi->proc_e;
+ vp = execi->vp;
+
+ /* Read the file header and extract the segment sizes. */
+ r = read_header_elf(execi->hdr, &text_vaddr, &text_paddr,
+ &text_filebytes, &text_membytes,
+ &data_vaddr, &data_paddr,
+ &data_filebytes, &data_membytes,
+ &execi->pc, &text_offset, &data_offset);
+ if (r != OK) return(r);
+
+ sep_id = 0;
+ is_elf = 1;
+ tot_bytes = 0; /* Use default stack size */
+ r = exec_newmem(proc_e,
+ trunc_page(text_vaddr), text_membytes,
+ trunc_page(data_vaddr), data_membytes,
+ tot_bytes, execi->frame_len, sep_id, is_elf,
+ vp->v_dev, vp->v_inode_nr, execi->sb.st_ctime,
+ execi->progname, execi->new_uid, execi->new_gid,
+ &execi->stack_top, &execi->load_text, &execi->allow_setuid);
+
+ if (r != OK) {
+ printf("VFS: load_elf: exec_newmem failed: %d\n", r);
+ return(r);
+ }
+
+ /* Read in text and data segments. */
+ if (execi->load_text)
+ r = read_seg(vp, text_offset, proc_e, T, text_vaddr, text_filebytes);
+
+ if (r == OK)
+ r = read_seg(vp, data_offset, proc_e, D, data_vaddr, data_filebytes);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * exec_newmem *
+ *===========================================================================*/
+PRIVATE int exec_newmem(
+ int proc_e,
+ vir_bytes text_addr,
+ vir_bytes text_bytes,
+ vir_bytes data_addr,
+ vir_bytes data_bytes,
+ vir_bytes tot_bytes,
+ vir_bytes frame_len,
+ int sep_id,
+ int is_elf,
+ dev_t st_dev,
+ ino_t st_ino,
+ time_t ctime,
+ char *progname,
+ int new_uid,
+ int new_gid,
+ vir_bytes *stack_topp,
+ int *load_textp,
+ int *allow_setuidp
+)
+{
+/* Allocate a new memory map for a process that tries to exec */
+ int r;
+ struct exec_newmem e;
+ message m;
+
+ e.text_addr = text_addr;
+ e.text_bytes = text_bytes;
+ e.data_addr = data_addr;
+ e.data_bytes = data_bytes;
+ e.tot_bytes = tot_bytes;
+ e.args_bytes = frame_len;
+ e.sep_id = sep_id;
+ e.is_elf = is_elf;
+ e.st_dev = st_dev;
+ e.st_ino = st_ino;
+ e.enst_ctime = ctime;
+ e.new_uid = new_uid;
+ e.new_gid = new_gid;
+ strncpy(e.progname, progname, sizeof(e.progname)-1);
+ e.progname[sizeof(e.progname)-1] = '\0';
+
+ m.m_type = EXEC_NEWMEM;
+ m.EXC_NM_PROC = proc_e;
+ m.EXC_NM_PTR = (char *)&e;
+ if ((r = sendrec(PM_PROC_NR, &m)) != OK) return(r);
+
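+ /* The reply overwrites m: m_type carries the status, m1_i1 the new
+ * stack top and m1_i2 the EXC_NM_RF_* flag bits.
+ */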
+ *stack_topp = m.m1_i1;
+ *load_textp = !!(m.m1_i2 & EXC_NM_RF_LOAD_TEXT);
+ *allow_setuidp = !!(m.m1_i2 & EXC_NM_RF_ALLOW_SETUID);
+
+ return(m.m_type);
+}
+
+/*===========================================================================*
+ * is_script *
+ *===========================================================================*/
+PRIVATE int is_script(const char *exec_hdr, size_t exec_len)
+{
+/* Is this an interpreted script? */
+ assert(exec_hdr != NULL);
+
+ return(exec_len >= 2 && exec_hdr[0] == '#' && exec_hdr[1] == '!');
+}
+
+/*===========================================================================*
+ * patch_stack *
+ *===========================================================================*/
+PRIVATE int patch_stack(vp, stack, stk_bytes, path)
+struct vnode *vp; /* pointer for open script file */
+char stack[ARG_MAX]; /* pointer to stack image within VFS */
+vir_bytes *stk_bytes; /* size of initial stack */
+char path[PATH_MAX+1]; /* path to script file */
+{
+/* Patch the argument vector to include the path name of the script to be
+ * interpreted, and all strings on the #! line. Returns the path name of
+ * the interpreter.
+ */
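+/* Example (illustrative): for a script /tmp/foo whose first line reads
+ * "#!/bin/sh -x", and which was invoked as "foo arg1", the patched vector
+ * becomes { "/bin/sh", "-x", "/tmp/foo", "arg1" } and 'path' is left
+ * holding the interpreter name "/bin/sh".
+ */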
+ enum { INSERT=FALSE, REPLACE=TRUE };
+ int n, r;
+ off_t pos;
+ char *sp, *interp = NULL;
+ u64_t new_pos;
+ unsigned int cum_io;
+ char buf[_MAX_BLOCK_SIZE];
+
+ /* Make 'path' the new argv[0]. */
+ if (!insert_arg(stack, stk_bytes, path, REPLACE)) return(ENOMEM);
+
+ pos = 0; /* Read from the start of the file */
+
+ /* Issue request */
+ r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING,
+ VFS_PROC_NR, buf, _MAX_BLOCK_SIZE, &new_pos, &cum_io);
+ if (r != OK) return(r);
+
+ n = vp->v_size;
+ if (n > _MAX_BLOCK_SIZE)
+ n = _MAX_BLOCK_SIZE;
+ if (n < 2) return(ENOEXEC);
+
+ sp = &(buf[2]); /* just behind the #! */
+ n -= 2;
+ if (n > PATH_MAX) n = PATH_MAX;
+
+ /* Use the 'path' variable for temporary storage */
+ memcpy(path, sp, n);
+
+ if ((sp = memchr(path, '\n', n)) == NULL) /* must be a proper line */
+ return(ENOEXEC);
+
+ /* Move sp backwards through script[], prepending each string to stack. */
+ for (;;) {
+ /* skip spaces behind argument. */
+ while (sp > path && (*--sp == ' ' || *sp == '\t')) {}
+ if (sp == path) break;
+
+ sp[1] = 0;
+ /* Move to the start of the argument. */
+ while (sp > path && sp[-1] != ' ' && sp[-1] != '\t') --sp;
+
+ interp = sp;
+ if (!insert_arg(stack, stk_bytes, sp, INSERT)) {
+ printf("VFS: patch_stack: insert_arg failed\n");
+ return(ENOMEM);
+ }
+ }
+
+ /* Round *stk_bytes up to the size of a pointer for alignment constraints. */
+ *stk_bytes= ((*stk_bytes + PTRSIZE - 1) / PTRSIZE) * PTRSIZE;
+
+ if (interp != path)
+ memmove(path, interp, strlen(interp)+1);
+ return(OK);
+}
+
+/*===========================================================================*
+ * insert_arg *
+ *===========================================================================*/
+PRIVATE int insert_arg(
+char stack[ARG_MAX], /* pointer to stack image within PM */
+vir_bytes *stk_bytes, /* size of initial stack */
+char *arg, /* argument to prepend/replace as new argv[0] */
+int replace
+)
+{
+/* Patch the stack so that arg will become argv[0]. Be careful, the stack may
+ * be filled with garbage, although it normally looks like this:
+ * nargs argv[0] ... argv[nargs-1] NULL envp[0] ... NULL
+ * followed by the strings "pointed" to by the argv[i] and the envp[i]. The
+ * pointers are really offsets from the start of stack.
+ * Return true iff the operation succeeded.
+ */
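+/* Example (illustrative): with nargs == 1 and an empty environment the
+ * image begins with the words { 1, a0, NULL, NULL }, with the argv[0]
+ * string stored from offset a0 == 4*PTRSIZE onwards; hence the sanity
+ * check below that a0 is at least 4*PTRSIZE.
+ */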
+ int offset, a0, a1, old_bytes = *stk_bytes;
+
+ /* Prepending arg adds at least one string and a zero byte. */
+ offset = strlen(arg) + 1;
+
+ a0 = (int) ((char **) stack)[1]; /* argv[0] */
+ if (a0 < 4 * PTRSIZE || a0 >= old_bytes) return(FALSE);
+
+ a1 = a0; /* a1 will point to the strings to be moved */
+ if (replace) {
+ /* Move a1 to the end of argv[0][] (argv[1] if nargs > 1). */
+ do {
+ if (a1 == old_bytes) return(FALSE);
+ --offset;
+ } while (stack[a1++] != 0);
+ } else {
+ offset += PTRSIZE; /* new argv[0] needs new pointer in argv[] */
+ a0 += PTRSIZE; /* location of new argv[0][]. */
+ }
+
+ /* stack will grow by offset bytes (or shrink by -offset bytes) */
+ if ((*stk_bytes += offset) > ARG_MAX) return(FALSE);
+
+ /* Reposition the strings by offset bytes */
+ memmove(stack + a1 + offset, stack + a1, old_bytes - a1);
+
+ strcpy(stack + a0, arg); /* Put arg in the new space. */
+
+ if (!replace) {
+ /* Make space for a new argv[0]. */
+ memmove(stack + 2 * PTRSIZE, stack + 1 * PTRSIZE, a0 - 2 * PTRSIZE);
+
+ ((char **) stack)[0]++; /* nargs++; */
+ }
+ /* Now patch up argv[] and envp[] by offset. */
+ patch_ptr(stack, (vir_bytes) offset);
+ ((char **) stack)[1] = (char *) a0; /* set argv[0] correctly */
+ return(TRUE);
+}
+
+
+/*===========================================================================*
+ * patch_ptr *
+ *===========================================================================*/
+PRIVATE void patch_ptr(
+char stack[ARG_MAX], /* pointer to stack image within PM */
+vir_bytes base /* virtual address of stack base inside user */
+)
+{
+/* When doing an exec(name, argv, envp) call, the user builds up a stack
+ * image with arg and env pointers relative to the start of the stack. Now
+ * these pointers must be relocated, since the stack is not positioned at
+ * address 0 in the user's address space.
+ */
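+/* Example (illustrative): if the frame is copied to user address 'base',
+ * an argv[] entry holding the relative value 0x10 is rewritten to
+ * base + 0x10.
+ */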
+
+ char **ap, flag;
+ vir_bytes v;
+
+ flag = 0; /* counts number of 0-pointers seen */
+ ap = (char **) stack; /* points initially to 'nargs' */
+ ap++; /* now points to argv[0] */
+ while (flag < 2) {
+ if (ap >= (char **) &stack[ARG_MAX]) return; /* too bad */
+ if (*ap != NULL) {
+ v = (vir_bytes) *ap; /* v is relative pointer */
+ v += base; /* relocate it */
+ *ap = (char *) v; /* put it back */
+ } else {
+ flag++;
+ }
+ ap++;
+ }
+}
+
+/*===========================================================================*
+ * read_seg *
+ *===========================================================================*/
+PRIVATE int read_seg(
+struct vnode *vp, /* inode descriptor to read from */
+off_t off, /* offset in file */
+int proc_e, /* process number (endpoint) */
+int seg, /* T, D, or S */
+vir_bytes seg_addr, /* address to load segment */
+phys_bytes seg_bytes /* how much is to be transferred? */
+)
+{
+/*
+ * The byte count on read is usually smaller than the segment count, because
+ * a segment is padded out to a click multiple, and the data segment is only
+ * partially initialized.
+ */
+ int r;
+ unsigned n, o;
+ u64_t new_pos;
+ unsigned int cum_io;
+ static char buf[128 * 1024];
+
+ assert((seg == T)||(seg == D));
+
+ /* Make sure that the file is big enough */
+ if (vp->v_size < off+seg_bytes) return(EIO);
+
+ if (seg == T) {
+ /* We have to use a copy loop until safecopies support segments */
+ o = 0;
+ while (o < seg_bytes) {
+ n = seg_bytes - o;
+ if (n > sizeof(buf))
+ n = sizeof(buf);
+
+ if ((r = req_readwrite(vp->v_fs_e,vp->v_inode_nr,cvul64(off+o),
+ READING, VFS_PROC_NR, buf,
+ n, &new_pos, &cum_io)) != OK) {
+ printf("VFS: read_seg: req_readwrite failed (text)\n");
+ return(r);
+ }
+
+ if (cum_io != n) {
+ printf(
+ "VFS: read_seg: segment has not been read properly by exec()\n");
+ return(EIO);
+ }
+
+ if ((r = sys_vircopy(VFS_PROC_NR, D, (vir_bytes)buf, proc_e,
+ seg, seg_addr + o, n)) != OK) {
+ printf("VFS: read_seg: copy failed (text)\n");
+ return(r);
+ }
+
+ o += n;
+ }
+ return(OK);
+ } else if (seg == D) {
+
+ if ((r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(off), READING,
+ proc_e, (char*)seg_addr, seg_bytes,
+ &new_pos, &cum_io)) != OK) {
+ printf("VFS: read_seg: req_readwrite failed (data)\n");
+ return(r);
+ }
+
+ if (r == OK && cum_io != seg_bytes)
+ printf("VFS: read_seg segment has not been read properly by exec()\n");
+
+ return(r);
+ }
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * clo_exec *
+ *===========================================================================*/
+PRIVATE void clo_exec(struct fproc *rfp)
+{
+/* Close all file descriptors that are marked with the FD_CLOEXEC bit
+ * (in fp->fp_cloexec_set).
+ */
+ int i;
+
+ /* Check the file descriptors one by one for presence of FD_CLOEXEC. */
+ for (i = 0; i < OPEN_MAX; i++)
+ if ( FD_ISSET(i, &rfp->fp_cloexec_set))
+ (void) close_fd(rfp, i);
+}
+
+/*===========================================================================*
+ * map_header *
+ *===========================================================================*/
+PRIVATE int map_header(char **exec_hdr, const struct vnode *vp)
+{
+ int r;
+ u64_t new_pos;
+ unsigned int cum_io;
+ off_t pos;
+
+ pos = 0; /* Read from the start of the file */
+
+ r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING,
+ VFS_PROC_NR, hdr, MIN(vp->v_size, PAGE_SIZE),
+ &new_pos, &cum_io);
+ if (r != OK) {
+ printf("VFS: exec: map_header: req_readwrite failed\n");
+ return(r);
+ }
+
+ *exec_hdr = hdr;
+ return(OK);
+}
--- /dev/null
+#ifndef _VFS_EXEC_H_
+#define _VFS_EXEC_H_ 1
+
+struct exec_info {
+ int proc_e; /* Process endpoint */
+ char *hdr; /* Exec file's header */
+ vir_bytes pc; /* Entry point of exec file */
+ vir_bytes stack_top; /* Top of the stack */
+ vir_bytes frame_len; /* Stack size */
+ uid_t new_uid; /* Process UID after exec */
+ gid_t new_gid; /* Process GID after exec */
+ int load_text; /* Load text section? */
+ int allow_setuid; /* Allow setuid execution? */
+ struct vnode *vp; /* Exec file's vnode */
+ struct stat sb; /* Exec file's stat structure */
+ char progname[PROC_NAME_LEN]; /* Program name */
+};
+
+#endif /* !_VFS_EXEC_H_ */
--- /dev/null
+#ifndef __VFS_FILE_H__
+#define __VFS_FILE_H__
+
+/* This is the filp table. It is an intermediary between file descriptors and
+ * inodes. A slot is free if filp_count == 0.
+ */
+
+EXTERN struct filp {
+ mode_t filp_mode; /* RW bits, telling how file is opened */
+ int filp_flags; /* flags from open and fcntl */
+ int filp_state; /* state for crash recovery */
+ int filp_count; /* how many file descriptors share this slot?*/
+ struct vnode *filp_vno; /* vnode belonging to this file */
+ u64_t filp_pos; /* file position */
+ mutex_t filp_lock; /* lock to gain exclusive access */
+ struct fproc *filp_softlock; /* if not NULL; this filp didn't lock the
+ * vnode. Another filp already holds a lock
+ * for this thread */
+
+ /* the following fields are for select() and are owned by the generic
+ * select() code (i.e., fd-type-specific select() code can't touch these).
+ */
+ int filp_selectors; /* select()ing processes blocking on this fd */
+ int filp_select_ops; /* interested in these SEL_* operations */
+ int filp_select_flags; /* Select flags for the filp */
+
+ /* following are for fd-type-specific select() */
+ int filp_pipe_select_ops;
+} filp[NR_FILPS];
+
+#define FILP_CLOSED 0 /* filp_mode: associated device closed */
+
+#define FS_NORMAL 0 /* file descriptor can be used normally */
+#define FS_NEEDS_REOPEN 1 /* file descriptor needs to be re-opened */
+
+#define FSF_UPDATE 001 /* The driver should be informed about new
+ * state.
+ */
+#define FSF_BUSY 002 /* Select operation sent to driver but no
+ * reply yet.
+ */
+#define FSF_RD_BLOCK 010 /* Read request is blocking, the driver should
+ * keep state.
+ */
+#define FSF_WR_BLOCK 020 /* Write request is blocking */
+#define FSF_ERR_BLOCK 040 /* Exception request is blocking */
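+/* FSF_BLOCKED is the union of the three *_BLOCK flags above. */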
+#define FSF_BLOCKED 070
+#endif
--- /dev/null
+/* This file contains the procedures that manipulate file descriptors.
+ *
+ * The entry points into this file are
+ * get_fd: look for free file descriptor and free filp slots
+ * get_filp: look up the filp entry for a given file descriptor
+ * find_filp: find a filp slot that points to a given vnode
+ * inval_filp: invalidate a filp and associated fd's, only let close()
+ * happen on it
+ * do_verify_fd: verify whether the given file descriptor is valid for
+ * the given endpoint.
+ * do_set_filp: marks a filp as in-flight.
+ * do_copy_filp: copies a filp to another endpoint.
+ * do_put_filp: marks a filp as not in-flight anymore.
+ * do_cancel_fd: cancel the transaction when something goes wrong for
+ * the receiver.
+ */
+
+#include <sys/select.h>
+#include <minix/callnr.h>
+#include <minix/u64.h>
+#include <assert.h>
+#include "fs.h"
+#include "file.h"
+#include "fproc.h"
+#include "vnode.h"
+
+
+FORWARD _PROTOTYPE( filp_id_t verify_fd, (endpoint_t ep, int fd) );
+
+#if LOCK_DEBUG
+/*===========================================================================*
+ * check_filp_locks *
+ *===========================================================================*/
+PUBLIC void check_filp_locks_by_me(void)
+{
+/* Check whether this thread still has filp locks held */
+ struct filp *f;
+ int r;
+
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
+ r = mutex_trylock(&f->filp_lock);
+ if (r == -EDEADLK)
+ panic("Thread %d still holds filp lock on filp %p call_nr=%d\n",
+ mthread_self(), f, call_nr);
+ else if (r == 0) {
+ /* We just obtained the lock, release it */
+ mutex_unlock(&f->filp_lock);
+ }
+ }
+}
+#endif
+
+/*===========================================================================*
+ * check_filp_locks *
+ *===========================================================================*/
+PUBLIC void check_filp_locks(void)
+{
+ struct filp *f;
+ int r, count = 0;
+
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
+ r = mutex_trylock(&f->filp_lock);
+ if (r == -EBUSY) {
+ /* Mutex is still locked */
+ count++;
+ } else if (r == 0) {
+ /* We just obtained a lock, don't want it */
+ mutex_unlock(&f->filp_lock);
+ } else
+ panic("filp_lock weird state");
+ }
+ if (count) panic("locked filps");
+#if 0
+ else printf("check_filp_locks OK\n");
+#endif
+}
+
+/*===========================================================================*
+ * init_filps *
+ *===========================================================================*/
+PUBLIC void init_filps(void)
+{
+/* Initialize filps */
+ struct filp *f;
+
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
+ mutex_init(&f->filp_lock, NULL);
+ }
+
+}
+
+/*===========================================================================*
+ * get_fd *
+ *===========================================================================*/
+PUBLIC int get_fd(int start, mode_t bits, int *k, struct filp **fpt)
+{
+/* Look for a free file descriptor and a free filp slot. Fill in the mode word
+ * in the latter, but don't claim either one yet, since the open() or creat()
+ * may yet fail.
+ */
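+/* Typical use (illustrative sketch, not the only valid calling sequence):
+ *
+ * int fd; struct filp *f;
+ * if ((r = get_fd(0, R_BIT, &fd, &f)) != OK) return(r);
+ *
+ * On success the filp is returned with its filp_lock held, and the caller
+ * claims both slots, e.g. fp->fp_filp[fd] = f and f->filp_count = 1.
+ */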
+
+ register struct filp *f;
+ register int i;
+
+ /* Search the fproc fp_filp table for a free file descriptor. */
+ for (i = start; i < OPEN_MAX; i++) {
+ if (fp->fp_filp[i] == NULL && !FD_ISSET(i, &fp->fp_filp_inuse)) {
+ /* A file descriptor has been located. */
+ *k = i;
+ break;
+ }
+ }
+
+ /* Check to see if a file descriptor has been found. */
+ if (i >= OPEN_MAX) return(EMFILE);
+
+ /* If we don't care about a filp, return now */
+ if (fpt == NULL) return(OK);
+
+ /* Now that a file descriptor has been found, look for a free filp slot. */
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
+ assert(f->filp_count >= 0);
+ if (f->filp_count == 0 && mutex_trylock(&f->filp_lock) == 0) {
+ if (verbose) printf("get_fd: locking filp=%p\n", f);
+ f->filp_mode = bits;
+ f->filp_pos = cvu64(0);
+ f->filp_selectors = 0;
+ f->filp_select_ops = 0;
+ f->filp_pipe_select_ops = 0;
+ f->filp_flags = 0;
+ f->filp_state = FS_NORMAL;
+ f->filp_select_flags = 0;
+ f->filp_softlock = NULL;
+ *fpt = f;
+ return(OK);
+ }
+ }
+
+ /* If control passes here, the filp table must be full. Report that back. */
+ return(ENFILE);
+}
+
+
+/*===========================================================================*
+ * get_filp *
+ *===========================================================================*/
+PUBLIC struct filp *get_filp(fild, locktype)
+int fild; /* file descriptor */
+tll_access_t locktype;
+{
+/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */
+
+ return get_filp2(fp, fild, locktype);
+}
+
+
+/*===========================================================================*
+ * get_filp2 *
+ *===========================================================================*/
+PUBLIC struct filp *get_filp2(rfp, fild, locktype)
+register struct fproc *rfp;
+int fild; /* file descriptor */
+tll_access_t locktype;
+{
+/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */
+ struct filp *filp;
+
+ err_code = EBADF;
+ if (fild < 0 || fild >= OPEN_MAX ) return(NULL);
+ if (rfp->fp_filp[fild] == NULL && FD_ISSET(fild, &rfp->fp_filp_inuse))
+ err_code = EIO; /* The fd is not there, but it is not closed either */
+ if ((filp = rfp->fp_filp[fild]) != NULL) lock_filp(filp, locktype);
+
+ return(filp); /* may also be NULL */
+}
+
+
+/*===========================================================================*
+ * find_filp *
+ *===========================================================================*/
+PUBLIC struct filp *find_filp(struct vnode *vp, mode_t bits)
+{
+/* Find a filp slot that refers to the vnode 'vp' in a way described by
+ * the mode bits in 'bits'. Used for determining whether somebody is still
+ * interested in either end of a pipe. Also used when opening a FIFO to
+ * find partners to share a filp field with (to share the file position).
+ * Like 'get_fd' it performs its job by linear search through the filp table.
+ */
+
+ struct filp *f;
+
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
+ if (f->filp_count != 0 && f->filp_vno == vp && (f->filp_mode & bits)) {
+ return(f);
+ }
+ }
+
+ /* If control passes here, the filp wasn't there. Report that back. */
+ return(NULL);
+}
+
+/*===========================================================================*
+ * invalidate *
+ *===========================================================================*/
+PUBLIC int invalidate(struct filp *fp)
+{
+/* Invalidate filp. The fp_filp_inuse bits are not cleared, so the affected
+ * file descriptors cannot be reused until they have been closed first. */
+
+ int f, fd, n = 0;
+ for(f = 0; f < NR_PROCS; f++) {
+ if(fproc[f].fp_pid == PID_FREE) continue;
+ for(fd = 0; fd < OPEN_MAX; fd++) {
+ if(fproc[f].fp_filp[fd] && fproc[f].fp_filp[fd] == fp) {
+ fproc[f].fp_filp[fd] = NULL;
+ n++;
+ }
+ }
+ }
+
+ return(n); /* Report back how often this filp has been invalidated. */
+}
+
+/*===========================================================================*
+ * lock_filp *
+ *===========================================================================*/
+PUBLIC void lock_filp(filp, locktype)
+struct filp *filp;
+tll_access_t locktype;
+{
+ message org_m_in;
+ struct fproc *org_fp;
+ struct worker_thread *org_self;
+ struct vnode *vp;
+
+ assert(filp->filp_count > 0);
+ vp = filp->filp_vno;
+ assert(vp != NULL);
+
+ if (verbose)
+ printf("lock_filp: filp=%p locking vnode %p with locktype %d\n", filp,
+ vp, locktype);
+
+ /* Lock vnode only if we haven't already locked it. If already locked by us,
+ * we're allowed to have one additional 'soft' lock. */
+ if (tll_locked_by_me(&vp->v_lock)) {
+ assert(filp->filp_softlock == NULL);
+ filp->filp_softlock = fp;
+ } else {
+ lock_vnode(vp, locktype);
+ }
+
+ assert(vp->v_ref_count > 0); /* vnode still in use? */
+ assert(filp->filp_vno == vp); /* vnode still what we think it is? */
+ assert(filp->filp_count > 0); /* filp still in use? */
+
+ /* First try to get filp lock right off the bat */
+ if (mutex_trylock(&filp->filp_lock) != 0) {
+
+ /* Already in use, let's wait for our turn */
+ org_m_in = m_in;
+ org_fp = fp;
+ org_self = self;
+ assert(mutex_lock(&filp->filp_lock) == 0);
+ m_in = org_m_in;
+ fp = org_fp;
+ self = org_self;
+ }
+
+ assert(filp->filp_count > 0); /* Yet again; filp still in use? */
+}
+
+/*===========================================================================*
+ * unlock_filp *
+ *===========================================================================*/
+PUBLIC void unlock_filp(filp)
+struct filp *filp;
+{
+ /* If this filp holds a soft lock on the vnode, we must be the owner */
+ if (filp->filp_softlock != NULL)
+ assert(filp->filp_softlock == fp);
+
+ if (filp->filp_count > 0) {
+ /* Only unlock vnode if filp is still in use */
+
+ /* and if we don't hold a soft lock */
+ if (filp->filp_softlock == NULL) {
+ assert(tll_islocked(&(filp->filp_vno->v_lock)));
+ unlock_vnode(filp->filp_vno);
+ }
+ }
+
+ filp->filp_softlock = NULL;
+ assert(mutex_unlock(&filp->filp_lock) == 0);
+}
+
+/*===========================================================================*
+ * unlock_filps *
+ *===========================================================================*/
+PUBLIC void unlock_filps(filp1, filp2)
+struct filp *filp1;
+struct filp *filp2;
+{
+/* Unlock two filps that are tied to the same vnode. As a thread can lock a
+ * vnode only once, unlocking the vnode twice would result in an error. */
+
+ /* No NULL pointers and not equal */
+ assert(filp1);
+ assert(filp2);
+ assert(filp1 != filp2);
+
+ /* Must be tied to the same vnode and not NULL */
+ assert(filp1->filp_vno == filp2->filp_vno);
+ assert(filp1->filp_vno != NULL);
+
+ if (filp1->filp_count > 0 && filp2->filp_count > 0) {
+ /* Only unlock vnode if filps are still in use */
+ unlock_vnode(filp1->filp_vno);
+ }
+
+ filp1->filp_softlock = NULL;
+ filp2->filp_softlock = NULL;
+ assert(mutex_unlock(&filp2->filp_lock) == 0);
+ assert(mutex_unlock(&filp1->filp_lock) == 0);
+}
+
+/*===========================================================================*
+ * verify_fd *
+ *===========================================================================*/
+PRIVATE filp_id_t verify_fd(ep, fd)
+endpoint_t ep;
+int fd;
+{
+/* Verify whether the file descriptor 'fd' is valid for the endpoint 'ep'. When
+ * the file descriptor is valid, verify_fd returns a pointer to that filp, else
+ * it returns NULL.
+ */
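+/* On success the filp is returned locked (VNODE_READ); the caller is
+ * responsible for releasing it with unlock_filp().
+ */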
+ int slot;
+ struct filp *rfilp;
+
+ if (isokendpt(ep, &slot) != OK)
+ return(NULL);
+
+ rfilp = get_filp2(&fproc[slot], fd, VNODE_READ);
+
+ return(rfilp);
+}
+
+/*===========================================================================*
+ * do_verify_fd *
+ *===========================================================================*/
+PUBLIC int do_verify_fd(void)
+{
+ struct filp *rfilp;
+ rfilp = (struct filp *) verify_fd(m_in.USER_ENDPT, m_in.COUNT);
+ m_out.ADDRESS = (void *) rfilp;
+ if (rfilp != NULL) unlock_filp(rfilp);
+ return (rfilp != NULL) ? OK : EINVAL;
+}
+
+/*===========================================================================*
+ * set_filp *
+ *===========================================================================*/
+PUBLIC int set_filp(sfilp)
+filp_id_t sfilp;
+{
+ if (sfilp == NULL) return(EINVAL);
+
+ lock_filp(sfilp, VNODE_READ);
+ sfilp->filp_count++;
+ unlock_filp(sfilp);
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * do_set_filp *
+ *===========================================================================*/
+PUBLIC int do_set_filp(void)
+{
+ return set_filp((filp_id_t) m_in.ADDRESS);
+}
+
+/*===========================================================================*
+ * copy_filp *
+ *===========================================================================*/
+PUBLIC int copy_filp(to_ep, cfilp)
+endpoint_t to_ep;
+filp_id_t cfilp;
+{
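+/* Install 'cfilp' in the first free fd slot of the process identified by
+ * 'to_ep' and raise its reference count, comparable to a dup(2) across
+ * processes. Returns the new fd, or an error.
+ */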
+ int fd;
+ int slot;
+ struct fproc *rfp;
+
+ if (isokendpt(to_ep, &slot) != OK) return(EINVAL);
+ rfp = &fproc[slot];
+
+ /* Find an open slot in fp_filp */
+ for (fd = 0; fd < OPEN_MAX; fd++) {
+ if (rfp->fp_filp[fd] == NULL &&
+ !FD_ISSET(fd, &rfp->fp_filp_inuse)) {
+
+ /* Found a free slot, add descriptor */
+ FD_SET(fd, &rfp->fp_filp_inuse);
+ rfp->fp_filp[fd] = cfilp;
+ rfp->fp_filp[fd]->filp_count++;
+ return(fd);
+ }
+ }
+
+ /* File descriptor table is full */
+ return(EMFILE);
+}
+
+/*===========================================================================*
+ * do_copy_filp *
+ *===========================================================================*/
+PUBLIC int do_copy_filp(void)
+{
+ return copy_filp(m_in.USER_ENDPT, (filp_id_t) m_in.ADDRESS);
+}
+
+/*===========================================================================*
+ * put_filp *
+ *===========================================================================*/
+PUBLIC int put_filp(pfilp)
+filp_id_t pfilp;
+{
+ if (pfilp == NULL) {
+ return EINVAL;
+ } else {
+ lock_filp(pfilp, VNODE_OPCL);
+ close_filp(pfilp);
+ return(OK);
+ }
+}
+
+/*===========================================================================*
+ * do_put_filp *
+ *===========================================================================*/
+PUBLIC int do_put_filp(void)
+{
+ return put_filp((filp_id_t) m_in.ADDRESS);
+}
+
+/*===========================================================================*
+ * cancel_fd *
+ *===========================================================================*/
+PUBLIC int cancel_fd(ep, fd)
+endpoint_t ep;
+int fd;
+{
+ int slot;
+ struct fproc *rfp;
+ struct filp *rfilp;
+
+ if (isokendpt(ep, &slot) != OK) return(EINVAL);
+ rfp = &fproc[slot];
+
+ /* Check that the input 'fd' is valid */
+ rfilp = (struct filp *) verify_fd(ep, fd);
+ if (rfilp != NULL) {
+ /* Found a valid descriptor, remove it */
+ FD_CLR(fd, &rfp->fp_filp_inuse);
+ if (rfp->fp_filp[fd]->filp_count == 0) {
+ unlock_filp(rfilp);
+ printf("VFS: filp_count for slot %d fd %d already zero", slot,
+ fd);
+ return(EINVAL);
+ }
+ rfp->fp_filp[fd]->filp_count--;
+ rfp->fp_filp[fd] = NULL;
+ unlock_filp(rfilp);
+ return(fd);
+ }
+
+ /* File descriptor is not valid for the endpoint. */
+ return(EINVAL);
+}
+
+/*===========================================================================*
+ * do_cancel_fd *
+ *===========================================================================*/
+PUBLIC int do_cancel_fd(void)
+{
+ return cancel_fd(m_in.USER_ENDPT, m_in.COUNT);
+}
+
+/*===========================================================================*
+ * close_filp *
+ *===========================================================================*/
+PUBLIC void close_filp(f)
+struct filp *f;
+{
+/* Close a file. Will also unlock filp when done */
+
+ int mode_word, rw;
+ dev_t dev;
+ struct vnode *vp;
+
+ /* Must be locked */
+ assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
+ assert(tll_islocked(&f->filp_vno->v_lock));
+
+ vp = f->filp_vno;
+
+ if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) {
+ /* Check to see if the file is special. */
+ mode_word = vp->v_mode & I_TYPE;
+ if (mode_word == I_CHAR_SPECIAL || mode_word == I_BLOCK_SPECIAL) {
+ dev = (dev_t) vp->v_sdev;
+ if (mode_word == I_BLOCK_SPECIAL) {
+ lock_bsf();
+ if (vp->v_bfs_e == ROOT_FS_E) {
+ /* Invalidate the cache unless the special is
+ * mounted. Assume that block specials served
+ * by the root FS are opened only by fsck.
+ */
+ req_flush(vp->v_bfs_e, dev);
+ }
+ unlock_bsf();
+ }
+ /* Do any special processing on device close. */
+ (void) dev_close(dev, f-filp); /* Ignore any errors, even
+ * SUSPEND. */
+ f->filp_mode = FILP_CLOSED;
+ }
+ }
+
+ /* If the inode being closed is a pipe, release everyone hanging on it. */
+ if (vp->v_pipe == I_PIPE) {
+ rw = (f->filp_mode & R_BIT ? WRITE : READ);
+ release(vp, rw, NR_PROCS);
+ }
+
+ /* If a write has been done, the inode is already marked as DIRTY. */
+ if (--f->filp_count == 0) {
+ if (vp->v_pipe == I_PIPE) {
+ /* Last reader or writer is going. Tell PFS about latest
+ * pipe size.
+ */
+ truncate_vnode(vp, vp->v_size);
+ }
+
+ unlock_vnode(f->filp_vno);
+ put_vnode(f->filp_vno);
+ } else if (f->filp_count < 0) {
+ panic("VFS: invalid filp count: %d ino %d/%d", f->filp_count,
+ vp->v_dev, vp->v_inode_nr);
+ } else {
+ unlock_vnode(f->filp_vno);
+ }
+
+ mutex_unlock(&f->filp_lock);
+}
--- /dev/null
+#ifndef __VFS_FPROC_H__
+#define __VFS_FPROC_H__
+
+#include "threads.h"
+
+#include <sys/select.h>
+#include <minix/safecopies.h>
+
+/* This is the per-process information. A slot is reserved for each potential
+ * process. Thus NR_PROCS must be the same as in the kernel. It is not
+ * possible or even necessary to tell when a slot is free here.
+ */
+#define LOCK_DEBUG 0
+EXTERN struct fproc {
+ unsigned fp_flags;
+
+ pid_t fp_pid; /* process id */
+ endpoint_t fp_endpoint; /* kernel endpoint number of this process */
+
+ struct vnode *fp_wd; /* working directory; NULL during reboot */
+ struct vnode *fp_rd; /* root directory; NULL during reboot */
+
+ struct filp *fp_filp[OPEN_MAX];/* the file descriptor table */
+ fd_set fp_filp_inuse; /* which fd's are in use? */
+ fd_set fp_cloexec_set; /* bit map for POSIX Table 6-2 FD_CLOEXEC */
+
+ dev_t fp_tty; /* major/minor of controlling tty */
+ int fp_block_fd; /* place to save fd if rd/wr can't finish */
+ int fp_block_callnr; /* blocked call if rd/wr can't finish */
+ char *fp_buffer; /* place to save buffer if rd/wr can't finish*/
+ int fp_nbytes; /* place to save bytes if rd/wr can't finish */
+ int fp_cum_io_partial; /* partial byte count if rd/wr can't finish */
+ endpoint_t fp_task; /* which task is proc suspended on */
+ int fp_blocked_on; /* what is it blocked on */
+ endpoint_t fp_ioproc; /* proc no. in suspended-on i/o message */
+
+ cp_grant_id_t fp_grant; /* revoke this grant on unsuspend if > -1 */
+
+ uid_t fp_realuid; /* real user id */
+ uid_t fp_effuid; /* effective user id */
+ gid_t fp_realgid; /* real group id */
+ gid_t fp_effgid; /* effective group id */
+ int fp_ngroups; /* number of supplemental groups */
+ gid_t fp_sgroups[NGROUPS_MAX];/* supplemental groups */
+ mode_t fp_umask; /* mask set by umask system call */
+ message *fp_sendrec; /* request/reply to/from FS/driver */
+ mutex_t fp_lock; /* mutex to lock fproc object */
+ struct job fp_job; /* pending job */
+ thread_t fp_wtid; /* Thread ID of worker */
+#if LOCK_DEBUG
+ int fp_vp_rdlocks; /* number of read-only locks on vnodes */
+ int fp_vmnt_rdlocks; /* number of read-only locks on vmnts */
+#endif
+} fproc[NR_PROCS];
+
+/* fp_flags */
+#define FP_NOFLAGS 00
+#define FP_SUSP_REOPEN 01 /* Process is suspended until the reopens are
+ * completed (after the restart of a driver).
+ */
+#define FP_REVIVED 02 /* Indicates process is being revived */
+#define FP_SESLDR 04 /* Set if process is session leader */
+#define FP_PENDING 010 /* Set if process has pending work */
+#define FP_EXITING 020 /* Set if process is exiting */
+#define FP_PM_PENDING 040 /* Set if process has pending PM request */
+
+/* Field values. */
+#define NOT_REVIVING 0xC0FFEEE /* process is not being revived */
+#define REVIVING 0xDEEAD /* process is being revived from suspension */
+#define PID_FREE 0 /* process slot free */
+
+#endif /* __VFS_FPROC_H__ */
--- /dev/null
+#ifndef __VFS_FS_H__
+#define __VFS_FS_H__
+
+/* This is the master header for fs. It includes some other files
+ * and defines the principal constants.
+ */
+#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */
+#define _MINIX 1 /* tell headers to include MINIX stuff */
+#define _SYSTEM 1 /* tell headers that this is the kernel */
+
+#define DO_SANITYCHECKS 0
+
+#if DO_SANITYCHECKS
+#define SANITYCHECK do { \
+ if(!check_vrefs() || !check_pipe()) { \
+ printf("VFS:%s:%d: call_nr %d who_e %d\n", \
+ __FILE__, __LINE__, call_nr, who_e); \
+ panic("sanity check failed"); \
+ } \
+} while(0)
+#else
+#define SANITYCHECK
+#endif
+
+/* The following are so basic, all the *.c files get them automatically. */
+#include <minix/config.h> /* MUST be first */
+#include <minix/ansi.h> /* MUST be second */
+#include <sys/types.h>
+#include <minix/const.h>
+#include <minix/type.h>
+#include <minix/dmap.h>
+#include <minix/ds.h>
+#include <minix/rs.h>
+
+#include <limits.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <minix/syslib.h>
+#include <minix/sysutil.h>
+#include <minix/timers.h>
+
+#include "const.h"
+#include "dmap.h"
+#include "proto.h"
+#include "threads.h"
+#include "glo.h"
+#include "comm.h"
+#include "vmnt.h"
+
+#endif
--- /dev/null
+/* This file handles nested counter-request calls to VFS sent by file system
+ * (FS) servers in response to VFS requests.
+ *
+ * The entry point into this file is
+ * nested_fs_call perform a nested call from a file system server
+ *
+ */
+
+#include "fs.h"
+#include "fproc.h"
+#include <string.h>
+#include <assert.h>
+#include <minix/callnr.h>
+#include <minix/endpoint.h>
+#include <minix/vfsif.h>
+
+/* maximum nested call stack depth */
+#define MAX_DEPTH 1
+
+/* global variables stack */
+PRIVATE struct {
+ struct fproc *g_fp; /* pointer to caller process */
+ message g_m_in; /* request message */
+ message g_m_out; /* reply message */
+ int g_who_e; /* endpoint of caller process */
+ int g_who_p; /* slot number of caller process */
+ int g_call_nr; /* call number */
+ int g_super_user; /* is the caller root? */
+ char g_user_fullpath[PATH_MAX+1]; /* path to look up */
+} globals[MAX_DEPTH];
+
+PRIVATE int depth = 0; /* current globals stack level */
+
+#if ENABLE_SYSCALL_STATS
+EXTERN unsigned long calls_stats[NCALLS];
+#endif
+
+FORWARD _PROTOTYPE( int push_globals, (void) );
+FORWARD _PROTOTYPE( void pop_globals, (void) );
+FORWARD _PROTOTYPE( void set_globals, (message *m) );
+
+/*===========================================================================*
+ * push_globals *
+ *===========================================================================*/
+PRIVATE int push_globals()
+{
+/* Save the global variables of the current call onto the globals stack.
+ */
+
+ if (depth == MAX_DEPTH)
+ return(EPERM);
+
+ globals[depth].g_fp = fp;
+ globals[depth].g_m_in = m_in;
+ globals[depth].g_m_out = m_out;
+ globals[depth].g_super_user = super_user;
+
+ /* err_code is not used across blocking calls */
+ depth++;
+ return(OK);
+}
+
+/*===========================================================================*
+ * pop_globals *
+ *===========================================================================*/
+PRIVATE void pop_globals()
+{
+/* Restore the global variables of a call from the globals stack.
+ */
+
+ if (depth == 0)
+ panic("Popping from empty globals stack!");
+
+ depth--;
+
+ fp = globals[depth].g_fp;
+ m_in = globals[depth].g_m_in;
+ m_out = globals[depth].g_m_out;
+
+}
+
+/*===========================================================================*
+ * set_globals *
+ *===========================================================================*/
+PRIVATE void set_globals(m)
+message *m; /* request message */
+{
+/* Initialize global variables based on a request message.
+ */
+ int proc_p;
+
+ m_in = *m;
+
+ proc_p = _ENDPOINT_P(m_in.m_source);
+ fp = &fproc[proc_p];
+
+ /* the rest need not be initialized */
+}
+
+/*===========================================================================*
+ * nested_fs_call *
+ *===========================================================================*/
+PUBLIC void nested_fs_call(m)
+message *m; /* request/reply message pointer */
+{
+/* Handle a nested call from a file system server.
+ */
+ int r;
+
+ /* Save global variables of the current call */
+ if ((r = push_globals()) != OK) {
+ printf("VFS: error saving global variables in call %d from FS %d\n",
+ m->m_type, m->m_source);
+ } else {
+ /* Initialize global variables for the nested call */
+ set_globals(m);
+
+ /* Perform the nested call - only getsysinfo() is allowed right now */
+ if (call_nr == COMMON_GETSYSINFO) {
+ r = do_getsysinfo();
+ } else {
+ printf("VFS: invalid nested call %d from FS %d\n", call_nr,
+ who_e);
+
+ r = ENOSYS;
+ }
+
+ /* Store the result, and restore original global variables */
+ *m = m_out;
+
+ pop_globals();
+ }
+
+ m->m_type = r;
+}
--- /dev/null
+
+#include "fs.h"
+#include "file.h"
+#include "fproc.h"
+
+_PROTOTYPE( int gcov_flush, (cp_grant_id_t grantid, size_t size ));
+
+/*===========================================================================*
+ * do_gcov_flush *
+ *===========================================================================*/
+PUBLIC int do_gcov_flush()
+{
+/* A userland tool has requested the gcov data from another
+ * process (possibly vfs itself). Grant the target process
+ * access to the supplied buffer, and perform the call that
+ * makes the target copy its buffer to the caller (incl vfs
+ * itself).
+ */
+ struct fproc *rfp;
+ ssize_t size;
+ cp_grant_id_t grantid;
+ int r, n;
+ pid_t target;
+ message m;
+
+ size = m_in.GCOV_BUFF_SZ;
+ target = m_in.GCOV_PID;
+
+ /* If the request is sent to the wrong process, the system hangs; so make this root-only.
+ */
+
+ if (!super_user) return(EPERM);
+
+ /* Find target gcov process. */
+ for(n = 0; n < NR_PROCS; n++) {
+ if(fproc[n].fp_endpoint != NONE && fproc[n].fp_pid == target)
+ break;
+ }
+ if(n >= NR_PROCS) {
+ printf("VFS: gcov process %d not found\n", target);
+ return(ESRCH);
+ }
+ rfp = &fproc[n];
+
+ /* Grant the target process access to the requestor's buffer. */
+ if ((grantid = cpf_grant_magic(rfp->fp_endpoint, who_e,
+ (vir_bytes) m_in.GCOV_BUFF_P, size,
+ CPF_WRITE)) < 0) {
+ printf("VFS: gcov_flush: grant failed\n");
+ return(ENOMEM);
+ }
+
+ if(rfp->fp_endpoint == VFS_PROC_NR) {
+ /* Request is for VFS itself. */
+ r = gcov_flush(grantid, size);
+ } else {
+ /* Perform generic GCOV request. */
+ m.GCOV_GRANT = grantid;
+ m.GCOV_BUFF_SZ = size;
+ r = _taskcall(rfp->fp_endpoint, COMMON_REQ_GCOV_DATA, &m);
+ }
+
+ cpf_revoke(grantid);
+
+ return(r);
+}
--- /dev/null
+#ifndef __VFS_GLO_H__
+#define __VFS_GLO_H__
+
+/* EXTERN should be extern except for the table file */
+#ifdef _TABLE
+#undef EXTERN
+#define EXTERN
+#endif
+
+/* File System global variables */
+EXTERN struct fproc *fp; /* pointer to caller's fproc struct */
+EXTERN int susp_count; /* number of procs suspended on pipe */
+EXTERN int nr_locks; /* number of locks currently in place */
+EXTERN int reviving; /* number of pipe processes to be revived */
+EXTERN int pending;
+EXTERN int sending;
+
+EXTERN dev_t ROOT_DEV; /* device number of the root device */
+EXTERN int ROOT_FS_E; /* kernel endpoint of the root FS proc */
+EXTERN u32_t system_hz; /* system clock frequency. */
+
+/* The parameters of the call are kept here. */
+EXTERN message m_in; /* the input message itself */
+EXTERN message m_out; /* the output message used for reply */
+# define who_p ((int) (fp - fproc))
+# define isokslot(p) (p >= 0 && \
+ p < (int)(sizeof(fproc) / sizeof(struct fproc)))
+#if 0
+# define who_e (isokslot(who_p) ? fp->fp_endpoint : m_in.m_source)
+#else
+# define who_e (isokslot(who_p) && fp->fp_endpoint != NONE ? \
+ fp->fp_endpoint : m_in.m_source)
+#endif
+# define call_nr (m_in.m_type)
+# define super_user (fp->fp_effuid == SU_UID ? 1 : 0)
+EXTERN struct worker_thread *self;
+EXTERN endpoint_t receive_from;/* endpoint with pending reply */
+EXTERN int force_sync; /* toggle forced synchronous communication */
+EXTERN int verbose;
+EXTERN int deadlock_resolving;
+EXTERN mutex_t exec_lock;
+EXTERN mutex_t bsf_lock;/* Global lock for access to block special files */
+EXTERN struct worker_thread workers[NR_WTHREADS];
+EXTERN struct worker_thread sys_worker;
+EXTERN struct worker_thread dl_worker;
+EXTERN char mount_label[LABEL_MAX]; /* label of file system to mount */
+
+/* The following variables are used for returning results to the caller. */
+EXTERN int err_code; /* temporary storage for error number */
+
+/* Data initialized elsewhere. */
+extern _PROTOTYPE (int (*call_vec[]), (void) ); /* sys call table */
+extern _PROTOTYPE (int (*pfs_call_vec[]), (void) ); /* pfs callback table */
+extern char dot1[2]; /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */
+extern char dot2[3]; /* meaning to search_dir: no access permission check. */
+
+#endif
--- /dev/null
+#ifndef __VFS_WORK_H__
+#define __VFS_WORK_H__
+
+struct job {
+ struct fproc *j_fp;
+ message j_m_in;
+ void *(*j_func)(void *arg);
+ struct job *j_next;
+};
+
+#endif
--- /dev/null
+/* This file handles the LINK and UNLINK system calls. It also deals with
+ * deallocating the storage used by a file when the last UNLINK is done to a
+ * file and the blocks must be returned to the free block pool.
+ *
+ * The entry points into this file are
+ * do_link: perform the LINK system call
+ * do_unlink: perform the UNLINK and RMDIR system calls
+ * do_rename: perform the RENAME system call
+ * do_truncate: perform the TRUNCATE system call
+ * do_ftruncate: perform the FTRUNCATE system call
+ * do_rdlink: perform the RDLNK system call
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <string.h>
+#include <minix/com.h>
+#include <minix/callnr.h>
+#include <minix/vfsif.h>
+#include <dirent.h>
+#include <assert.h>
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "vnode.h"
+#include "param.h"
+
+/*===========================================================================*
+ * do_link *
+ *===========================================================================*/
+PUBLIC int do_link()
+{
+/* Perform the link(name1, name2) system call. */
+ int r = OK;
+ struct vnode *vp = NULL, *dirp = NULL;
+ struct vmnt *vmp1 = NULL, *vmp2 = NULL;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp1, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* See if 'name1' (file to be linked to) exists. */
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ /* Does the final directory of 'name2' exist? */
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp2, &dirp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+ if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK)
+ r = err_code;
+ else if ((dirp = last_dir(&resolve, fp)) == NULL)
+ r = err_code;
+
+ if (r != OK) {
+ unlock_vnode(vp);
+ unlock_vmnt(vmp1);
+ put_vnode(vp);
+ return(r);
+ }
+
+ /* Check for links across devices. */
+ if (vp->v_fs_e != dirp->v_fs_e)
+ r = EXDEV;
+ else
+ r = forbidden(dirp, W_BIT | X_BIT);
+
+ if (r == OK)
+ r = req_link(vp->v_fs_e, dirp->v_inode_nr, fullpath,
+ vp->v_inode_nr);
+
+ unlock_vnode(vp);
+ unlock_vnode(dirp);
+ if (vmp2 != NULL) unlock_vmnt(vmp2);
+ unlock_vmnt(vmp1);
+ put_vnode(vp);
+ put_vnode(dirp);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * do_unlink *
+ *===========================================================================*/
+PUBLIC int do_unlink()
+{
+/* Perform the unlink(name) or rmdir(name) system call. The code for these two
+ * is almost the same. They differ only in some condition testing. Unlink()
+ * may be used by the superuser to do dangerous things; rmdir() may not.
+ */
+ struct vnode *dirp, *vp;
+ struct vmnt *vmp, *vmp2;
+ int r;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &dirp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Get the last directory in the path. */
+ if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+ return(err_code);
+
+ if ((dirp = last_dir(&resolve, fp)) == NULL) return(err_code);
+
+ /* Make sure that the object is a directory */
+ if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) {
+ unlock_vnode(dirp);
+ unlock_vmnt(vmp);
+ put_vnode(dirp);
+ return(ENOTDIR);
+ }
+
+ /* The caller must have both search and execute permission */
+ if ((r = forbidden(dirp, X_BIT | W_BIT)) != OK) {
+ unlock_vnode(dirp);
+ unlock_vmnt(vmp);
+ put_vnode(dirp);
+ return(r);
+ }
+
+ /* Also, if the sticky bit is set, only the owner of the file or a privileged
+ user is allowed to unlink */
+ if ((dirp->v_mode & S_ISVTX) == S_ISVTX) {
+ /* Look up inode of file to unlink to retrieve owner */
+ resolve.l_flags = PATH_RET_SYMLINK;
+ resolve.l_vmp = &vmp2; /* Shouldn't actually get locked */
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode = &vp;
+ resolve.l_vnode_lock = VNODE_READ;
+ vp = advance(dirp, &resolve, fp);
+ assert(vmp2 == NULL);
+ if (vp != NULL) {
+ if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
+ r = EPERM;
+ unlock_vnode(vp);
+ put_vnode(vp);
+ } else
+ r = err_code;
+ if (r != OK) {
+ unlock_vnode(dirp);
+ unlock_vmnt(vmp);
+ put_vnode(dirp);
+ return(r);
+ }
+ }
+
+ tll_upgrade(&vmp->m_lock);
+
+ if(call_nr == UNLINK)
+ r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
+ else
+ r = req_rmdir(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
+ unlock_vnode(dirp);
+ unlock_vmnt(vmp);
+ put_vnode(dirp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_rename *
+ *===========================================================================*/
+PUBLIC int do_rename()
+{
+/* Perform the rename(name1, name2) system call. */
+ int r = OK, r1;
+ struct vnode *old_dirp, *new_dirp = NULL, *vp;
+ struct vmnt *oldvmp, *newvmp, *vmp2;
+ char old_name[PATH_MAX+1];
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &oldvmp, &old_dirp);
+ /* Do not yet request exclusive lock on vmnt to prevent deadlocks later on */
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* See if 'name1' (existing file) exists. Get dir and file inodes. */
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((old_dirp = last_dir(&resolve, fp)) == NULL)
+ return(err_code);
+
+ /* If the sticky bit is set, only the owner of the file or a privileged
+ user is allowed to rename */
+ if ((old_dirp->v_mode & S_ISVTX) == S_ISVTX) {
+ /* Look up inode of file to unlink to retrieve owner */
+ resolve.l_flags = PATH_RET_SYMLINK;
+ resolve.l_vmp = &vmp2; /* Shouldn't actually get locked */
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode = &vp;
+ resolve.l_vnode_lock = VNODE_READ;
+ vp = advance(old_dirp, &resolve, fp);
+ assert(vmp2 == NULL);
+ if (vp != NULL) {
+ if(vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
+ r = EPERM;
+ unlock_vnode(vp);
+ put_vnode(vp);
+ } else
+ r = err_code;
+ if (r != OK) {
+ unlock_vnode(old_dirp);
+ unlock_vmnt(oldvmp);
+ put_vnode(old_dirp);
+ return(r);
+ }
+ }
+
+ /* Save the last component of the old name */
+ if(strlen(fullpath) >= sizeof(old_name)) {
+ unlock_vnode(old_dirp);
+ unlock_vmnt(oldvmp);
+ put_vnode(old_dirp);
+ return(ENAMETOOLONG);
+ }
+ strcpy(old_name, fullpath);
+
+ /* See if 'name2' (new name) exists. Get dir inode */
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &newvmp, &new_dirp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+ if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK)
+ r = err_code;
+ else if ((new_dirp = last_dir(&resolve, fp)) == NULL)
+ r = err_code;
+
+ if (r != OK) {
+ unlock_vnode(old_dirp);
+ unlock_vmnt(oldvmp);
+ put_vnode(old_dirp);
+ return(r);
+ }
+
+ /* Both parent directories must be on the same device. */
+ if (old_dirp->v_fs_e != new_dirp->v_fs_e) r = EXDEV;
+
+ /* Parent dirs must be writable, searchable and on a writable device */
+ if ((r1 = forbidden(old_dirp, W_BIT|X_BIT)) != OK ||
+ (r1 = forbidden(new_dirp, W_BIT|X_BIT)) != OK) r = r1;
+
+ if (r == OK) {
+ tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */
+ r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
+ new_dirp->v_inode_nr, fullpath);
+ }
+ unlock_vnode(old_dirp);
+ unlock_vnode(new_dirp);
+ unlock_vmnt(oldvmp);
+ if (newvmp) unlock_vmnt(newvmp);
+
+ put_vnode(old_dirp);
+ put_vnode(new_dirp);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * do_truncate *
+ *===========================================================================*/
+PUBLIC int do_truncate()
+{
+/* truncate_vnode() does the actual work of do_truncate() and do_ftruncate().
+ * do_truncate() and do_ftruncate() have to get hold of the inode, either
+ * by name or fd, do checks on it, and call truncate_vnode() to do the
+ * work.
+ */
+ struct vnode *vp;
+ struct vmnt *vmp;
+ int r;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_EXCL;
+ resolve.l_vnode_lock = VNODE_WRITE;
+
+ if ((off_t) m_in.flength < 0) return(EINVAL);
+
+ /* Temporarily open file */
+ if (fetch_name(m_in.m2_p1, m_in.m2_i1, M1, fullpath) != OK) return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ /* Ask FS to truncate the file */
+ if ((r = forbidden(vp, W_BIT)) == OK)
+ r = truncate_vnode(vp, m_in.flength);
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_ftruncate *
+ *===========================================================================*/
+PUBLIC int do_ftruncate()
+{
+/* As with do_truncate(), truncate_vnode() does the actual work. */
+ struct filp *rfilp;
+ int r;
+
+ if ((off_t) m_in.flength < 0) return(EINVAL);
+
+ /* File is already opened; get a vnode pointer from filp */
+ if ((rfilp = get_filp(m_in.m2_i1, VNODE_WRITE)) == NULL) return(err_code);
+
+ if (!(rfilp->filp_mode & W_BIT))
+ r = EBADF;
+ else
+ r = truncate_vnode(rfilp->filp_vno, m_in.flength);
+
+ unlock_filp(rfilp);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * truncate_vnode *
+ *===========================================================================*/
+PUBLIC int truncate_vnode(vp, newsize)
+struct vnode *vp;
+off_t newsize;
+{
+/* Truncate a regular file or a pipe */
+ int r, file_type;
+
+ assert(tll_locked_by_me(&vp->v_lock));
+ file_type = vp->v_mode & I_TYPE;
+ if (file_type != I_REGULAR && file_type != I_NAMED_PIPE) return(EINVAL);
+ if ((r = req_ftrunc(vp->v_fs_e, vp->v_inode_nr, newsize, 0)) == OK)
+ vp->v_size = newsize;
+ return(r);
+}
+
+
+/*===========================================================================*
+ * do_slink *
+ *===========================================================================*/
+PUBLIC int do_slink()
+{
+/* Perform the symlink(name1, name2) system call. */
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if (m_in.name1_length <= 1) return(ENOENT);
+ if (m_in.name1_length >= SYMLINK_MAX) return(ENAMETOOLONG);
+
+ /* Get dir inode of 'name2' */
+ if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK)
+ return(err_code);
+
+ if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code);
+
+ if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) {
+ r = req_slink(vp->v_fs_e, vp->v_inode_nr, fullpath, who_e,
+ m_in.name1, m_in.name1_length - 1, fp->fp_effuid,
+ fp->fp_effgid);
+ }
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * rdlink_direct *
+ *===========================================================================*/
+PUBLIC int rdlink_direct(orig_path, link_path, rfp)
+char *orig_path;
+char *link_path; /* should have length PATH_MAX+1 */
+struct fproc *rfp;
+{
+/* Perform a readlink()-like call from within the VFS */
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Temporarily open the file containing the symbolic link */
+ strncpy(fullpath, orig_path, PATH_MAX);
+ fullpath[PATH_MAX] = '\0'; /* make sure it is null-terminated */
+ if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code);
+
+ /* Make sure this is a symbolic link */
+ if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK)
+ r = EINVAL;
+ else
+ r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, (endpoint_t) 0,
+ link_path, PATH_MAX+1, 1);
+
+ if (r > 0) link_path[r] = '\0'; /* Terminate string when successful */
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+
+ return r;
+}
+
+/*===========================================================================*
+ * do_rdlink *
+ *===========================================================================*/
+PUBLIC int do_rdlink()
+{
+/* Perform the readlink(name, buf, bufsize) system call. */
+ int r, copylen;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ copylen = m_in.nbytes;
+ if (copylen < 0) return(EINVAL);
+
+ /* Temporarily open the file containing the symbolic link */
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ /* Make sure this is a symbolic link */
+ if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK)
+ r = EINVAL;
+ else
+ r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2,
+ copylen, 0);
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+
+ return(r);
+}
--- /dev/null
+/* This file handles advisory file locking as required by POSIX.
+ *
+ * The entry points into this file are
+ * lock_op: perform locking operations for FCNTL system call
+ * lock_revive: revive processes when a lock is released
+ */
+
+#include "fs.h"
+#include <minix/com.h>
+#include <minix/u64.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "file.h"
+#include "fproc.h"
+#include "lock.h"
+#include "vnode.h"
+#include "param.h"
+
+/*===========================================================================*
+ * lock_op *
+ *===========================================================================*/
+PUBLIC int lock_op(f, req)
+struct filp *f;
+int req; /* either F_SETLK or F_SETLKW */
+{
+/* Perform the advisory locking required by POSIX. */
+
+ int r, ltype, i, conflict = 0, unlocking = 0;
+ mode_t mo;
+ off_t first, last;
+ struct flock flock;
+ vir_bytes user_flock;
+ struct file_lock *flp, *flp2, *empty;
+
+ /* Fetch the flock structure from user space. */
+ user_flock = (vir_bytes) m_in.name1;
+ r = sys_datacopy(who_e, (vir_bytes) user_flock, VFS_PROC_NR,
+ (vir_bytes) &flock, (phys_bytes) sizeof(flock));
+ if (r != OK) return(EINVAL);
+
+ /* Make some error checks. */
+ ltype = flock.l_type;
+ mo = f->filp_mode;
+ if (ltype != F_UNLCK && ltype != F_RDLCK && ltype != F_WRLCK) return(EINVAL);
+ if (req == F_GETLK && ltype == F_UNLCK) return(EINVAL);
+ if ( (f->filp_vno->v_mode & I_TYPE) != I_REGULAR) return(EINVAL);
+ if (req != F_GETLK && ltype == F_RDLCK && (mo & R_BIT) == 0) return(EBADF);
+ if (req != F_GETLK && ltype == F_WRLCK && (mo & W_BIT) == 0) return(EBADF);
+
+ /* Compute the first and last bytes in the lock region. */
+ switch (flock.l_whence) {
+ case SEEK_SET: first = 0; break;
+ case SEEK_CUR:
+ if (ex64hi(f->filp_pos) != 0)
+ panic("lock_op: position in file too high");
+ first = ex64lo(f->filp_pos);
+ break;
+ case SEEK_END: first = f->filp_vno->v_size; break;
+ default: return(EINVAL);
+ }
+
+ /* Check for overflow. */
+ if (((long) flock.l_start > 0) && ((first + flock.l_start) < first))
+ return(EINVAL);
+ if (((long) flock.l_start < 0) && ((first + flock.l_start) > first))
+ return(EINVAL);
+ first = first + flock.l_start;
+ last = first + flock.l_len - 1;
+ if (flock.l_len == 0) last = MAX_FILE_POS;
+ if (last < first) return(EINVAL);
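+ /* Illustrative example: l_whence == SEEK_SET, l_start == 100 and
+ * l_len == 50 lock bytes 100 through 149; with l_len == 0 the region
+ * extends from 'first' all the way to MAX_FILE_POS.
+ */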
+
+ /* Check if this region conflicts with any existing lock. */
+ empty = NULL;
+ for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) {
+ if (flp->lock_type == 0) {
+ if (empty == NULL) empty = flp;
+ continue; /* 0 means unused slot */
+ }
+ if (flp->lock_vnode != f->filp_vno) continue; /* different file */
+ if (last < flp->lock_first) continue; /* new one is in front */
+ if (first > flp->lock_last) continue; /* new one is afterwards */
+ if (ltype == F_RDLCK && flp->lock_type == F_RDLCK) continue;
+ if (ltype != F_UNLCK && flp->lock_pid == fp->fp_pid) continue;
+
+ /* There might be a conflict. Process it. */
+ conflict = 1;
+ if (req == F_GETLK) break;
+
+ /* If we are trying to set a lock, it just failed. */
+ if (ltype == F_RDLCK || ltype == F_WRLCK) {
+ if (req == F_SETLK) {
+ /* For F_SETLK, just report back failure. */
+ return(EAGAIN);
+ } else {
+ /* For F_SETLKW, suspend the process. */
+ suspend(FP_BLOCKED_ON_LOCK);
+ return(SUSPEND);
+ }
+ }
+
+ /* We are clearing a lock and we found something that overlaps. */
+ unlocking = 1;
+ if (first <= flp->lock_first && last >= flp->lock_last) {
+ flp->lock_type = 0; /* mark slot as unused */
+ nr_locks--; /* number of locks is now 1 less */
+ continue;
+ }
+
+ /* Part of a locked region has been unlocked. */
+ if (first <= flp->lock_first) {
+ flp->lock_first = last + 1;
+ continue;
+ }
+
+ if (last >= flp->lock_last) {
+ flp->lock_last = first - 1;
+ continue;
+ }
+
+ /* Bad luck. A lock has been split in two by unlocking the middle. */
+ if (nr_locks == NR_LOCKS) return(ENOLCK);
+ for (i = 0; i < NR_LOCKS; i++)
+ if (file_lock[i].lock_type == 0) break;
+ flp2 = &file_lock[i];
+ flp2->lock_type = flp->lock_type;
+ flp2->lock_pid = flp->lock_pid;
+ flp2->lock_vnode = flp->lock_vnode;
+ flp2->lock_first = last + 1;
+ flp2->lock_last = flp->lock_last;
+ flp->lock_last = first - 1;
+ nr_locks++;
+ }
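+ /* Illustrative example of the split case above: unlocking bytes 40..59 of
+ * a region locked 0..99 leaves two locked regions, 0..39 and 60..99, and
+ * thus consumes one extra slot in file_lock[].
+ */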
+ if (unlocking) lock_revive();
+
+ if (req == F_GETLK) {
+ if (conflict) {
+ /* GETLK and conflict. Report on the conflicting lock. */
+ flock.l_type = flp->lock_type;
+ flock.l_whence = SEEK_SET;
+ flock.l_start = flp->lock_first;
+ flock.l_len = flp->lock_last - flp->lock_first + 1;
+ flock.l_pid = flp->lock_pid;
+
+ } else {
+ /* It is GETLK and there is no conflict. */
+ flock.l_type = F_UNLCK;
+ }
+
+ /* Copy the flock structure back to the caller. */
+ r = sys_datacopy(VFS_PROC_NR, (vir_bytes) &flock,
+ who_e, (vir_bytes) user_flock, (phys_bytes) sizeof(flock));
+ return(r);
+ }
+
+ if (ltype == F_UNLCK) return(OK); /* unlocked a region with no locks */
+
+ /* There is no conflict. If space exists, store new lock in the table. */
+ if (empty == NULL) return(ENOLCK); /* table full */
+ empty->lock_type = ltype;
+ empty->lock_pid = fp->fp_pid;
+ empty->lock_vnode = f->filp_vno;
+ empty->lock_first = first;
+ empty->lock_last = last;
+ nr_locks++;
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * lock_revive *
+ *===========================================================================*/
+PUBLIC void lock_revive()
+{
+/* Go find all the processes that are waiting for any kind of lock and
+ * revive them all. The ones that are still blocked will block again when
+ * they run. The others will complete. This strategy is a space-time
+ * tradeoff. Figuring out exactly which ones to unblock now would take
+ * extra code, and the only thing it would win would be some performance in
+ * extremely rare circumstances (namely, that somebody actually used
+ * locking).
+ */
+
+ struct fproc *fptr;
+
+ for (fptr = &fproc[0]; fptr < &fproc[NR_PROCS]; fptr++){
+ if (fptr->fp_pid == PID_FREE) continue;
+ if (fptr->fp_blocked_on == FP_BLOCKED_ON_LOCK) {
+ revive(fptr->fp_endpoint, 0);
+ }
+ }
+}
--- /dev/null
+#ifndef __VFS_LOCK_H__
+#define __VFS_LOCK_H__
+
+/* This is the file locking table. Like the filp table, it points to the
+ * vnode table, in this case to achieve advisory locking.
+ */
+EXTERN struct file_lock {
+ short lock_type; /* F_RDLCK or F_WRLCK; 0 means unused slot */
+ pid_t lock_pid; /* pid of the process holding the lock */
+ struct vnode *lock_vnode;
+ off_t lock_first; /* offset of first byte locked */
+ off_t lock_last; /* offset of last byte locked */
+} file_lock[NR_LOCKS];
+
+#endif
--- /dev/null
+/*
+ * This file contains the main program of the Virtual File System: a loop
+ * that gets messages requesting work, carries out the work, and sends
+ * replies.
+ *
+ * The entry points into this file are:
+ * main: main program of the Virtual File System
+ * reply: send a reply to a process after the requested work is done
+ *
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/ioc_memory.h>
+#include <sys/svrctl.h>
+#include <sys/select.h>
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/keymap.h>
+#include <minix/const.h>
+#include <minix/endpoint.h>
+#include <minix/safecopies.h>
+#include <minix/debug.h>
+#include <minix/vfsif.h>
+#include "file.h"
+#include "dmap.h"
+#include "fproc.h"
+#include "vmnt.h"
+#include "vnode.h"
+#include "job.h"
+#include "param.h"
+
+#if ENABLE_SYSCALL_STATS
+EXTERN unsigned long calls_stats[NCALLS];
+#endif
+
+/* Thread related prototypes */
+FORWARD _PROTOTYPE( void thread_cleanup_f, (struct fproc *rfp, char *f,
+ int l) );
+#define thread_cleanup(x) thread_cleanup_f(x, __FILE__, __LINE__)
+FORWARD _PROTOTYPE( void *do_async_dev_result, (void *arg) );
+FORWARD _PROTOTYPE( void *do_control_msgs, (void *arg) );
+FORWARD _PROTOTYPE( void *do_fs_reply, (struct job *job) );
+FORWARD _PROTOTYPE( void *do_work, (void *arg) );
+FORWARD _PROTOTYPE( void *do_pm, (void *arg) );
+FORWARD _PROTOTYPE( void *do_init_root, (void *arg) );
+FORWARD _PROTOTYPE( void *do_pending_pipe, (void *arg) );
+FORWARD _PROTOTYPE( void handle_work, (void *(*func)(void *arg)) );
+
+FORWARD _PROTOTYPE( void get_work, (void) );
+FORWARD _PROTOTYPE( void lock_pm, (void) );
+FORWARD _PROTOTYPE( void unlock_pm, (void) );
+FORWARD _PROTOTYPE( void service_pm, (void) );
+FORWARD _PROTOTYPE( void service_pm_postponed, (void) );
+FORWARD _PROTOTYPE( int unblock, (struct fproc *rfp) );
+
+/* SEF functions and variables. */
+FORWARD _PROTOTYPE( void sef_local_startup, (void) );
+FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
+PRIVATE mutex_t pm_lock;
+
+/*===========================================================================*
+ * main *
+ *===========================================================================*/
+PUBLIC int main(void)
+{
+/* This is the main program of the file system. The main loop consists of
+ * three major activities: getting new work, processing the work, and sending
+ * the reply. This loop never terminates as long as the file system runs.
+ */
+ int transid, req;
+ struct job *job;
+
+ /* SEF local startup. */
+ sef_local_startup();
+
+ printf("Started AVFS\n");
+ verbose = 0;
+
+ /* This is the main loop that gets work, processes it, and sends replies. */
+ while (TRUE) {
+ yield_all(); /* let other threads run */
+ send_work();
+ get_work();
+
+ transid = TRNS_GET_ID(m_in.m_type);
+ req = TRNS_DEL_ID(m_in.m_type);
+ job = worker_getjob( (thread_t) transid - VFS_TRANSID);
+
+ /* Transaction encoding changes original m_type value; restore. */
+ if (job == NULL)
+ m_in.m_type = transid;
+ else
+ m_in.m_type = req;
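+ /* Illustrative note: FS replies carry a transaction id encoded in
+ * m_type; worker_getjob() maps that id back to the worker thread that
+ * issued the request, so do_fs_reply() can resume exactly that thread.
+ * Messages without a matching job are new requests or notifications.
+ */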
+
+ if (job != NULL) {
+ do_fs_reply(job);
+ continue;
+ } else if (who_e == PM_PROC_NR) { /* Calls from PM */
+ /* Special control messages from PM */
+ sys_worker_start(do_pm);
+ continue;
+ } else if (is_notify(call_nr)) {
+ /* A task notify()ed us */
+ sys_worker_start(do_control_msgs);
+ continue;
+ } else if (who_p < 0) { /* i.e., message comes from a task */
+ /* We're going to ignore this message. Tasks should
+ * send notify()s only.
+ */
+ printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
+ continue;
+ }
+
+ /* At this point we either have results from an asynchronous device
+ * or a new system call. In both cases a new worker thread has to be
+ * started and there might not be one available from the pool. This is
+ * not a problem (requests/replies are simply queued), except when
+ * they're from an FS endpoint, because these can cause a deadlock.
+ * handle_work() takes care of the details. */
+ if (IS_DEV_RS(call_nr)) {
+ /* We've got results for a device request */
+ handle_work(do_async_dev_result);
+ continue;
+ } else {
+ /* Normal syscall. */
+ handle_work(do_work);
+ }
+ }
+ return(OK); /* shouldn't come here */
+}
+
+/*===========================================================================*
+ * handle_work *
+ *===========================================================================*/
+PRIVATE void handle_work(void *(*func)(void *arg))
+{
+/* Handle asynchronous device replies and new system calls. If the originating
+ * endpoint is an FS endpoint, take extra care not to get in deadlock. */
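+ /* Illustrative scenario: an FS process makes a back call while all normal
+ * worker threads are blocked on requests to that same FS process; none of
+ * them can ever run again, so the dedicated deadlock-resolving thread is
+ * started to handle the call instead.
+ */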
+ struct vmnt *vmp;
+
+ if ((vmp = find_vmnt(who_e)) != NULL) {
+ /* A back call or dev result from an FS endpoint */
+ if (worker_available() == 0) {
+ /* No worker threads available to handle call */
+ if (deadlock_resolving) {
+ /* Already trying to resolve a deadlock, can't
+ * handle more, sorry */
+
+ reply(who_e, EAGAIN);
+ return;
+ }
+ deadlock_resolving = 1;
+ vmp->m_flags |= VMNT_BACKCALL;
+ dl_worker_start(func);
+ return;
+ }
+ }
+
+ worker_start(func);
+}
+
+/*===========================================================================*
+ * do_async_dev_result *
+ *===========================================================================*/
+PRIVATE void *do_async_dev_result(void *arg)
+{
+ endpoint_t endpt;
+ struct job my_job;
+
+ my_job = *((struct job *) arg);
+ fp = my_job.j_fp;
+ m_in = my_job.j_m_in;
+
+ /* An asynchronous character driver has results for us */
+ if (call_nr == DEV_REVIVE) {
+ endpt = m_in.REP_ENDPT;
+ if (endpt == VFS_PROC_NR)
+ endpt = find_suspended_ep(m_in.m_source, m_in.REP_IO_GRANT);
+
+ if (endpt == NONE) {
+ printf("VFS: proc with grant %d from %d not found\n",
+ m_in.REP_IO_GRANT, m_in.m_source);
+ } else if (m_in.REP_STATUS == SUSPEND) {
+ printf("VFS: got SUSPEND on DEV_REVIVE: not reviving proc\n");
+ } else
+ revive(endpt, m_in.REP_STATUS);
+ }
+ else if (call_nr == DEV_OPEN_REPL) open_reply();
+ else if (call_nr == DEV_REOPEN_REPL) reopen_reply();
+ else if (call_nr == DEV_CLOSE_REPL) close_reply();
+ else if (call_nr == DEV_SEL_REPL1)
+ select_reply1(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS);
+ else if (call_nr == DEV_SEL_REPL2)
+ select_reply2(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS);
+
+ if (deadlock_resolving) {
+ struct vmnt *vmp;
+ if ((vmp = find_vmnt(who_e)) != NULL)
+ vmp->m_flags &= ~VMNT_BACKCALL;
+
+ if (fp != NULL && fp->fp_wtid == dl_worker.w_tid)
+ deadlock_resolving = 0;
+ }
+
+ thread_cleanup(NULL);
+ return(NULL);
+}
+
+/*===========================================================================*
+ * do_control_msgs *
+ *===========================================================================*/
+PRIVATE void *do_control_msgs(void *arg)
+{
+ struct job my_job;
+
+ my_job = *((struct job *) arg);
+ fp = my_job.j_fp;
+ m_in = my_job.j_m_in;
+
+ /* Check for special control messages. */
+ if (who_e == CLOCK) {
+ /* Alarm timer expired. Used only for select(). Check it. */
+ expire_timers(m_in.NOTIFY_TIMESTAMP);
+ } else if (who_e == DS_PROC_NR) {
+ /* DS notifies us of an event. */
+ ds_event();
+ } else {
+ /* Device notifies us of an event. */
+ dev_status(&m_in);
+ }
+
+ thread_cleanup(NULL);
+ return(NULL);
+}
+
+/*===========================================================================*
+ * do_fs_reply *
+ *===========================================================================*/
+PRIVATE void *do_fs_reply(struct job *job)
+{
+ struct vmnt *vmp;
+ struct fproc *rfp;
+
+ if (verbose) printf("VFS: reply to request!\n");
+ if ((vmp = find_vmnt(who_e)) == NULL)
+ panic("Couldn't find vmnt for endpoint %d", who_e);
+
+ rfp = job->j_fp;
+
+ if (rfp == NULL || rfp->fp_endpoint == NONE) {
+ printf("VFS: spurious reply from %d\n", who_e);
+ return(NULL);
+ }
+
+ *rfp->fp_sendrec = m_in;
+ vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
+
+ worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */
+ return(NULL);
+}
+
+/*===========================================================================*
+ * lock_pm *
+ *===========================================================================*/
+PRIVATE void lock_pm(void)
+{
+ message org_m_in;
+ struct fproc *org_fp;
+ struct worker_thread *org_self;
+
+ /* First try to get it right off the bat */
+ if (mutex_trylock(&pm_lock) == 0)
+ return;
+
+ org_m_in = m_in;
+ org_fp = fp;
+ org_self = self;
+
+ if (mutex_lock(&pm_lock) != 0)
+ panic("Could not obtain lock on pm\n");
+
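+ /* mutex_lock() may have blocked, letting other worker threads run and
+ * overwrite the global m_in/fp/self; restore our own copies now that we
+ * hold the lock.
+ */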
+ m_in = org_m_in;
+ fp = org_fp;
+ self = org_self;
+}
+
+/*===========================================================================*
+ * unlock_pm *
+ *===========================================================================*/
+PRIVATE void unlock_pm(void)
+{
+ if (mutex_unlock(&pm_lock) != 0)
+ panic("Could not release lock on pm");
+}
+
+/*===========================================================================*
+ * do_pm *
+ *===========================================================================*/
+PRIVATE void *do_pm(void *arg)
+{
+ struct job my_job;
+ struct fproc *rfp;
+
+ my_job = *((struct job *) arg);
+ rfp = fp = my_job.j_fp;
+ m_in = my_job.j_m_in;
+
+ lock_pm();
+ service_pm();
+ unlock_pm();
+
+ thread_cleanup(NULL);
+ return(NULL);
+}
+
+/*===========================================================================*
+ * do_pending_pipe *
+ *===========================================================================*/
+PRIVATE void *do_pending_pipe(void *arg)
+{
+ int r, fd_nr;
+ struct filp *f;
+ struct job my_job;
+ tll_access_t locktype;
+
+ my_job = *((struct job *) arg);
+ fp = my_job.j_fp;
+ m_in = my_job.j_m_in;
+
+ lock_proc(fp, 1 /* force lock */);
+
+ fd_nr = fp->fp_block_fd;
+ locktype = (call_nr == READ) ? VNODE_READ : VNODE_WRITE;
+ f = get_filp(fd_nr, locktype);
+ assert(f != NULL);
+
+ r = rw_pipe((call_nr == READ) ? READING : WRITING, who_e, fd_nr, f,
+ fp->fp_buffer, fp->fp_nbytes);
+
+ if (r != SUSPEND) /* Do we have results to report? */
+ reply(who_e, r);
+
+ unlock_filp(f);
+
+ thread_cleanup(fp);
+ return(NULL);
+}
+
+/*===========================================================================*
+ * do_dummy *
+ *===========================================================================*/
+PUBLIC void *do_dummy(void *arg)
+{
+ struct job my_job;
+ int r;
+
+ my_job = *((struct job *) arg);
+ fp = my_job.j_fp;
+ m_in = my_job.j_m_in;
+
+ if ((r = mutex_trylock(&fp->fp_lock)) == 0) {
+ thread_cleanup(fp);
+ } else {
+ /* Proc is busy, let that worker thread carry out the work */
+ thread_cleanup(NULL);
+ }
+ return(NULL);
+}
+
+/*===========================================================================*
+ * do_work *
+ *===========================================================================*/
+PRIVATE void *do_work(void *arg)
+{
+ int error;
+ struct job my_job;
+
+ my_job = *((struct job *) arg);
+ fp = my_job.j_fp;
+ m_in = my_job.j_m_in;
+
+ lock_proc(fp, 0); /* This proc is busy */
+
+ if (call_nr == MAPDRIVER) {
+ error = do_mapdriver();
+ } else if (call_nr == COMMON_GETSYSINFO) {
+ error = do_getsysinfo();
+ } else if (IS_PFS_VFS_RQ(call_nr)) {
+ if (who_e != PFS_PROC_NR) {
+ printf("VFS: only PFS is allowed to make nested VFS calls\n");
+ error = ENOSYS;
+ } else if (call_nr <= PFS_BASE || call_nr >= PFS_BASE + PFS_NREQS) {
+ error = ENOSYS;
+ } else {
+ call_nr -= PFS_BASE;
+ error = (*pfs_call_vec[call_nr])();
+ }
+ } else {
+ /* We're dealing with a POSIX system call from a normal
+ * process. Call the internal function that does the work.
+ */
+ if (call_nr < 0 || call_nr >= NCALLS) {
+ error = ENOSYS;
+ } else if (fp->fp_flags & FP_EXITING) {
+ error = SUSPEND;
+ } else if (fp->fp_pid == PID_FREE) {
+ /* Process vanished before we were able to handle request.
+ * Replying has no use. Just drop it. */
+ error = SUSPEND;
+ } else {
+#if ENABLE_SYSCALL_STATS
+ calls_stats[call_nr]++;
+#endif
+ error = (*call_vec[call_nr])();
+ }
+ }
+
+ /* Copy the results back to the user and send reply. */
+ if (error != SUSPEND) {
+ if (deadlock_resolving) {
+ struct vmnt *vmp;
+ if ((vmp = find_vmnt(who_e)) != NULL)
+ vmp->m_flags &= ~VMNT_BACKCALL;
+
+ if (fp->fp_wtid == dl_worker.w_tid)
+ deadlock_resolving = 0;
+ }
+ reply(who_e, error );
+ }
+
+ thread_cleanup(fp);
+ return(NULL);
+}
+
+/*===========================================================================*
+ * sef_local_startup *
+ *===========================================================================*/
+PRIVATE void sef_local_startup()
+{
+ /* Register init callbacks. */
+ sef_setcb_init_fresh(sef_cb_init_fresh);
+ sef_setcb_init_restart(sef_cb_init_fail);
+
+ /* No live update support for now. */
+
+ /* Let SEF perform startup. */
+ sef_startup();
+}
+
+/*===========================================================================*
+ * sef_cb_init_fresh *
+ *===========================================================================*/
+PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
+{
+/* Initialize the virtual file server. */
+ int s, i;
+ struct fproc *rfp;
+ message mess;
+ struct rprocpub rprocpub[NR_BOOT_PROCS];
+
+ force_sync = 0;
+
+ /* Initialize proc endpoints to NONE */
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ rfp->fp_endpoint = NONE;
+ rfp->fp_pid = PID_FREE;
+ }
+
+ /* Initialize the process table with help of the process manager messages.
+ * Expect one message for each system process with its slot number and pid.
+ * When no more processes follow, the magic process number NONE is sent.
+ * Then, stop and synchronize with the PM.
+ */
+ do {
+ if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
+ panic("VFS: couldn't receive from PM: %d", s);
+
+ if (mess.m_type != PM_INIT)
+ panic("unexpected message from PM: %d", mess.m_type);
+
+ if (NONE == mess.PM_PROC) break;
+
+ rfp = &fproc[mess.PM_SLOT];
+ rfp->fp_flags = FP_NOFLAGS;
+ rfp->fp_pid = mess.PM_PID;
+ rfp->fp_endpoint = mess.PM_PROC;
+ rfp->fp_grant = GRANT_INVALID;
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+ rfp->fp_realuid = (uid_t) SYS_UID;
+ rfp->fp_effuid = (uid_t) SYS_UID;
+ rfp->fp_realgid = (gid_t) SYS_GID;
+ rfp->fp_effgid = (gid_t) SYS_GID;
+ rfp->fp_umask = ~0;
+ } while (TRUE); /* continue until process NONE */
+ mess.m_type = OK; /* tell PM that we succeeded */
+ s = send(PM_PROC_NR, &mess); /* send synchronization message */
+
+ /* All process table entries have been set. Continue with initialization. */
+ fp = &fproc[_ENDPOINT_P(VFS_PROC_NR)];/* During init all communication with
+ * FSes is on behalf of myself */
+ init_dmap(); /* Initialize device table. */
+ system_hz = sys_hz();
+
+ /* Map all the services in the boot image. */
+ if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
+ (vir_bytes) rprocpub, sizeof(rprocpub), S)) != OK){
+ panic("sys_safecopyfrom failed: %d", s);
+ }
+ for (i = 0; i < NR_BOOT_PROCS; i++) {
+ if (rprocpub[i].in_use) {
+ if ((s = map_service(&rprocpub[i])) != OK) {
+ panic("VFS: unable to map service: %d", s);
+ }
+ }
+ }
+
+ /* Subscribe to driver events for VFS drivers. */
+ if ((s = ds_subscribe("drv\\.vfs\\..*", DSF_INITIAL | DSF_OVERWRITE)) != OK) {
+ panic("VFS: can't subscribe to driver events (%d)", s);
+ }
+
+#if DO_SANITYCHECKS
+ FIXME("VFS: DO_SANITYCHECKS is on");
+#endif
+
+ /* Initialize worker threads */
+ for (i = 0; i < NR_WTHREADS; i++) {
+ worker_init(&workers[i]);
+ }
+ worker_init(&sys_worker); /* exclusive system worker thread */
+ worker_init(&dl_worker); /* exclusive worker thread to resolve deadlocks */
+
+ /* Initialize global locks */
+ if (mthread_mutex_init(&pm_lock, NULL) != 0)
+ panic("VFS: couldn't initialize pm lock mutex");
+ if (mthread_mutex_init(&exec_lock, NULL) != 0)
+ panic("VFS: couldn't initialize exec lock");
+ if (mthread_mutex_init(&bsf_lock, NULL) != 0)
+ panic("VFS: couldn't initialize block special file lock");
+
+ /* Initialize event resources for boot procs and locks for all procs */
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ assert(mutex_init(&rfp->fp_lock, NULL) == 0);
+#if LOCK_DEBUG
+ rfp->fp_vp_rdlocks = 0;
+ rfp->fp_vmnt_rdlocks = 0;
+#endif
+ }
+
+ init_vnodes(); /* init vnodes */
+ init_vmnts(); /* init vmnt structures */
+ init_select(); /* init select() structures */
+ init_filps(); /* Init filp structures */
+ mount_pfs(); /* mount Pipe File Server */
+ worker_start(do_init_root); /* mount initial ramdisk as file system root */
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * do_init_root *
+ *===========================================================================*/
+PRIVATE void *do_init_root(void *arg)
+{
+ struct fproc *rfp;
+ struct job my_job;
+ int r;
+ char *mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
+
+ my_job = *((struct job *) arg);
+ fp = my_job.j_fp;
+
+ lock_proc(fp, 1 /* force lock */); /* This proc is busy */
+ lock_pm();
+
+ /* Initialize process directories. mount_fs will set them to the correct
+ * values */
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ FD_ZERO(&(rfp->fp_filp_inuse));
+ rfp->fp_rd = NULL;
+ rfp->fp_wd = NULL;
+ }
+
+ if ((r = mount_fs(DEV_IMGRD, "/", MFS_PROC_NR, 0, mount_label)) != OK)
+ panic("Failed to initialize root");
+
+ unlock_pm();
+ thread_cleanup(fp);
+ return(NULL);
+}
+
+/*===========================================================================*
+ * lock_proc *
+ *===========================================================================*/
+PUBLIC void lock_proc(struct fproc *rfp, int force_lock)
+{
+ int r;
+ message org_m_in;
+ struct fproc *org_fp;
+ struct worker_thread *org_self;
+
+ r = mutex_trylock(&rfp->fp_lock);
+
+ /* Were we supposed to obtain this lock immediately? */
+ if (force_lock) {
+ assert(r == 0);
+ return;
+ }
+
+ if (r == 0) return;
+
+ org_m_in = m_in;
+ org_fp = fp;
+ org_self = self;
+ assert(mutex_lock(&rfp->fp_lock) == 0);
+ m_in = org_m_in;
+ fp = org_fp;
+ self = org_self;
+}
+
+/*===========================================================================*
+ * unlock_proc *
+ *===========================================================================*/
+PUBLIC void unlock_proc(struct fproc *rfp)
+{
+ int r;
+
+ if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
+ panic("Failed to unlock: %d", r);
+}
+
+/*===========================================================================*
+ * thread_cleanup *
+ *===========================================================================*/
+PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l)
+{
+/* Clean up worker thread. Skip parts if this thread is not associated
+ * with a particular process (i.e., rfp is NULL) */
+
+ if (verbose) printf("AVFS: thread %d is cleaning up for fp=%p (%s:%d)\n",
+ mthread_self(), rfp, f, l);
+
+ assert(mthread_self() != -1);
+
+#if LOCK_DEBUG
+ if (rfp != NULL) {
+ check_filp_locks_by_me();
+ check_vnode_locks_by_me(rfp);
+ check_vmnt_locks_by_me(rfp);
+ }
+#endif
+
+ if (rfp != NULL && rfp->fp_flags & FP_PM_PENDING) { /* Postponed PM call */
+ m_in = rfp->fp_job.j_m_in;
+ rfp->fp_flags &= ~FP_PM_PENDING;
+ service_pm_postponed();
+ }
+
+#if LOCK_DEBUG
+ if (rfp != NULL) {
+ check_filp_locks_by_me();
+ check_vnode_locks_by_me(rfp);
+ check_vmnt_locks_by_me(rfp);
+ }
+#endif
+
+ if (rfp != NULL) unlock_proc(rfp);
+
+#if 0
+ mthread_exit(NULL);
+#endif
+}
+
+/*===========================================================================*
+ * get_work *
+ *===========================================================================*/
+PRIVATE void get_work()
+{
+ /* Normally wait for new input. However, if 'reviving' is
+ * nonzero, a suspended process must be awakened.
+ */
+ int r, found_one, proc_p;
+ register struct fproc *rp;
+
+ if (verbose) printf("VFS: get_work looking for work\n");
+
+ while (reviving != 0) {
+ found_one = FALSE;
+
+ /* Find a suspended process. */
+ for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
+ if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) {
+ found_one = TRUE; /* Found a suspended process */
+ if (unblock(rp))
+ return; /* So main loop can process job */
+ send_work();
+ }
+
+ if (!found_one) /* Consistency error */
+ panic("VFS: get_work couldn't revive anyone");
+ }
+
+ for(;;) {
+ /* Normal case. No one to revive. Get a useful request. */
+ if ((r = sef_receive(ANY, &m_in)) != OK) {
+ panic("VFS: sef_receive error: %d", r);
+ }
+
+ proc_p = _ENDPOINT_P(m_in.m_source);
+ if (proc_p < 0) fp = NULL;
+ else fp = &fproc[proc_p];
+
+ if (m_in.m_type == EDEADSRCDST) return; /* Failed 'sendrec' */
+
+ if (verbose) printf("AVFS: got work from %d (fp=%p)\n", m_in.m_source,
+ fp);
+
+ /* Negative who_p is never used to access the fproc array. Negative
+ * numbers (kernel tasks) are treated in a special way.
+ */
+ if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc)))
+ panic("receive process out of range: %d", who_p);
+ if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) {
+ printf("VFS: ignoring request from %d, endpointless slot %d (%d)\n",
+ m_in.m_source, who_p, m_in.m_type);
+ continue;
+ }
+
+ /* Internal consistency check; our mental image of process numbers and
+ * endpoints must match with how the rest of the system thinks of them.
+ */
+ if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) {
+ if (fproc[who_p].fp_endpoint == NONE)
+ printf("slot unknown even\n");
+
+ printf("VFS: receive endpoint inconsistent (source %d, who_p "
+ "%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
+ fproc[who_p].fp_endpoint, who_e);
+ panic("VFS: inconsistent endpoint ");
+ }
+
+ return;
+ }
+}
+
+
+/*===========================================================================*
+ * reply *
+ *===========================================================================*/
+PUBLIC void reply(whom, result)
+int whom; /* process to reply to */
+int result; /* result of the call (usually OK or error #) */
+{
+/* Send a reply to a user process. If the send fails, just ignore it. */
+ int r;
+
+ m_out.reply_type = result;
+ r = sendnb(whom, &m_out);
+ if (r != OK) {
+ printf("VFS: couldn't send reply %d to %d: %d\n", result, whom, r);
+ panic("Yikes %d", call_nr);
+ }
+}
+
+/*===========================================================================*
+ * service_pm_postponed *
+ *===========================================================================*/
+PRIVATE void service_pm_postponed(void)
+{
+ int r;
+ vir_bytes pc;
+
+#if 0
+ printf("executing postponed: ");
+ if (call_nr == PM_EXEC) printf("PM_EXEC");
+ if (call_nr == PM_EXIT) printf("PM_EXIT");
+ if (call_nr == PM_DUMPCORE) printf("PM_DUMPCORE");
+ printf("\n");
+#endif
+
+ switch(call_nr) {
+ case PM_EXEC:
+ r = pm_exec(m_in.PM_PROC, m_in.PM_PATH, m_in.PM_PATH_LEN,
+ m_in.PM_FRAME, m_in.PM_FRAME_LEN, &pc);
+
+ /* Reply status to PM */
+ m_out.m_type = PM_EXEC_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+ m_out.PM_PC = (void*)pc;
+ m_out.PM_STATUS = r;
+
+ break;
+
+ case PM_EXIT:
+ pm_exit(m_in.PM_PROC);
+
+ /* Reply dummy status to PM for synchronization */
+ m_out.m_type = PM_EXIT_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+
+ case PM_DUMPCORE:
+ r = pm_dumpcore(m_in.PM_PROC,
+ NULL /* (struct mem_map *) m_in.PM_SEGPTR */);
+
+ /* Reply status to PM */
+ m_out.m_type = PM_CORE_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+ m_out.PM_STATUS = r;
+
+ break;
+
+ default:
+ panic("Unhandled postponed PM call %d", m_in.m_type);
+ }
+
+ r = send(PM_PROC_NR, &m_out);
+ if (r != OK)
+ panic("service_pm_postponed: send failed: %d", r);
+}
+
+/*===========================================================================*
+ * service_pm *
+ *===========================================================================*/
+PRIVATE void service_pm()
+{
+ int r, slot;
+
+ if (verbose) printf("service_pm: %d (%d)\n", call_nr, mthread_self());
+ switch (call_nr) {
+ case PM_SETUID:
+ pm_setuid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID);
+
+ m_out.m_type = PM_SETUID_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+
+ case PM_SETGID:
+ pm_setgid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID);
+
+ m_out.m_type = PM_SETGID_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+
+ case PM_SETSID:
+ pm_setsid(m_in.PM_PROC);
+
+ m_out.m_type = PM_SETSID_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+
+ case PM_EXEC:
+ case PM_EXIT:
+ case PM_DUMPCORE:
+ okendpt(m_in.PM_PROC, &slot);
+ fp = &fproc[slot];
+
+ assert(!(fp->fp_flags & FP_PENDING));
+ fp->fp_job.j_m_in = m_in;
+ fp->fp_flags |= FP_PM_PENDING;
+
+#if 0
+ printf("Postponing: ");
+ if (call_nr == PM_EXEC) printf("PM_EXEC");
+ if (call_nr == PM_EXIT) printf("PM_EXIT");
+ if (call_nr == PM_DUMPCORE) printf("PM_DUMPCORE");
+ printf("\n");
+#endif
+
+ /* PM requests on behalf of a proc are handled after the system call
+ * that might be in progress for that proc has finished. If the proc
+ * is not busy, we start a dummy call */
+ if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) {
+ mutex_unlock(&fp->fp_lock);
+ worker_start(do_dummy);
+ yield();
+ }
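+ /* Illustrative walk-through: do_dummy() acquires and releases fp_lock;
+ * its thread_cleanup() then notices FP_PM_PENDING and calls
+ * service_pm_postponed() to carry out the postponed request.
+ */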
+
+ return;
+
+ case PM_FORK:
+ case PM_SRV_FORK:
+ pm_fork(m_in.PM_PPROC, m_in.PM_PROC, m_in.PM_CPID);
+
+ m_out.m_type = (call_nr == PM_FORK) ? PM_FORK_REPLY : PM_SRV_FORK_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+ case PM_SETGROUPS:
+ pm_setgroups(m_in.PM_PROC, m_in.PM_GROUP_NO, m_in.PM_GROUP_ADDR);
+
+ m_out.m_type = PM_SETGROUPS_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+
+ case PM_UNPAUSE:
+ unpause(m_in.PM_PROC);
+
+ m_out.m_type = PM_UNPAUSE_REPLY;
+ m_out.PM_PROC = m_in.PM_PROC;
+
+ break;
+
+ case PM_REBOOT:
+ pm_reboot();
+
+ /* Reply dummy status to PM for synchronization */
+ m_out.m_type = PM_REBOOT_REPLY;
+
+ break;
+
+ default:
+ printf("VFS: don't know how to handle PM request %d\n", call_nr);
+
+ return;
+ }
+
+ r = send(PM_PROC_NR, &m_out);
+ if (r != OK)
+ panic("service_pm: send failed: %d", r);
+
+}
+
+
+/*===========================================================================*
+ * unblock *
+ *===========================================================================*/
+PRIVATE int unblock(rfp)
+struct fproc *rfp;
+{
+ int blocked_on;
+
+ fp = rfp;
+ blocked_on = rfp->fp_blocked_on;
+ m_in.m_type = rfp->fp_block_callnr;
+ m_in.fd = rfp->fp_block_fd;
+ m_in.buffer = rfp->fp_buffer;
+ m_in.nbytes = rfp->fp_nbytes;
+
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* no longer blocked */
+ rfp->fp_flags &= ~FP_REVIVED;
+ reviving--;
+ assert(reviving >= 0);
+
+ /* This should be a pipe I/O, not a device I/O. If it were a device I/O,
+ * the grant would 'leak'.
+ */
+ assert(!GRANT_VALID(rfp->fp_grant));
+
+ /* Pending pipe reads/writes can be handled directly */
+ if (blocked_on == FP_BLOCKED_ON_PIPE) {
+ worker_start(do_pending_pipe);
+ yield(); /* Give thread a chance to run */
+ return(0); /* Retrieve more work */
+ }
+
+ return(1); /* We've unblocked a process */
+}
--- /dev/null
+/* This file contains a collection of miscellaneous procedures. Some of them
+ * perform simple system calls. Some others do a little part of system calls
+ * that are mostly performed by the Process Manager.
+ *
+ * The entry points into this file are
+ * do_dup: perform the DUP system call
+ * do_fcntl: perform the FCNTL system call
+ * do_sync: perform the SYNC system call
+ * do_fsync: perform the FSYNC system call
+ * pm_reboot: sync disks and prepare for shutdown
+ * pm_fork: adjust the tables after PM has performed a FORK system call
+ * pm_exit: a process has exited; note that in the tables
+ * pm_setuid, pm_setgid, pm_setgroups: set uid/gid/groups for some process
+ * do_svrctl: file system control
+ * do_getsysinfo: request copy of FS data structure
+ * pm_dumpcore: create a core dump
+ * ds_event: handle DS driver-up events
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <minix/callnr.h>
+#include <minix/safecopies.h>
+#include <minix/endpoint.h>
+#include <minix/com.h>
+#include <minix/sysinfo.h>
+#include <minix/u64.h>
+#include <sys/ptrace.h>
+#include <sys/svrctl.h>
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+#include "param.h"
+
+#define CORE_NAME "core"
+#define CORE_MODE 0777 /* mode to use on core image files */
+
+#if ENABLE_SYSCALL_STATS
+PUBLIC unsigned long calls_stats[NCALLS];
+#endif
+
+FORWARD _PROTOTYPE( void free_proc, (struct fproc *freed, int flags) );
+/*
+FORWARD _PROTOTYPE( int dumpcore, (int proc_e, struct mem_map *seg_ptr) );
+FORWARD _PROTOTYPE( int write_bytes, (struct inode *rip, off_t off,
+ char *buf, size_t bytes) );
+FORWARD _PROTOTYPE( int write_seg, (struct inode *rip, off_t off, int proc_e,
+ int seg, off_t seg_off, phys_bytes seg_bytes) );
+*/
+
+/*===========================================================================*
+ * do_getsysinfo *
+ *===========================================================================*/
+PUBLIC int do_getsysinfo()
+{
+ vir_bytes src_addr, dst_addr;
+ size_t len;
+
+ /* Only su may call do_getsysinfo. This call may leak information (and is not
+ * stable enough to be part of the API/ABI).
+ */
+
+ if (!super_user) return(EPERM);
+
+ /* This call should no longer be used by user applications. In the future,
+ * requests from non-system processes should be denied. For now, just warn.
+ */
+ if (call_nr == GETSYSINFO) {
+ printf("VFS: obsolete call of do_getsysinfo() by proc %d\n",
+ fp->fp_endpoint);
+ }
+
+ switch(m_in.info_what) {
+ case SI_PROC_TAB:
+ src_addr = (vir_bytes) fproc;
+ len = sizeof(struct fproc) * NR_PROCS;
+ break;
+ case SI_DMAP_TAB:
+ src_addr = (vir_bytes) dmap;
+ len = sizeof(struct dmap) * NR_DEVICES;
+ break;
+#if ENABLE_SYSCALL_STATS
+ case SI_CALL_STATS:
+ src_addr = (vir_bytes) calls_stats;
+ len = sizeof(calls_stats);
+ break;
+#endif
+ default:
+ return(EINVAL);
+ }
+
+ dst_addr = (vir_bytes) m_in.info_where;
+ return sys_datacopy(SELF, src_addr, who_e, dst_addr, len);
+}
+
+/*===========================================================================*
+ * do_dup *
+ *===========================================================================*/
+PUBLIC int do_dup()
+{
+/* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are
+ * obsolete. In fact, it is not even possible to invoke them using the
+ * current library because the library routines call fcntl(). They are
+ * provided to permit old binary programs to continue to run.
+ */
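+/* Illustrative example of the encoding handled below: dup2(3, 5) arrives
+ * with m_in.fd == (3 | DUP_MASK) and m_in.fd2 == 5, whereas plain dup(3)
+ * arrives with the DUP_MASK bit off.
+ */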
+
+ register int rfd;
+ register struct filp *f;
+ int r = OK;
+
+ /* Is the file descriptor valid? */
+ rfd = m_in.fd & ~DUP_MASK; /* kill off dup2 bit, if on */
+ if ((f = get_filp(rfd, VNODE_READ)) == NULL) return(err_code);
+
+ /* Distinguish between dup and dup2. */
+ if (m_in.fd == rfd) { /* bit not on */
+ /* dup(fd) */
+ r = get_fd(0, 0, &m_in.fd2, NULL);
+ } else {
+ /* dup2(old_fd, new_fd) */
+ if (m_in.fd2 < 0 || m_in.fd2 >= OPEN_MAX) {
+ r = EBADF;
+ } else if (rfd == m_in.fd2) { /* ignore the call: dup2(x, x) */
+ r = m_in.fd2;
+ } else {
+ /* All is fine, close new_fd if necessary */
+ m_in.fd = m_in.fd2; /* prepare to close fd2 */
+ unlock_filp(f); /* or it might deadlock on do_close */
+ (void) do_close(); /* cannot fail */
+ f = get_filp(rfd, VNODE_READ); /* lock old_fd again */
+ }
+ }
+
+ if (r == OK) {
+ /* Success. Set up new file descriptors. */
+ f->filp_count++;
+ fp->fp_filp[m_in.fd2] = f;
+ FD_SET(m_in.fd2, &fp->fp_filp_inuse);
+ r = m_in.fd2;
+ }
+
+ unlock_filp(f);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_fcntl *
+ *===========================================================================*/
+PUBLIC int do_fcntl()
+{
+/* Perform the fcntl(fd, request, ...) system call. */
+
+ register struct filp *f;
+ int new_fd, fl, r = OK;
+ tll_access_t locktype;
+
+ /* Is the file descriptor valid? */
+ locktype = (m_in.request == F_FREESP) ? VNODE_WRITE : VNODE_READ;
+ if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code);
+
+ switch (m_in.request) {
+ case F_DUPFD:
+ /* This replaces the old dup() system call. */
+ if (m_in.addr < 0 || m_in.addr >= OPEN_MAX) r = EINVAL;
+ else if ((r = get_fd(m_in.addr, 0, &new_fd, NULL)) == OK) {
+ f->filp_count++;
+ fp->fp_filp[new_fd] = f;
+ r = new_fd;
+ }
+ break;
+
+ case F_GETFD:
+ /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
+ r = FD_ISSET(m_in.fd, &fp->fp_cloexec_set) ? FD_CLOEXEC : 0;
+ break;
+
+ case F_SETFD:
+ /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
+ if(m_in.addr & FD_CLOEXEC)
+ FD_SET(m_in.fd, &fp->fp_cloexec_set);
+ else
+ FD_CLR(m_in.fd, &fp->fp_cloexec_set);
+ break;
+
+ case F_GETFL:
+ /* Get file status flags (O_NONBLOCK, O_APPEND, and access mode). */
+ fl = f->filp_flags & (O_NONBLOCK | O_APPEND | O_ACCMODE);
+ r = fl;
+ break;
+
+ case F_SETFL:
+ /* Set file status flags (O_NONBLOCK, O_APPEND, and O_REOPEN). */
+ fl = O_NONBLOCK | O_APPEND | O_REOPEN;
+ f->filp_flags = (f->filp_flags & ~fl) | (m_in.addr & fl);
+ break;
+
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ /* Set or clear a file lock. */
+ r = lock_op(f, m_in.request);
+ break;
+
+ case F_FREESP:
+ {
+ /* Free a section of a file. Preparation is done here, actual freeing
+ * in freesp_inode().
+ */
+ off_t start, end;
+ struct flock flock_arg;
+ signed long offset;
+
+ /* Check if it's a regular file. */
+ if ((f->filp_vno->v_mode & I_TYPE) != I_REGULAR) r = EINVAL;
+ else if (!(f->filp_mode & W_BIT)) r = EBADF;
+ else
+ /* Copy flock data from userspace. */
+ r = sys_datacopy(who_e, (vir_bytes) m_in.name1, SELF,
+ (vir_bytes) &flock_arg,
+ (phys_bytes) sizeof(flock_arg));
+
+ if (r != OK) break;
+
+ /* Convert starting offset to signed. */
+ offset = (signed long) flock_arg.l_start;
+
+ /* Figure out starting position base. */
+ switch(flock_arg.l_whence) {
+ case SEEK_SET: start = 0; break;
+ case SEEK_CUR:
+ if (ex64hi(f->filp_pos) != 0)
+ panic("do_fcntl: position in file too high");
+ start = ex64lo(f->filp_pos);
+ break;
+ case SEEK_END: start = f->filp_vno->v_size; break;
+ default: r = EINVAL;
+ }
+ if (r != OK) break;
+
+ /* Check for overflow or underflow. */
+ if (offset > 0 && start + offset < start) r = EINVAL;
+ else if (offset < 0 && start + offset > start) r = EINVAL;
+ else {
+ start += offset;
+ if (start < 0) r = EINVAL;
+ }
+ if (r != OK) break;
+
+ if (flock_arg.l_len != 0) {
+ if (start >= f->filp_vno->v_size) r = EINVAL;
+ else if ((end = start + flock_arg.l_len) <= start) r = EINVAL;
+ else if (end > f->filp_vno->v_size) end = f->filp_vno->v_size;
+ } else {
+ end = 0;
+ }
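+ /* Illustrative example: l_whence == SEEK_SET, l_start == 4096 and
+ * l_len == 0 free everything from byte 4096 up to end of file; the
+ * truncate request below then shrinks v_size to 4096.
+ */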
+ if (r != OK) break;
+
+ r = req_ftrunc(f->filp_vno->v_fs_e, f->filp_vno->v_inode_nr,start,end);
+
+ if (r == OK && flock_arg.l_len == 0)
+ f->filp_vno->v_size = start;
+
+ break;
+ }
+
+ default:
+ r = EINVAL;
+ }
+
+ unlock_filp(f);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_sync *
+ *===========================================================================*/
+PUBLIC int do_sync()
+{
+ struct vmnt *vmp;
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
+ lock_vmnt(vmp, VMNT_EXCL);
+ if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE)
+ req_sync(vmp->m_fs_e);
+ unlock_vmnt(vmp);
+ }
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * do_fsync *
+ *===========================================================================*/
+PUBLIC int do_fsync()
+{
+/* Perform the fsync() system call. For now, don't be unnecessarily smart. */
+ struct filp *rfilp;
+ struct vmnt *vmp;
+ dev_t dev;
+
+ if ((rfilp = get_filp(m_in.m1_i1, VNODE_READ)) == NULL) return(err_code);
+ dev = rfilp->filp_vno->v_dev;
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
+ lock_vmnt(vmp, VMNT_EXCL);
+ if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && vmp->m_fs_e != NONE)
+ req_sync(vmp->m_fs_e);
+ unlock_vmnt(vmp);
+ }
+
+ unlock_filp(rfilp);
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * pm_reboot *
+ *===========================================================================*/
+PUBLIC void pm_reboot()
+{
+ /* Perform the VFS side of the reboot call. */
+ int i;
+ struct fproc *rfp;
+
+ do_sync();
+
+ /* Do exit processing for all leftover processes and servers,
+ * but don't actually exit them (if they were really gone, PM
+ * will tell us about it).
+ */
+ for (i = 0; i < NR_PROCS; i++) {
+ /* Don't just free the proc right away, but let it finish what it was
+ * doing first */
+ rfp = &fproc[i];
+ if (rfp->fp_endpoint != NONE) {
+ lock_proc(rfp, 0);
+ free_proc(rfp, 0);
+ unlock_proc(rfp);
+ }
+ }
+
+ unmount_all();
+}
+
+/*===========================================================================*
+ * pm_fork *
+ *===========================================================================*/
+PUBLIC void pm_fork(pproc, cproc, cpid)
+int pproc; /* Parent process */
+int cproc; /* Child process */
+int cpid; /* Child process id */
+{
+/* Perform those aspects of the fork() system call that relate to files.
+ * In particular, let the child inherit its parent's file descriptors.
+ * The parent and child parameters tell who forked off whom. The file
+ * system uses the same slot numbers as the kernel. Only PM makes this call.
+ */
+
+ register struct fproc *cp, *pp;
+ int i, parentno, childno;
+ mutex_t c_fp_lock;
+
+ /* Check up-to-dateness of fproc. */
+ okendpt(pproc, &parentno);
+
+ /* PM gives child endpoint, which implies process slot information.
+ * Don't call isokendpt, because that will verify if the endpoint
+ * number is correct in fproc, which it won't be.
+ */
+ childno = _ENDPOINT_P(cproc);
+ if (childno < 0 || childno >= NR_PROCS)
+ panic("VFS: bogus child for forking: %d", m_in.child_endpt);
+ if (fproc[childno].fp_pid != PID_FREE)
+ panic("VFS: forking on top of in-use child: %d", childno);
+
+ /* Copy the parent's fproc struct to the child. */
+ /* However, the mutex variables belong to a slot and must stay the same. */
+ c_fp_lock = fproc[childno].fp_lock;
+ fproc[childno] = fproc[parentno];
+ fproc[childno].fp_lock = c_fp_lock;
+
+ /* Increase the counters in the 'filp' table. */
+ cp = &fproc[childno];
+ pp = &fproc[parentno];
+
+ for (i = 0; i < OPEN_MAX; i++)
+ if (cp->fp_filp[i] != NULL) cp->fp_filp[i]->filp_count++;
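+ /* Illustrative note: parent and child now share the same filp objects;
+ * a close() by one of them merely decrements filp_count, so the open
+ * file stays valid for the other.
+ */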
+
+ /* Fill in new process and endpoint id. */
+ cp->fp_pid = cpid;
+ cp->fp_endpoint = cproc;
+
+ /* A forking process never has an outstanding grant, as it isn't blocking on
+ * I/O. */
+ if(GRANT_VALID(pp->fp_grant)) {
+ panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp->fp_endpoint,
+ pp->fp_grant);
+ }
+ if(GRANT_VALID(cp->fp_grant)) {
+ panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp->fp_endpoint,
+ cp->fp_grant);
+ }
+
+ /* A child is not a process leader, not being revived, etc. */
+ cp->fp_flags = FP_NOFLAGS;
+
+ /* Record the fact that both root and working dir have another user. */
+ if (cp->fp_rd) dup_vnode(cp->fp_rd);
+ if (cp->fp_wd) dup_vnode(cp->fp_wd);
+}
+
+/*===========================================================================*
+ * free_proc *
+ *===========================================================================*/
+PRIVATE void free_proc(struct fproc *exiter, int flags)
+{
+ int i;
+ register struct fproc *rfp;
+ register struct filp *rfilp;
+ register struct vnode *vp;
+ dev_t dev;
+
+ if (exiter->fp_endpoint == NONE)
+ panic("free_proc: already free");
+
+ if (fp_is_blocked(exiter))
+ unpause(exiter->fp_endpoint);
+
+ /* Loop on file descriptors, closing any that are open. */
+ for (i = 0; i < OPEN_MAX; i++) {
+ (void) close_fd(exiter, i);
+ }
+
+ /* Check if any process is SUSPENDed on this driver.
+ * If a driver exits, unmap its entries in the dmap table.
+ * (unmapping has to be done after the first step, because the
+ * dmap table is used in the first step.)
+ */
+ unsuspend_by_endpt(exiter->fp_endpoint);
+
+ /* Release root and working directories. */
+ if (exiter->fp_rd) { put_vnode(exiter->fp_rd); exiter->fp_rd = NULL; }
+ if (exiter->fp_wd) { put_vnode(exiter->fp_wd); exiter->fp_wd = NULL; }
+
+ /* The rest of these actions is only done when processes actually exit. */
+ if (!(flags & FP_EXITING)) return;
+
+ /* Invalidate endpoint number for error and sanity checks. */
+ exiter->fp_endpoint = NONE;
+ exiter->fp_flags |= FP_EXITING;
+
+ /* If a session leader exits and it has a controlling tty, then revoke
+ * access to its controlling tty from all other processes using it.
+ */
+ if ((exiter->fp_flags & FP_SESLDR) && exiter->fp_tty != 0) {
+ dev = exiter->fp_tty;
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if(rfp->fp_pid == PID_FREE) continue;
+ if (rfp->fp_tty == dev) rfp->fp_tty = 0;
+
+ for (i = 0; i < OPEN_MAX; i++) {
+ if ((rfilp = rfp->fp_filp[i]) == NULL) continue;
+ if (rfilp->filp_mode == FILP_CLOSED) continue;
+ vp = rfilp->filp_vno;
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+ if ((dev_t) vp->v_sdev != dev) continue;
+ lock_filp(rfilp, VNODE_READ);
+ (void) dev_close(dev, rfilp - filp); /* Ignore any errors, even
+ * SUSPEND. */
+
+ rfilp->filp_mode = FILP_CLOSED;
+ unlock_filp(rfilp);
+ }
+ }
+ }
+
+ /* Exit done. Mark slot as free. */
+ exiter->fp_pid = PID_FREE;
+ if (exiter->fp_flags & FP_PENDING)
+ pending--; /* No longer pending job, not going to do it */
+ exiter->fp_flags = FP_NOFLAGS;
+}
+
+/*===========================================================================*
+ * pm_exit *
+ *===========================================================================*/
+PUBLIC void pm_exit(proc)
+int proc;
+{
+/* Perform the file system portion of the exit(status) system call. */
+ int exitee_p;
+
+ /* PM announced the exit; pretend the call came from the exiting process. */
+ okendpt(proc, &exitee_p);
+ fp = &fproc[exitee_p];
+ free_proc(fp, FP_EXITING);
+}
+
+/*===========================================================================*
+ * pm_setgid *
+ *===========================================================================*/
+PUBLIC void pm_setgid(proc_e, egid, rgid)
+int proc_e;
+int egid;
+int rgid;
+{
+ register struct fproc *tfp;
+ int slot;
+
+ okendpt(proc_e, &slot);
+ tfp = &fproc[slot];
+
+ tfp->fp_effgid = egid;
+ tfp->fp_realgid = rgid;
+}
+
+
+/*===========================================================================*
+ * pm_setgroups *
+ *===========================================================================*/
+PUBLIC void pm_setgroups(proc_e, ngroups, groups)
+int proc_e;
+int ngroups;
+gid_t *groups;
+{
+ struct fproc *rfp;
+ int slot;
+
+ okendpt(proc_e, &slot);
+ rfp = &fproc[slot];
+ if (ngroups * sizeof(gid_t) > sizeof(rfp->fp_sgroups))
+ panic("VFS: pm_setgroups: too much data to copy");
+ if (sys_datacopy(who_e, (vir_bytes) groups, SELF, (vir_bytes) rfp->fp_sgroups,
+ ngroups * sizeof(gid_t)) == OK) {
+ rfp->fp_ngroups = ngroups;
+ } else
+ panic("VFS: pm_setgroups: datacopy failed");
+}
+
+
+/*===========================================================================*
+ * pm_setuid *
+ *===========================================================================*/
+PUBLIC void pm_setuid(proc_e, euid, ruid)
+int proc_e;
+int euid;
+int ruid;
+{
+ struct fproc *tfp;
+ int slot;
+
+ okendpt(proc_e, &slot);
+ tfp = &fproc[slot];
+
+ tfp->fp_effuid = euid;
+ tfp->fp_realuid = ruid;
+}
+
+/*===========================================================================*
+ * do_svrctl *
+ *===========================================================================*/
+PUBLIC int do_svrctl()
+{
+ switch (m_in.svrctl_req) {
+ /* No control request implemented yet. */
+ default:
+ return(EINVAL);
+ }
+}
+
+/*===========================================================================*
+ * pm_dumpcore *
+ *===========================================================================*/
+PUBLIC int pm_dumpcore(proc_e, seg_ptr)
+int proc_e;
+struct mem_map *seg_ptr;
+{
+ int slot;
+
+ okendpt(proc_e, &slot);
+ free_proc(&fproc[slot], FP_EXITING);
+ return(OK);
+}
+
+/*===========================================================================*
+ * ds_event *
+ *===========================================================================*/
+PUBLIC void ds_event()
+{
+ char key[DS_MAX_KEYLEN];
+ char *drv_prefix = "drv.vfs.";
+ u32_t value;
+ int type, r;
+ endpoint_t owner_endpoint;
+
+ /* Get the event and the owner from DS. */
+ if ((r = ds_check(key, &type, &owner_endpoint)) != OK) {
+ if(r != ENOENT) printf("VFS: ds_event: ds_check failed: %d\n", r);
+ return;
+ }
+ if ((r = ds_retrieve_u32(key, &value)) != OK) {
+ printf("VFS: ds_event: ds_retrieve_u32 failed\n");
+ return;
+ }
+
+ /* Only check for VFS driver up events. */
+ if (strncmp(key, drv_prefix, strlen(drv_prefix)) || value != DS_DRIVER_UP)
+ return;
+
+ /* Perform up. */
+ dmap_endpt_up(owner_endpoint);
+}
--- /dev/null
+/* This file performs the MOUNT and UMOUNT system calls.
+ *
+ * The entry points into this file are
+ * do_fsready: perform the FS_READY system call
+ * do_mount: perform the MOUNT system call
+ * do_umount: perform the UMOUNT system call
+ * unmount: unmount a file system
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <string.h>
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/keymap.h>
+#include <minix/const.h>
+#include <minix/endpoint.h>
+#include <minix/syslib.h>
+#include <minix/bitmap.h>
+#include <minix/ds.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <dirent.h>
+#include <assert.h>
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+#include "path.h"
+#include "param.h"
+
+/* Allow the root to be replaced before the first 'real' mount. */
+PRIVATE int have_root = 0;
+
+/* Bitmap of in-use "none" pseudo devices. */
+PRIVATE bitchunk_t nonedev[BITMAP_CHUNKS(NR_NONEDEVS)] = { 0 };
+
+#define alloc_nonedev(dev) SET_BIT(nonedev, minor(dev) - 1)
+#define free_nonedev(dev) UNSET_BIT(nonedev, minor(dev) - 1)
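+/* Illustrative note: "none" pseudo-device minors start at 1, so the
+ * pseudo-device with minor n occupies bit (n - 1) of the bitmap.
+ */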
+
+FORWARD _PROTOTYPE( dev_t name_to_dev, (int allow_mountpt,
+ char path[PATH_MAX+1]) );
+FORWARD _PROTOTYPE( int is_nonedev, (dev_t dev) );
+FORWARD _PROTOTYPE( dev_t find_free_nonedev, (void) );
+FORWARD _PROTOTYPE( void update_bspec, (dev_t dev, endpoint_t fs_e,
+ int send_drv_e) );
+
+/*===========================================================================*
+ * update_bspec *
+ *===========================================================================*/
+PRIVATE void update_bspec(dev_t dev, endpoint_t fs_e, int send_drv_e)
+{
+/* Update all block special files for a certain device, to use a new FS endpt
+ * to route raw block I/O requests through.
+ */
+ struct vnode *vp;
+ struct dmap *dp;
+ int r, major;
+
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
+ if (vp->v_ref_count > 0 && S_ISBLK(vp->v_mode) && vp->v_sdev == dev) {
+ vp->v_bfs_e = fs_e;
+ if (send_drv_e) {
+ major = major(dev);
+ if (major < 0 || major >= NR_DEVICES) {
+ /* Can't update driver endpoint for out of
+ * range major */
+ continue;
+ }
+ dp = &dmap[major];
+ if (dp->dmap_driver == NONE) {
+ /* Can't send new driver endpoint for
+ * vanished driver */
+ printf("VFS: can't send new driver endpt\n");
+ continue;
+ }
+
+ if ((r = req_newdriver(fs_e, vp->v_sdev,
+ dp->dmap_driver)) != OK) {
+ printf("VFS: Failed to send new driver endpoint"
+ " for moved block special file\n");
+ }
+ }
+ }
+}
+
+/*===========================================================================*
+ * do_fsready *
+ *===========================================================================*/
+PUBLIC int do_fsready()
+{
+ /* deprecated */
+ return(SUSPEND);
+}
+
+/*===========================================================================*
+ * do_mount *
+ *===========================================================================*/
+PUBLIC int do_mount()
+{
+/* Perform the mount(name, mfile, mount_flags) system call. */
+ endpoint_t fs_e;
+ int r, slot, rdonly, nodev;
+ char fullpath[PATH_MAX+1];
+ char mount_label[LABEL_MAX];
+ dev_t dev;
+
+ /* Only the super-user may do MOUNT. */
+ if (!super_user) return(EPERM);
+
+ /* FS process' endpoint number */
+ if (m_in.mount_flags & MS_LABEL16) {
+ /* Get the label from the caller, and ask DS for the endpoint. */
+ r = sys_datacopy(who_e, (vir_bytes) m_in.fs_label, SELF,
+ (vir_bytes) mount_label, (phys_bytes) sizeof(mount_label));
+ if (r != OK) return(r);
+
+ mount_label[sizeof(mount_label)-1] = 0;
+
+ r = ds_retrieve_label_endpt(mount_label, &fs_e);
+ if (r != OK) return(r);
+ } else {
+ /* Legacy support: get the endpoint from the request itself. */
+ fs_e = (endpoint_t) m_in.fs_label;
+ mount_label[0] = 0;
+ }
+
+ /* Sanity check on process number. */
+ if (isokendpt(fs_e, &slot) != OK) return(EINVAL);
+
+ /* Should the file system be mounted read-only? */
+ rdonly = (m_in.mount_flags & MS_RDONLY);
+
+ /* A null string for block special device means don't use a device at all. */
+ nodev = (m_in.name1_length == 0);
+ if (!nodev) {
+ /* If 'name' is not for a block special file, return error. */
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((dev = name_to_dev(FALSE /*allow_mountpt*/, fullpath)) == NO_DEV)
+ return(err_code);
+ } else {
+ /* Find a free pseudo-device as substitute for an actual device. */
+ if ((dev = find_free_nonedev()) == NO_DEV)
+ return(err_code);
+ }
+
+ /* Fetch the name of the mountpoint */
+ if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK)
+ return(err_code);
+
+ /* Do the actual job */
+ return mount_fs(dev, fullpath, fs_e, rdonly, mount_label);
+}
+
+
+/*===========================================================================*
+ * mount_fs *
+ *===========================================================================*/
+PUBLIC int mount_fs(
+dev_t dev,
+char mountpoint[PATH_MAX+1],
+endpoint_t fs_e,
+int rdonly,
+char mount_label[LABEL_MAX] )
+{
+ int rdir, mdir; /* TRUE iff {root|mount} file is dir */
+ int i, r = OK, found, isroot, mount_root;
+ struct fproc *tfp;
+ struct dmap *dp;
+ struct vnode *root_node, *vp = NULL, *bspec;
+ struct vmnt *new_vmp, *parent_vmp;
+ char *label;
+ struct node_details res;
+ struct lookup resolve;
+
+ /* Look up block device driver label when dev is not a pseudo-device */
+ label = "";
+ if (!is_nonedev(dev)) {
+ /* Get driver process' endpoint */
+ dp = &dmap[major(dev)];
+ if (dp->dmap_driver == NONE) {
+ printf("VFS: no driver for dev %d\n", dev);
+ return(EINVAL);
+ }
+
+ label = dp->dmap_label;
+ assert(strlen(label) > 0);
+ }
+
+ lock_bsf();
+
+ /* Check whether there is a block special file open which uses the
+ * same device (partition) */
+ for (bspec = &vnode[0]; bspec < &vnode[NR_VNODES]; ++bspec) {
+ if (bspec->v_ref_count > 0 && bspec->v_sdev == dev) {
+ /* Found, flush and invalidate any blocks for this device. */
+ req_flush(bspec->v_fs_e, dev);
+ break;
+ }
+ }
+
+ /* Scan vmnt table to see if dev already mounted. If not, find a free slot.*/
+ found = FALSE;
+ for (i = 0; i < NR_MNTS; ++i) {
+ if (vmnt[i].m_dev == dev) found = TRUE;
+ }
+ if (found) {
+ unlock_bsf();
+ return(EBUSY);
+ } else if ((new_vmp = get_free_vmnt()) == NULL) {
+ unlock_bsf();
+ return(ENOMEM);
+ }
+
+ lock_vmnt(new_vmp, VMNT_EXCL);
+
+ isroot = (strcmp(mountpoint, "/") == 0);
+ mount_root = (isroot && have_root < 2); /* Root can be mounted twice:
+ * 1: ramdisk
+ * 2: boot disk (e.g., harddisk)
+ */
+
+ if (!mount_root) {
+ /* Get vnode of mountpoint */
+ lookup_init(&resolve, mountpoint, PATH_NOFLAGS, &parent_vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_EXCL;
+ resolve.l_vnode_lock = VNODE_WRITE;
+ if ((vp = eat_path(&resolve, fp)) == NULL)
+ r = err_code;
+ else if (vp->v_ref_count == 1) {
+ /* Tell FS on which vnode it is mounted (glue into mount tree) */
+ r = req_mountpoint(vp->v_fs_e, vp->v_inode_nr);
+ } else
+ r = EBUSY;
+
+ if (r != OK) {
+ if (vp != NULL) {
+ unlock_vnode(vp);
+ unlock_vmnt(parent_vmp);
+ put_vnode(vp);
+ }
+ unlock_vmnt(new_vmp);
+ unlock_bsf();
+ return(r);
+ }
+ }
+
+/* XXX: move this upwards before lookup after proper locking. */
+ /* We'll need a vnode for the root inode */
+ if ((root_node = get_free_vnode()) == NULL) {
+ if (vp != NULL) {
+ unlock_vnode(vp);
+ unlock_vmnt(parent_vmp);
+ put_vnode(vp);
+ }
+ unlock_vmnt(new_vmp);
+ unlock_bsf();
+ return(err_code);
+ }
+
+ lock_vnode(root_node, VNODE_OPCL);
+
+ /* Store some essential vmnt data first */
+ new_vmp->m_fs_e = fs_e;
+ new_vmp->m_dev = dev;
+ if (rdonly) new_vmp->m_flags |= VMNT_READONLY;
+ else new_vmp->m_flags &= ~VMNT_READONLY;
+
+ /* Tell FS which device to mount */
+ if ((r = req_readsuper(fs_e, label, dev, rdonly, isroot, &res)) != OK) {
+ if (vp != NULL) {
+ unlock_vnode(vp);
+ unlock_vmnt(parent_vmp);
+ put_vnode(vp);
+ }
+ new_vmp->m_fs_e = NONE;
+ new_vmp->m_dev = NO_DEV;
+ unlock_vnode(root_node);
+ unlock_vmnt(new_vmp);
+ unlock_bsf();
+ return(r);
+ }
+
+ /* Fill in root node's fields */
+ root_node->v_fs_e = res.fs_e;
+ root_node->v_inode_nr = res.inode_nr;
+ root_node->v_mode = res.fmode;
+ root_node->v_uid = res.uid;
+ root_node->v_gid = res.gid;
+ root_node->v_size = res.fsize;
+ root_node->v_sdev = NO_DEV;
+ root_node->v_fs_count = 1;
+ root_node->v_ref_count = 1;
+
+ /* Root node is indeed on the partition */
+ root_node->v_vmnt = new_vmp;
+ root_node->v_dev = new_vmp->m_dev;
+
+ if(mount_root) {
+ /* Superblock and root node already read.
+ * Nothing else can go wrong. Perform the mount. */
+ new_vmp->m_root_node = root_node;
+ new_vmp->m_mounted_on = NULL;
+ strcpy(new_vmp->m_label, mount_label);
+ if (is_nonedev(dev)) alloc_nonedev(dev);
+ update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */);
+
+ ROOT_DEV = dev;
+ ROOT_FS_E = fs_e;
+
+ /* Replace all root and working directories */
+ for (i = 0, tfp = fproc; i < NR_PROCS; i++, tfp++) {
+ if (tfp->fp_pid == PID_FREE)
+ continue;
+
+#define MAKEROOT(what) { \
+ if (what) put_vnode(what); \
+ dup_vnode(root_node); \
+ what = root_node; \
+ }
+
+ MAKEROOT(tfp->fp_rd);
+ MAKEROOT(tfp->fp_wd);
+ }
+
+ unlock_vnode(root_node);
+ unlock_vmnt(new_vmp);
+ have_root++; /* We have a (new) root */
+ unlock_bsf();
+ return(OK);
+ }
+
+ /* File types may not conflict. */
+ mdir = ((vp->v_mode & I_TYPE) == I_DIRECTORY); /* TRUE iff dir */
+ rdir = ((root_node->v_mode & I_TYPE) == I_DIRECTORY);
+ if (!mdir && rdir) r = EISDIR;
+
+ /* If error, return the super block and both inodes; release the vmnt. */
+ if (r != OK) {
+ unlock_vnode(vp);
+ unlock_vmnt(parent_vmp);
+ unlock_vnode(root_node);
+ unlock_vmnt(new_vmp);
+ put_vnode(vp);
+ put_vnode(root_node);
+ new_vmp->m_dev = NO_DEV;
+ unlock_bsf();
+ return(r);
+ }
+
+ /* Nothing else can go wrong. Perform the mount. */
+ new_vmp->m_mounted_on = vp;
+ new_vmp->m_root_node = root_node;
+ strcpy(new_vmp->m_label, mount_label);
+
+ /* Allocate the pseudo device that was found, if not using a real device. */
+ if (is_nonedev(dev)) alloc_nonedev(dev);
+
+ /* The new FS will handle block I/O requests for its device now. */
+ update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */);
+
+ unlock_vnode(vp);
+ unlock_vmnt(parent_vmp);
+ unlock_vnode(root_node);
+ unlock_vmnt(new_vmp);
+ unlock_bsf();
+
+ return(r);
+}
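+
+/* Locking recap (informal, derived from the code above): mount_fs() takes
+ * lock_bsf() first, then the new vmnt (VMNT_EXCL), then -- for non-root
+ * mounts -- the parent vmnt and mountpoint vnode via eat_path(), and finally
+ * the new root_node. Every error path above releases these in reverse order
+ * before returning, keeping the lock ordering consistent.
+ */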
+
+
+/*===========================================================================*
+ * mount_pfs *
+ *===========================================================================*/
+PUBLIC void mount_pfs(void)
+{
+/* Mount the Pipe File Server. It's not really mounted onto the file system,
+ but it needs a vmnt entry to make locking easier. */
+
+ dev_t dev;
+ struct vmnt *vmp;
+
+ if ((dev = find_free_nonedev()) == NO_DEV)
+ panic("VFS: no nonedev to initialize PFS");
+
+ if ((vmp = get_free_vmnt()) == NULL)
+ panic("VFS: no vmnt to initialize PFS");
+
+ alloc_nonedev(dev);
+
+ vmp->m_dev = dev;
+ vmp->m_fs_e = PFS_PROC_NR;
+ strcpy(vmp->m_label, "pfs");
+}
+
+/*===========================================================================*
+ * do_umount *
+ *===========================================================================*/
+PUBLIC int do_umount(void)
+{
+/* Perform the umount(name) system call. */
+ char label[LABEL_MAX];
+ dev_t dev;
+ int r;
+ char fullpath[PATH_MAX+1];
+
+ /* Only the super-user may do umount. */
+ if (!super_user) return(EPERM);
+
+ /* If 'name' is not for a block special file or mountpoint, return error. */
+ if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+ return(err_code);
+ if ((dev = name_to_dev(TRUE /*allow_mountpt*/, fullpath)) == NO_DEV)
+ return(err_code);
+
+ if ((r = unmount(dev, label)) != OK) return(r);
+
+ /* Return the label of the mounted file system, so that the caller
+ * can shut down the corresponding server process.
+ */
+ if (strlen(label) >= M3_LONG_STRING) /* should never evaluate to true */
+ label[M3_LONG_STRING-1] = 0;
+ strcpy(m_out.umount_label, label);
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * unmount *
+ *===========================================================================*/
+PUBLIC int unmount(
+ dev_t dev, /* block-special device */
+ char *label /* buffer to retrieve label, or NULL */
+)
+{
+ struct vnode *vp;
+ struct vmnt *vmp_i = NULL, *vmp = NULL;
+ int count, locks, r;
+
+ /* Find vmnt that is to be unmounted */
+ for (vmp_i = &vmnt[0]; vmp_i < &vmnt[NR_MNTS]; ++vmp_i) {
+ if (vmp_i->m_dev == dev) {
+ if(vmp) panic("device mounted more than once: %d", dev);
+ vmp = vmp_i;
+ }
+ }
+
+ /* Did we find the vmnt (i.e., was dev a mounted device)? */
+ if(!vmp) return(EINVAL);
+
+ lock_bsf();
+
+ /* Take the lock outside assert(); asserts vanish under NDEBUG. */
+ r = lock_vmnt(vmp, VMNT_EXCL);
+ assert(r == OK);
+
+ /* See if the mounted device is busy. Only one vnode using it should be
+ * open -- the root vnode -- and that vnode only once. */
+ locks = count = 0;
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++)
+ if (vp->v_ref_count > 0 && vp->v_dev == dev) {
+ count += vp->v_ref_count;
+ if (is_vnode_locked(vp)) locks++;
+ }
+
+ if (count > 1 || locks > 1) {
+ unlock_vmnt(vmp);
+ unlock_bsf();
+ return(EBUSY); /* can't umount a busy file system */
+ }
+
+ /* Tell FS to drop all inode references for root inode except 1. */
+ vnode_clean_refs(vmp->m_root_node);
+
+ if (vmp->m_mounted_on) {
+ put_vnode(vmp->m_mounted_on);
+ vmp->m_mounted_on = NULL;
+ }
+
+ vmp->m_comm.c_max_reqs = 1; /* Force max concurrent reqs to just one, so
+ * we won't send any messages after the
+ * unmount request */
+
+ /* Tell FS to unmount */
+ if ((r = req_unmount(vmp->m_fs_e)) != OK) /* Not recoverable. */
+ printf("VFS: ignoring failed umount attempt FS endpoint: %d (%d)\n",
+ vmp->m_fs_e, r);
+
+ if (is_nonedev(vmp->m_dev)) free_nonedev(vmp->m_dev);
+
+ if (label != NULL) strcpy(label, vmp->m_label);
+
+ if (vmp->m_root_node) { /* PFS lacks a root node */
+ vmp->m_root_node->v_ref_count = 0;
+ vmp->m_root_node->v_fs_count = 0;
+ vmp->m_root_node->v_sdev = NO_DEV;
+ vmp->m_root_node = NULL;
+ }
+ vmp->m_dev = NO_DEV;
+ vmp->m_fs_e = NONE;
+
+ /* The root FS will handle block I/O requests for this device now. */
+ update_bspec(dev, ROOT_FS_E, 1 /* send new driver endpoint */);
+
+ unlock_vmnt(vmp);
+ unlock_bsf();
+ return(OK);
+}
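+
+/* Example of the busy check above (illustrative): if any process still has
+ * its working directory on the partition, that directory's vnode plus the
+ * root vnode push 'count' past 1, so unmount() returns EBUSY -- the familiar
+ * "umount: device busy" case.
+ */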
+
+
+/*===========================================================================*
+ * unmount_all *
+ *===========================================================================*/
+PUBLIC void unmount_all(void)
+{
+/* Unmount all filesystems. File systems are mounted on other file systems,
+ * so you have to pull off the loose bits repeatedly to get it all undone.
+ */
+
+ int i;
+ struct vmnt *vmp;
+
+ /* Each pass over the vmnt table unmounts at least the file systems that
+ * have nothing mounted on top of them, so NR_MNTS passes suffice. */
+ for (i = 0; i < NR_MNTS; i++) {
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) {
+ if (vmp->m_dev != NO_DEV)
+ unmount(vmp->m_dev, NULL);
+ }
+ }
+ check_vnode_locks();
+ check_vmnt_locks();
+ check_filp_locks();
+ check_bsf_lock();
+}
+
+/*===========================================================================*
+ * name_to_dev *
+ *===========================================================================*/
+PRIVATE dev_t name_to_dev(int allow_mountpt, char path[PATH_MAX+1])
+{
+/* Convert the block special file behind 'path' to a device number.
+ * If the given path is not a block special file, but 'allow_mountpt' is set
+ * and the path is the root node of a mounted file system, return that device
+ * number. In all other cases, return NO_DEV and an error code in 'err_code'.
+ */
+ dev_t dev;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ struct lookup resolve;
+
+ lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Request lookup */
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(NO_DEV);
+
+ if ((vp->v_mode & I_TYPE) == I_BLOCK_SPECIAL) {
+ dev = vp->v_sdev;
+ } else if (allow_mountpt && vp->v_vmnt->m_root_node == vp) {
+ dev = vp->v_dev;
+ } else {
+ err_code = ENOTBLK;
+ dev = NO_DEV;
+ }
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+ return(dev);
+}
+
+
+/*===========================================================================*
+ * is_nonedev *
+ *===========================================================================*/
+PRIVATE int is_nonedev(dev_t dev)
+{
+/* Return whether the given device is a "none" pseudo device.
+ */
+
+ return (major(dev) == NONE_MAJOR &&
+ minor(dev) > 0 && minor(dev) <= NR_NONEDEVS);
+}
+
+
+/*===========================================================================*
+ * find_free_nonedev *
+ *===========================================================================*/
+PRIVATE dev_t find_free_nonedev(void)
+{
+/* Find a free "none" pseudo device. Do not allocate it yet.
+ */
+ int i;
+
+ for (i = 0; i < NR_NONEDEVS; i++)
+ if (!GET_BIT(nonedev, i))
+ return makedev(NONE_MAJOR, i + 1);
+
+ err_code = EMFILE;
+ return NO_DEV;
+}
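+
+/* Sketch of the pseudo-device numbering (derived from the code above): free
+ * minors run from 1 to NR_NONEDEVS, so the first device handed out is
+ * makedev(NONE_MAJOR, 1); is_nonedev() accepts exactly that range, and bit i
+ * of the 'nonedev' map tracks minor i + 1.
+ */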
--- /dev/null
+/* This file contains the procedures for creating, opening, closing, and
+ * seeking on files.
+ *
+ * The entry points into this file are
+ * do_creat: perform the CREAT system call
+ * do_open: perform the OPEN system call
+ * do_mknod: perform the MKNOD system call
+ * do_mkdir: perform the MKDIR system call
+ * do_close: perform the CLOSE system call
+ * do_lseek: perform the LSEEK system call
+ * do_llseek: perform the LLSEEK system call
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include "lock.h"
+#include "param.h"
+#include <dirent.h>
+#include <assert.h>
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+#include "path.h"
+
+PRIVATE char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0};
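+
+/* mode_map above translates the two O_ACCMODE bits of open()'s flags into
+ * internal access bits: O_RDONLY (0) -> R_BIT, O_WRONLY (1) -> W_BIT,
+ * O_RDWR (2) -> R_BIT|W_BIT, and the invalid value 3 -> 0, which
+ * common_open() below rejects with EINVAL.
+ */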
+
+FORWARD _PROTOTYPE( int common_open, (char path[PATH_MAX+1], int oflags,
+ mode_t omode) );
+FORWARD _PROTOTYPE( struct vnode *new_node, (struct lookup *resolve,
+ int oflags, mode_t bits) );
+FORWARD _PROTOTYPE( int pipe_open, (struct vnode *vp, mode_t bits,
+ int oflags) );
+
+
+/*===========================================================================*
+ * do_creat *
+ *===========================================================================*/
+PUBLIC int do_creat()
+{
+/* Perform the creat(name, mode) system call. */
+ int r;
+ char fullpath[PATH_MAX+1];
+
+ if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+ return(err_code);
+ r = common_open(fullpath, O_WRONLY | O_CREAT | O_TRUNC, (mode_t) m_in.mode);
+ return(r);
+}
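+
+/* As the code above shows, creat() is just open() with fixed flags; an
+ * application-level sketch of the equivalence (illustrative):
+ *
+ *	int fd1 = creat("f", 0644);
+ *	int fd2 = open("f", O_WRONLY | O_CREAT | O_TRUNC, 0644);  (same effect)
+ */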
+
+
+/*===========================================================================*
+ * do_open *
+ *===========================================================================*/
+PUBLIC int do_open()
+{
+/* Perform the open(name, flags,...) system call. */
+ int create_mode = 0; /* is really mode_t but this gives problems */
+ int r;
+ char fullpath[PATH_MAX+1];
+
+ /* If O_CREAT is set, open has three parameters, otherwise two. */
+ if (m_in.mode & O_CREAT) {
+ create_mode = m_in.c_mode;
+ r = fetch_name(m_in.c_name, m_in.name1_length, M1, fullpath);
+ } else {
+ r = fetch_name(m_in.name, m_in.name_length, M3, fullpath);
+ }
+
+ if (r != OK) return(err_code); /* name was bad */
+ r = common_open(fullpath, m_in.mode, create_mode);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * common_open *
+ *===========================================================================*/
+PRIVATE int common_open(char path[PATH_MAX+1], int oflags, mode_t omode)
+{
+/* Common code from do_creat and do_open. */
+ int b, r, exist = TRUE, major_dev;
+ dev_t dev;
+ mode_t bits;
+ struct filp *filp, *filp2;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ struct dmap *dp;
+ struct lookup resolve;
+
+ /* Remap the bottom two bits of oflags. */
+ bits = (mode_t) mode_map[oflags & O_ACCMODE];
+ if (!bits) return(EINVAL);
+
+ /* See if file descriptor and filp slots are available. */
+ if ((r = get_fd(0, bits, &m_in.fd, &filp)) != OK) return(r);
+
+ lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp);
+
+ /* If O_CREAT is set, try to make the file. */
+ if (oflags & O_CREAT) {
+ omode = I_REGULAR | (omode & ALL_MODES & fp->fp_umask);
+ vp = new_node(&resolve, oflags, omode);
+ r = err_code;
+ if (r == OK) exist = FALSE; /* We just created the file */
+ else if (r != EEXIST) { /* other error */
+ if (vp) unlock_vnode(vp);
+ unlock_filp(filp);
+ return(r);
+ }
+ else exist = !(oflags & O_EXCL);/* file exists, if the O_EXCL
+ flag is set this is an error */
+ } else {
+ /* Scan path name */
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_OPCL;
+ if ((vp = eat_path(&resolve, fp)) == NULL) {
+ unlock_filp(filp);
+ return(err_code);
+ }
+
+ if (vmp != NULL) unlock_vmnt(vmp);
+ }
+
+ /* Claim the file descriptor and filp slot and fill them in. */
+ fp->fp_filp[m_in.fd] = filp;
+ FD_SET(m_in.fd, &fp->fp_filp_inuse);
+ filp->filp_count = 1;
+ filp->filp_vno = vp;
+ filp->filp_flags = oflags;
+
+ /* Only do the normal open code if we didn't just create the file. */
+ if (exist) {
+ /* Check protections. */
+ if ((r = forbidden(vp, bits)) == OK) {
+ /* Opening reg. files, directories, and special files differ */
+ switch (vp->v_mode & I_TYPE) {
+ case I_REGULAR:
+ /* Truncate regular file if O_TRUNC. */
+ if (oflags & O_TRUNC) {
+ if ((r = forbidden(vp, W_BIT)) != OK)
+ break;
+ truncate_vnode(vp, 0);
+ }
+ break;
+ case I_DIRECTORY:
+ /* Directories may be read but not written. */
+ r = (bits & W_BIT ? EISDIR : OK);
+ break;
+ case I_CHAR_SPECIAL:
+ /* Invoke the driver for special processing. */
+ dev = (dev_t) vp->v_sdev;
+ r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE));
+ if (r == SUSPEND) suspend(FP_BLOCKED_ON_DOPEN);
+ else vp = filp->filp_vno; /* Might be updated by
+ * dev_open/clone_opcl */
+ break;
+ case I_BLOCK_SPECIAL:
+
+ lock_bsf();
+
+ /* Invoke the driver for special processing. */
+ dev = (dev_t) vp->v_sdev;
+ r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE));
+ if (r != OK) {
+ unlock_bsf();
+ break;
+ }
+
+ /* Check whether the device is mounted or not. If so,
+ * then that FS is responsible for this device. Else
+ * we default to ROOT_FS. */
+ vp->v_bfs_e = ROOT_FS_E; /* By default */
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp)
+ if (vmp->m_dev == vp->v_sdev)
+ vp->v_bfs_e = vmp->m_fs_e;
+
+ /* Get the driver endpoint of the block spec device */
+ major_dev = major(vp->v_sdev);
+ if (major_dev < 0 || major_dev >= NR_DEVICES)
+ r = ENXIO;
+ else
+ dp = &dmap[major_dev];
+ if (r != OK || dp->dmap_driver == NONE) {
+ printf("VFS: driver not found for device %d\n",
+ vp->v_sdev);
+ r = ENXIO;
+ unlock_bsf();
+ break;
+ }
+
+ /* Send the driver endpoint (even when known already), but only
+ * when the root FS handles block I/O for this device. */
+ if (vp->v_bfs_e != ROOT_FS_E) {
+ unlock_bsf();
+ break;
+ }
+ if ((r = req_newdriver(vp->v_bfs_e, vp->v_sdev,
+ dp->dmap_driver)) != OK) {
+ printf("VFS: error sending driver endpoint\n");
+ r = ENXIO;
+ }
+ unlock_bsf();
+ break;
+
+ case I_NAMED_PIPE:
+ /* Create a mapped inode on PFS which handles reads
+ and writes to this named pipe. */
+ tll_upgrade(&vp->v_lock);
+ r = map_vnode(vp, PFS_PROC_NR);
+ if (r == OK) {
+ vp->v_pipe = I_PIPE;
+ if (vp->v_ref_count == 1) {
+ vp->v_pipe_rd_pos = 0;
+ vp->v_pipe_wr_pos = 0;
+ if (vp->v_size != 0)
+ r = truncate_vnode(vp, 0);
+ }
+ oflags |= O_APPEND; /* force append mode */
+ filp->filp_flags = oflags;
+ }
+ if (r == OK) {
+ r = pipe_open(vp, bits, oflags);
+ }
+ if (r != ENXIO) {
+ /* See if someone else is doing a rd or wt on
+ * the FIFO. If so, use its filp entry so the
+ * file position will be automatically shared.
+ */
+ b = (bits & R_BIT ? R_BIT : W_BIT);
+ filp->filp_count = 0; /* don't find self */
+ if ((filp2 = find_filp(vp, b)) != NULL) {
+ /* Co-reader or writer found. Use it.*/
+ fp->fp_filp[m_in.fd] = filp2;
+ filp2->filp_count++;
+ filp2->filp_vno = vp;
+ filp2->filp_flags = oflags;
+
+ /* v_count was incremented after the
+ * vnode has been found. i_count was
+ * incremented incorrectly in FS, not
+ * knowing that we were going to use an
+ * existing filp entry. Correct this
+ * error.
+ */
+ unlock_vnode(vp);
+ put_vnode(vp);
+ } else {
+ /* Nobody else found. Restore filp. */
+ filp->filp_count = 1;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ unlock_filp(filp);
+
+ /* If error, release inode. */
+ if (r != OK) {
+ if (r != SUSPEND) {
+ fp->fp_filp[m_in.fd] = NULL;
+ FD_CLR(m_in.fd, &fp->fp_filp_inuse);
+ filp->filp_count = 0;
+ filp->filp_vno = NULL;
+ put_vnode(vp);
+ }
+ } else {
+ r = m_in.fd;
+ }
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * new_node *
+ *===========================================================================*/
+PRIVATE struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
+{
+/* Try to create a new inode and return a pointer to it. If the inode already
+ exists, return a pointer to it as well, but set err_code accordingly.
+ NULL is returned if the path cannot be resolved up to the last
+ directory, or when the inode cannot be created due to permissions or
+ otherwise. */
+ struct vnode *dirp, *vp;
+ struct vmnt *dir_vmp, *vp_vmp;
+ int r;
+ struct node_details res;
+ struct lookup findnode;
+ char *path;
+
+ path = resolve->l_path; /* For easy access */
+
+ lookup_init(&findnode, path, resolve->l_flags, &dir_vmp, &dirp);
+ findnode.l_vmnt_lock = VMNT_WRITE;
+ findnode.l_vnode_lock = VNODE_WRITE; /* dir node */
+
+ /* When O_CREAT and O_EXCL flags are set, the path may not be named by a
+ * symbolic link. */
+ if (oflags & O_EXCL) findnode.l_flags |= PATH_RET_SYMLINK;
+
+ /* See if the path can be opened down to the last directory. */
+ if ((dirp = last_dir(&findnode, fp)) == NULL) return(NULL);
+
+ /* The final directory is accessible. Get final component of the path. */
+ findnode.l_vmp = &vp_vmp;
+ findnode.l_vnode = &vp;
+ findnode.l_vnode_lock = (oflags & O_TRUNC) ? VNODE_WRITE : VNODE_OPCL;
+ vp = advance(dirp, &findnode, fp);
+ assert(vp_vmp == NULL); /* Lookup to last dir should have yielded lock
+ * on vmp or final component does not exist. */
+
+ /* The combination of a symlink with an absolute path followed by a
+ * dangling symlink results in a new path that needs to be re-resolved
+ * entirely. */
+ if (path[0] == '/') {
+printf("XXX: dangling symlink needs re-resolving\n");
+ unlock_vnode(dirp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dirp);
+ if (vp != NULL) {
+ unlock_vnode(vp);
+ put_vnode(vp);
+ }
+ return new_node(resolve, oflags, bits);
+ }
+
+ if (vp == NULL && err_code == ENOENT) {
+ /* Last path component does not exist. Make a new directory entry. */
+ if ((vp = get_free_vnode()) == NULL) {
+ /* Can't create new entry: out of vnodes. */
+ unlock_vnode(dirp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dirp);
+ return(NULL);
+ }
+
+ lock_vnode(vp, VNODE_OPCL);
+
+ if ((r = forbidden(dirp, W_BIT|X_BIT)) != OK ||
+ (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,
+ fp->fp_effgid, path, &res)) != OK ) {
+ /* Can't create inode either due to permissions or some other
+ * problem. In case r is EEXIST, we might be dealing with a
+ * dangling symlink.*/
+ if (r == EEXIST) {
+ struct vnode *slp, *old_wd;
+
+ /* Resolve path up to symlink */
+ findnode.l_flags = PATH_RET_SYMLINK;
+ findnode.l_vnode_lock = VNODE_READ;
+ findnode.l_vnode = &slp;
+ slp = advance(dirp, &findnode, fp);
+ if (slp != NULL) {
+ if (S_ISLNK(slp->v_mode)) {
+ /* Get contents of link */
+
+ r = req_rdlink(slp->v_fs_e,
+ slp->v_inode_nr,
+ VFS_PROC_NR,
+ path,
+ PATH_MAX, 0);
+ if (r < 0) {
+ /* Failed to read link */
+ unlock_vnode(slp);
+ unlock_vnode(dirp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(slp);
+ put_vnode(dirp);
+ err_code = r;
+ return(NULL);
+ }
+ path[r] = '\0'; /* Terminate path */
+ }
+ unlock_vnode(slp);
+ put_vnode(slp);
+ }
+
+ /* Try to create the inode the dangling symlink was
+ * pointing to. We have to use dirp as starting point
+ * as there might be multiple successive symlinks
+ * crossing multiple mountpoints. */
+ old_wd = fp->fp_wd; /* Save orig. working dirp */
+ fp->fp_wd = dirp;
+ vp = new_node(resolve, oflags, bits);
+ fp->fp_wd = old_wd; /* Restore */
+
+ if (vp != NULL) {
+ unlock_vnode(dirp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dirp);
+ *(resolve->l_vnode) = vp;
+ return(vp);
+ }
+ r = err_code;
+ }
+
+ if (r == EEXIST)
+ err_code = EIO; /* Impossible, we have verified that
+ * the last component doesn't exist and
+ * is not a dangling symlink. */
+ else
+ err_code = r;
+
+ unlock_vnode(dirp);
+ unlock_vnode(vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dirp);
+ return(NULL);
+ }
+
+ /* Store results and mark vnode in use */
+
+ vp->v_fs_e = res.fs_e;
+ vp->v_inode_nr = res.inode_nr;
+ vp->v_mode = res.fmode;
+ vp->v_size = res.fsize;
+ vp->v_uid = res.uid;
+ vp->v_gid = res.gid;
+ vp->v_sdev = res.dev;
+ vp->v_vmnt = dirp->v_vmnt;
+ vp->v_dev = vp->v_vmnt->m_dev;
+ vp->v_fs_count = 1;
+ vp->v_ref_count = 1;
+ } else {
+ /* Either last component exists, or there is some other problem. */
+ if (vp != NULL) {
+ r = EEXIST; /* File exists or a symlink names a file while
+ * O_EXCL is set. */
+ } else
+ r = err_code; /* Other problem. */
+ }
+
+ err_code = r;
+ /* When dirp equals vp, we shouldn't release the lock, as a vp is locked
+ * only once. Releasing it here would leave the resulting vp unlocked and
+ * cause mayhem later on. */
+ if (dirp != vp) {
+ unlock_vnode(dirp);
+ }
+ unlock_vmnt(dir_vmp);
+ put_vnode(dirp);
+
+ *(resolve->l_vnode) = vp;
+ return(vp);
+}
+
+
+/*===========================================================================*
+ * pipe_open *
+ *===========================================================================*/
+PRIVATE int pipe_open(register struct vnode *vp, register mode_t bits,
+ register int oflags)
+{
+/* This function is called from common_open. It checks if
+ * there is at least one reader/writer pair for the pipe, if not
+ * it suspends the caller, otherwise it revives all other blocked
+ * processes hanging on the pipe.
+ */
+
+ vp->v_pipe = I_PIPE;
+
+ if((bits & (R_BIT|W_BIT)) == (R_BIT|W_BIT)) return(ENXIO);
+
+ /* Find the reader/writer at the other end of the pipe */
+ if (find_filp(vp, bits & W_BIT ? R_BIT : W_BIT) == NULL) {
+ /* Not found */
+ if (oflags & O_NONBLOCK) {
+ if (bits & W_BIT) return(ENXIO);
+ } else {
+ /* Let's wait for the other side to show up */
+ suspend(FP_BLOCKED_ON_POPEN); /* suspend caller */
+ return(SUSPEND);
+ }
+ } else if (susp_count > 0) { /* revive blocked processes */
+ release(vp, OPEN, susp_count);
+ release(vp, CREAT, susp_count);
+ }
+ return(OK);
+}
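+
+/* Illustrative consequence of the logic above: open(fifo, O_RDONLY) with no
+ * writer suspends the caller until one shows up, open(fifo, O_WRONLY |
+ * O_NONBLOCK) with no reader fails with ENXIO, and a nonblocking read-side
+ * open with no writer simply succeeds -- the asymmetry follows POSIX.
+ */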
+
+
+/*===========================================================================*
+ * do_mknod *
+ *===========================================================================*/
+PUBLIC int do_mknod()
+{
+/* Perform the mknod(name, mode, addr) system call. */
+ register mode_t bits, mode_bits;
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Only the super_user may make nodes other than fifos. */
+ mode_bits = (mode_t) m_in.mk_mode; /* mode of the inode */
+ if (!super_user && (((mode_bits & I_TYPE) != I_NAMED_PIPE) &&
+ ((mode_bits & I_TYPE) != I_UNIX_SOCKET))) {
+ return(EPERM);
+ }
+ bits = (mode_bits & I_TYPE) | (mode_bits & ALL_MODES & fp->fp_umask);
+
+ /* Open directory that's going to hold the new node. */
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code);
+
+ /* Make sure that the object is a directory */
+ if ((vp->v_mode & I_TYPE) != I_DIRECTORY) {
+ r = ENOTDIR;
+ } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) {
+ r = req_mknod(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid,
+ fp->fp_effgid, bits, m_in.mk_z0);
+ }
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_mkdir *
+ *===========================================================================*/
+PUBLIC int do_mkdir()
+{
+/* Perform the mkdir(name, mode) system call. */
+ mode_t bits; /* mode bits for the new inode */
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ bits = I_DIRECTORY | (m_in.mode & RWX_MODES & fp->fp_umask);
+ if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code);
+
+ /* Make sure that the object is a directory */
+ if ((vp->v_mode & I_TYPE) != I_DIRECTORY) {
+ r = ENOTDIR;
+ } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) {
+ r = req_mkdir(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid,
+ fp->fp_effgid, bits);
+ }
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_lseek *
+ *===========================================================================*/
+PUBLIC int do_lseek()
+{
+/* Perform the lseek(ls_fd, offset, whence) system call. */
+ register struct filp *rfilp;
+ int r = OK;
+ long offset;
+ u64_t pos, newpos;
+
+ /* Check to see if the file descriptor is valid. */
+ if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* No lseek on pipes. */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ unlock_filp(rfilp);
+ return(ESPIPE);
+ }
+
+ /* The value of 'whence' determines the start position to use. */
+ switch(m_in.whence) {
+ case SEEK_SET: pos = cvu64(0); break;
+ case SEEK_CUR: pos = rfilp->filp_pos; break;
+ case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size); break;
+ default: unlock_filp(rfilp); return(EINVAL);
+ }
+
+ offset = m_in.offset_lo;
+ if (offset >= 0)
+ newpos = add64ul(pos, offset);
+ else
+ newpos = sub64ul(pos, -offset);
+
+ /* Check for overflow. */
+ if (ex64hi(newpos) != 0)
+ r = EINVAL;
+ else {
+ /* insert the new position into the output message */
+ m_out.reply_l1 = ex64lo(newpos);
+
+ if (cmp64(newpos, rfilp->filp_pos) != 0) {
+ /* Update the position and inhibit the read ahead request;
+ * comparing after the update would make this test vacuous. */
+ rfilp->filp_pos = newpos;
+ r = req_inhibread(rfilp->filp_vno->v_fs_e,
+ rfilp->filp_vno->v_inode_nr);
+ }
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
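+
+/* Note on the ex64hi() test above: do_lseek() replies with a single 32-bit
+ * word (reply_l1), so any result that does not fit in 32 bits is rejected
+ * with EINVAL; do_llseek() below returns the full 64-bit position in two
+ * words instead.
+ */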
+
+/*===========================================================================*
+ * do_llseek *
+ *===========================================================================*/
+PUBLIC int do_llseek()
+{
+/* Perform the llseek(ls_fd, offset, whence) system call. */
+ register struct filp *rfilp;
+ u64_t pos, newpos;
+ int r = OK;
+
+ /* Check to see if the file descriptor is valid. */
+ if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* No lseek on pipes. */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ unlock_filp(rfilp);
+ return(ESPIPE);
+ }
+
+ /* The value of 'whence' determines the start position to use. */
+ switch(m_in.whence) {
+ case SEEK_SET: pos = cvu64(0); break;
+ case SEEK_CUR: pos = rfilp->filp_pos; break;
+ case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size); break;
+ default: unlock_filp(rfilp); return(EINVAL);
+ }
+
+ newpos = add64(pos, make64(m_in.offset_lo, m_in.offset_high));
+
+ /* Check for overflow. */
+ if (( (long) m_in.offset_high > 0) && cmp64(newpos, pos) < 0)
+ r = EINVAL;
+ else if (( (long) m_in.offset_high < 0) && cmp64(newpos, pos) > 0)
+ r = EINVAL;
+ else {
+ /* insert the new position into the output message */
+ m_out.reply_l1 = ex64lo(newpos);
+ m_out.reply_l2 = ex64hi(newpos);
+
+ if (cmp64(newpos, rfilp->filp_pos) != 0) {
+ /* Update the position and inhibit the read ahead request;
+ * comparing after the update would make this test vacuous. */
+ rfilp->filp_pos = newpos;
+ r = req_inhibread(rfilp->filp_vno->v_fs_e,
+ rfilp->filp_vno->v_inode_nr);
+ }
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
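+
+/* Worked example of the wrap-around test above (illustrative): with
+ * pos = 10 and a requested offset of -20, add64() wraps newpos around to a
+ * huge unsigned value; offset_high is negative while cmp64(newpos, pos) > 0,
+ * so the call fails with EINVAL instead of seeking to a bogus position.
+ */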
+
+/*===========================================================================*
+ * do_close *
+ *===========================================================================*/
+PUBLIC int do_close()
+{
+/* Perform the close(fd) system call. */
+
+ return close_fd(fp, m_in.fd);
+}
+
+
+/*===========================================================================*
+ * close_fd *
+ *===========================================================================*/
+PUBLIC int close_fd(rfp, fd_nr)
+struct fproc *rfp;
+int fd_nr;
+{
+/* Perform the close(fd) system call. */
+ register struct filp *rfilp;
+ register struct vnode *vp;
+ struct file_lock *flp;
+ int lock_count;
+
+ /* First locate the vnode that belongs to the file descriptor. */
+ if ( (rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL)) == NULL) return(err_code);
+ vp = rfilp->filp_vno;
+
+ close_filp(rfilp);
+ rfp->fp_filp[fd_nr] = NULL;
+ FD_CLR(fd_nr, &rfp->fp_cloexec_set);
+ FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+
+ /* Check to see if the file is locked. If so, release all locks. */
+ if (nr_locks > 0) {
+ lock_count = nr_locks; /* save count of locks */
+ for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) {
+ if (flp->lock_type == 0) continue; /* slot not in use */
+ if (flp->lock_vnode == vp && flp->lock_pid == rfp->fp_pid) {
+ flp->lock_type = 0;
+ nr_locks--;
+ }
+ }
+ if (nr_locks < lock_count)
+ lock_revive(); /* one or more locks released */
+ }
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * close_reply *
+ *===========================================================================*/
+PUBLIC void close_reply()
+{
+ /* No need to do anything */
+}
--- /dev/null
+#ifndef __VFS_PARAM_H__
+#define __VFS_PARAM_H__
+
+/* The following names are synonyms for the variables in the input message. */
+#define addr m1_i3
+#define buffer m1_p1
+#define child_endpt m1_i2
+#define co_mode m1_i1
+#define fd m1_i1
+#define fd2 m1_i2
+#define group m1_i3
+#define ls_fd m2_i1
+#define mk_mode m1_i2
+#define mk_z0 m1_i3
+#define mode m3_i2
+#define c_mode m1_i3
+#define c_name m1_p1
+#define name m3_p1
+#define flength m2_l1
+#define name1 m1_p1
+#define name2 m1_p2
+#define name_length m3_i1
+#define name1_length m1_i1
+#define name2_length m1_i2
+#define nbytes m1_i2
+#define owner m1_i2
+#define pathname m3_ca1
+#define pid m1_i3
+#define ENDPT m1_i1
+#define offset_lo m2_l1
+#define offset_high m2_l2
+#define ctl_req m4_l1
+#define mount_flags m1_i3
+#define request m1_i2
+#define sig m1_i2
+#define endpt1 m1_i1
+#define fs_label m1_p3
+#define umount_label m3_ca1
+#define tp m2_l1
+#define utime_actime m2_l1
+#define utime_modtime m2_l2
+#define utime_file m2_p1
+#define utime_length m2_i1
+#define utime_strlen m2_i2
+#define whence m2_i2
+#define svrctl_req m2_i1
+#define svrctl_argp m2_p1
+#define info_what m1_i1
+#define info_where m1_p1
+#define md_label m2_p1
+#define md_label_len m2_l1
+#define md_major m2_i1
+#define md_style m2_i2
+#define md_flags m2_i3
+
+/* The following names are synonyms for the variables in the output message. */
+#define reply_type m_type
+#define reply_l1 m2_l1
+#define reply_l2 m2_l2
+#define reply_i1 m1_i1
+#define reply_i2 m1_i2
+
+#endif
--- /dev/null
+/* lookup() is the main routine that controls the path name lookup. It
+ * handles mountpoints and symbolic links. The actual lookup requests
+ * are sent through the req_lookup wrapper function.
+ */
+
+#include "fs.h"
+#include <string.h>
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/keymap.h>
+#include <minix/const.h>
+#include <minix/endpoint.h>
+#include <unistd.h>
+#include <assert.h>
+#include <minix/vfsif.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <dirent.h>
+#include "threads.h"
+#include "vmnt.h"
+#include "vnode.h"
+#include "path.h"
+#include "fproc.h"
+#include "param.h"
+
+/* Set the following define to 1 if you really want to use the POSIX definition
+ * (IEEE Std 1003.1, 2004) of pathname resolution. POSIX requires pathnames
+ * with a trailing slash (and that do not entirely consist of slash characters)
+ * to be treated as if a single dot is appended. This means that for example
+ * mkdir("dir/", ...) and rmdir("dir/") will fail because the call tries to
+ * create or remove the directory '.'. Historically, Unix systems just ignore
+ * trailing slashes.
+ */
+#define DO_POSIX_PATHNAME_RES 0
+
+FORWARD _PROTOTYPE( int lookup, (struct vnode *dirp, struct lookup *resolve,
+ node_details_t *node, struct fproc *rfp));
+FORWARD _PROTOTYPE( int check_perms, (endpoint_t ep, cp_grant_id_t io_gr,
+ size_t pathlen) );
+
+/*===========================================================================*
+ * advance *
+ *===========================================================================*/
+PUBLIC struct vnode *advance(dirp, resolve, rfp)
+struct vnode *dirp;
+struct lookup *resolve;
+struct fproc *rfp;
+{
+/* Resolve a path name starting at dirp to a vnode. */
+ int r;
+ int do_downgrade = 1;
+ struct vnode *new_vp, *vp;
+ struct vmnt *vmp;
+ struct node_details res = {0,0,0,0,0,0,0};
+ tll_access_t initial_locktype;
+
+ assert(dirp);
+ assert(resolve->l_vnode_lock != TLL_NONE);
+ assert(resolve->l_vmnt_lock != TLL_NONE);
+
+ if (resolve->l_vnode_lock == VNODE_READ)
+ initial_locktype = VNODE_OPCL;
+ else
+ initial_locktype = resolve->l_vnode_lock;
+
+ /* Get a free vnode and lock it */
+ if ((new_vp = get_free_vnode()) == NULL) return(NULL);
+ lock_vnode(new_vp, initial_locktype);
+
+ /* Lookup vnode belonging to the file. */
+ if ((r = lookup(dirp, resolve, &res, rfp)) != OK) {
+ err_code = r;
+ unlock_vnode(new_vp);
+ return(NULL);
+ }
+
+ /* Check whether we already have a vnode for that file */
+ if ((vp = find_vnode(res.fs_e, res.inode_nr)) != NULL) {
+ unlock_vnode(new_vp); /* Don't need this anymore */
+ do_downgrade = (lock_vnode(vp, initial_locktype) != EBUSY);
+
+ /* Unfortunately, by the time we get the lock, another thread might've
+ * gotten rid of the vnode (e.g., find_vnode found the vnode while a
+ * req_putnode was being processed). */
+ if (vp->v_ref_count == 0) { /* vnode vanished! */
+ /* As the lookup before increased the usage counters in the FS,
+ * we can simply set the usage counters to 1 and proceed as
+ * normal, because the putnode resulted in a use count of 1 in
+ * the FS. Other data is still valid, because the vnode was
+ * marked as pending lock, so get_free_vnode hasn't
+ * reinitialized the vnode yet. */
+ vp->v_fs_count = 1;
+ if (vp->v_mapfs_e != NONE) vp->v_mapfs_count = 1;
+ } else {
+ vp->v_fs_count++; /* We got a reference from the FS */
+ }
+
+ } else {
+ /* Vnode not found, fill in the free vnode's fields */
+
+ new_vp->v_fs_e = res.fs_e;
+ new_vp->v_inode_nr = res.inode_nr;
+ new_vp->v_mode = res.fmode;
+ new_vp->v_size = res.fsize;
+ new_vp->v_uid = res.uid;
+ new_vp->v_gid = res.gid;
+ new_vp->v_sdev = res.dev;
+
+ if( (vmp = find_vmnt(new_vp->v_fs_e)) == NULL)
+ panic("advance: vmnt not found");
+
+ new_vp->v_vmnt = vmp;
+ new_vp->v_dev = vmp->m_dev;
+ new_vp->v_fs_count = 1;
+
+ vp = new_vp;
+ }
+
+ dup_vnode(vp);
+ if (do_downgrade) {
+ /* Only downgrade a lock if we managed to lock it in the first place */
+ *(resolve->l_vnode) = vp;
+
+ if (initial_locktype != resolve->l_vnode_lock)
+ tll_downgrade(&vp->v_lock);
+
+#if LOCK_DEBUG
+ if (resolve->l_vnode_lock == VNODE_READ)
+ fp->fp_vp_rdlocks++;
+#endif
+ }
+
+ return(vp);
+}
+
+
+/*===========================================================================*
+ * eat_path *
+ *===========================================================================*/
+PUBLIC struct vnode *eat_path(resolve, rfp)
+struct lookup *resolve;
+struct fproc *rfp;
+{
+/* Resolve path to a vnode. advance does the actual work. */
+ struct vnode *start_dir;
+
+ start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd : rfp->fp_wd);
+ return advance(start_dir, resolve, rfp);
+}
+
+
+/*===========================================================================*
+ * last_dir *
+ *===========================================================================*/
+PUBLIC struct vnode *last_dir(resolve, rfp)
+struct lookup *resolve;
+struct fproc *rfp;
+{
+/* Parse a path, as far as the last directory, fetch the vnode
+ * for the last directory into the vnode table, and return a pointer to the
+ * vnode. In addition, return the final component of the path in the lookup's
+ * path buffer. If the last directory can't be opened, return NULL and the
+ * reason for failure in 'err_code'. We can't parse component by component as
+ * that would be too expensive. Instead, we cut off the last component of the
+ * path, and parse the path up to the penultimate component.
+ */
+
+ size_t len;
+ char *cp;
+ char dir_entry[PATH_MAX+1];
+ struct vnode *start_dir, *res;
+
+ /* Is the path absolute or relative? Initialize 'start_dir' accordingly. */
+ start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd : rfp->fp_wd);
+
+ len = strlen(resolve->l_path);
+
+ /* If path is empty, return ENOENT. */
+ if (len == 0) {
+ err_code = ENOENT;
+ return(NULL);
+ }
+
+#if !DO_POSIX_PATHNAME_RES
+ /* Remove trailing slashes */
+ while (len > 1 && resolve->l_path[len-1] == '/') {
+ len--;
+ resolve->l_path[len]= '\0';
+ }
+#endif
+
+ cp = strrchr(resolve->l_path, '/');
+ if (cp == NULL) {
+ /* Just one entry in the current working directory */
+ struct vmnt *vmp;
+
+ vmp = find_vmnt(start_dir->v_fs_e);
+ if (lock_vmnt(vmp, resolve->l_vmnt_lock) != EBUSY)
+ *resolve->l_vmp = vmp;
+ lock_vnode(start_dir, resolve->l_vnode_lock);
+ *resolve->l_vnode = start_dir;
+ dup_vnode(start_dir);
+ return(start_dir);
+
+ } else if (cp[1] == '\0') {
+ /* Path ends in a slash. The directory entry is '.' */
+ strcpy(dir_entry, ".");
+ } else {
+ /* A path name for the directory and a directory entry */
+ strcpy(dir_entry, cp+1);
+ cp[1] = '\0';
+ }
+
+ /* Remove trailing slashes */
+ while(cp > resolve->l_path && cp[0] == '/') {
+ cp[0]= '\0';
+ cp--;
+ }
+
+ resolve->l_flags = PATH_NOFLAGS;
+ res = advance(start_dir, resolve, rfp);
+ if (res == NULL) return(NULL);
+
+ /* Copy the directory entry back to user_fullpath */
+ strncpy(resolve->l_path, dir_entry, PATH_MAX);
+
+ return(res);
+}
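+
+/* Worked example (illustrative): for l_path "/usr/src/" the code above first
+ * strips the trailing slash ("/usr/src"), then splits at the last '/' into a
+ * directory part "/usr" and dir_entry "src". advance() resolves "/usr", and
+ * on success l_path is overwritten with "src" for the caller. A plain "file"
+ * with no slash takes the early branch and returns the working directory.
+ */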
+
+/*===========================================================================*
+ * lookup *
+ *===========================================================================*/
+PRIVATE int lookup(start_node, resolve, result_node, rfp)
+struct vnode *start_node;
+struct lookup *resolve;
+node_details_t *result_node;
+struct fproc *rfp;
+{
+/* Resolve a path name relative to start_node. */
+
+ int r, symloop;
+ endpoint_t fs_e;
+ size_t path_off, path_left_len;
+ ino_t dir_ino, root_ino;
+ uid_t uid;
+ gid_t gid;
+ struct vnode *dir_vp;
+ struct vmnt *vmp, *vmpres;
+ struct lookup_res res;
+
+ assert(resolve->l_vmp);
+ assert(resolve->l_vnode);
+
+ *(resolve->l_vmp) = vmpres = NULL; /* No vmnt found nor locked yet */
+
+ /* Empty (start) path? */
+ if (resolve->l_path[0] == '\0') {
+ result_node->inode_nr = 0;
+ return(ENOENT);
+ }
+
+ if (!rfp->fp_rd || !rfp->fp_wd) {
+ printf("VFS: lookup %d: no rd/wd\n", rfp->fp_endpoint);
+ return(ENOENT);
+ }
+
+ fs_e = start_node->v_fs_e;
+ dir_ino = start_node->v_inode_nr;
+ vmpres = find_vmnt(fs_e);
+
+ /* Is the process' root directory on the same partition?
+ * If so, set the chroot directory too. */
+ if (rfp->fp_rd->v_dev == rfp->fp_wd->v_dev)
+ root_ino = rfp->fp_rd->v_inode_nr;
+ else
+ root_ino = 0;
+
+ /* Set user and group ids according to the system call */
+ uid = (call_nr == ACCESS ? rfp->fp_realuid : rfp->fp_effuid);
+ gid = (call_nr == ACCESS ? rfp->fp_realgid : rfp->fp_effgid);
+
+ symloop = 0; /* Number of symlinks seen so far */
+
+ /* Lock vmnt */
+ if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
+ if (r == EBUSY) /* vmnt already locked */
+ vmpres = NULL;
+ }
+ *(resolve->l_vmp) = vmpres;
+
+ /* Issue the request */
+ r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);
+
+ if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(r); /* i.e., an error occurred */
+ }
+
+ /* While the response indicates that we crossed a mountpoint or hit a
+ * symlink with an absolute path, prepare a new request and reissue it. */
+ while (r == EENTERMOUNT || r == ELEAVEMOUNT || r == ESYMLINK) {
+ /* Update user_fullpath to reflect what's left to be parsed. */
+ path_off = res.char_processed;
+ path_left_len = strlen(&resolve->l_path[path_off]);
+ memmove(resolve->l_path, &resolve->l_path[path_off], path_left_len);
+ resolve->l_path[path_left_len] = '\0'; /* terminate string */
+
+ /* Update the current value of the symloop counter */
+ symloop += res.symloop;
+ if (symloop > SYMLOOP_MAX) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(ELOOP);
+ }
+
+ /* Symlink encountered with absolute path */
+ if (r == ESYMLINK) {
+ dir_vp = rfp->fp_rd;
+ vmp = NULL;
+ } else if (r == EENTERMOUNT) {
+ /* Entering a new partition */
+ dir_vp = 0;
+ /* Start node is now the mounted partition's root node */
+ for (vmp = &vmnt[0]; vmp != &vmnt[NR_MNTS]; ++vmp) {
+ if (vmp->m_dev != NO_DEV && vmp->m_mounted_on) {
+ if (vmp->m_mounted_on->v_inode_nr == res.inode_nr &&
+ vmp->m_mounted_on->v_fs_e == res.fs_e) {
+ dir_vp = vmp->m_root_node;
+ break;
+ }
+ }
+ }
+ assert(dir_vp);
+ } else {
+ /* Climbing up mount */
+ /* Find the vmnt that represents the partition on
+ * which we "climb up". */
+ if ((vmp = find_vmnt(res.fs_e)) == NULL) {
+ panic("VFS lookup: can't find parent vmnt");
+ }
+
+ /* Make sure that the child FS does not feed a bogus path
+ * to the parent FS. That is, when we climb up the tree, we
+ * must've encountered ".." in the path, and that is exactly
+ * what we're going to feed to the parent */
+ if(strncmp(resolve->l_path, "..", 2) != 0 ||
+ (resolve->l_path[2] != '\0' && resolve->l_path[2] != '/')) {
+ printf("VFS: bogus path: %s\n", resolve->l_path);
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(ENOENT);
+ }
+
+ /* Start node is the vnode on which the partition is
+ * mounted */
+ dir_vp = vmp->m_mounted_on;
+ }
+
+ /* Set the starting directory's inode number and FS endpoint */
+ fs_e = dir_vp->v_fs_e;
+ dir_ino = dir_vp->v_inode_nr;
+
+ /* Is the process' root directory on the same partition?
+ * If so, set the chroot directory too. */
+ if (dir_vp->v_dev == rfp->fp_rd->v_dev)
+ root_ino = rfp->fp_rd->v_inode_nr;
+ else
+ root_ino = 0;
+
+ /* Unlock a previously locked vmnt if locked and lock new vmnt */
+ if (vmpres) unlock_vmnt(vmpres);
+ vmpres = find_vmnt(fs_e);
+ if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
+ if (r == EBUSY)
+ vmpres = NULL; /* Already locked */
+ }
+ *(resolve->l_vmp) = vmpres;
+
+ r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);
+
+ if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(r);
+ }
+ }
+
+ /* Fill in response fields */
+ result_node->inode_nr = res.inode_nr;
+ result_node->fmode = res.fmode;
+ result_node->fsize = res.fsize;
+ result_node->dev = res.dev;
+ result_node->fs_e = res.fs_e;
+ result_node->uid = res.uid;
+ result_node->gid = res.gid;
+
+ return(r);
+}
+
+/*===========================================================================*
+ * lookup_init *
+ *===========================================================================*/
+PUBLIC void lookup_init(resolve, path, flags, vmp, vp)
+struct lookup *resolve;
+char *path;
+int flags;
+struct vmnt **vmp;
+struct vnode **vp;
+{
+ assert(vmp != NULL);
+ assert(vp != NULL);
+
+ resolve->l_path = path;
+ resolve->l_flags = flags;
+ resolve->l_vmp = vmp;
+ resolve->l_vnode = vp;
+ resolve->l_vmnt_lock = TLL_NONE;
+ resolve->l_vnode_lock = TLL_NONE;
+ *vmp = NULL; /* Initialize lookup result to NULL */
+ *vp = NULL;
+}
+
+/*===========================================================================*
+ * get_name *
+ *===========================================================================*/
+PUBLIC int get_name(dirp, entry, ename)
+struct vnode *dirp;
+struct vnode *entry;
+char ename[NAME_MAX + 1];
+{
+ u64_t pos, new_pos;
+ int r, consumed, totalbytes;
+ char buf[(sizeof(struct dirent) + NAME_MAX) * 8];
+ struct dirent *cur;
+
+ pos = make64(0, 0);
+
+ if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) {
+ return(EBADF);
+ }
+
+ do {
+ r = req_getdents(dirp->v_fs_e, dirp->v_inode_nr, pos, buf, sizeof(buf),
+ &new_pos, 1);
+
+ if (r == 0) {
+ return(ENOENT); /* end of entries -- matching inode not found */
+ } else if (r < 0) {
+ return(r); /* error */
+ }
+
+ consumed = 0; /* bytes consumed */
+ totalbytes = r; /* number of bytes to consume */
+
+ do {
+ cur = (struct dirent *) (buf + consumed);
+ if (entry->v_inode_nr == cur->d_ino) {
+ /* found the entry we were looking for */
+ strncpy(ename, cur->d_name, NAME_MAX);
+ ename[NAME_MAX] = '\0';
+ return(OK);
+ }
+
+ /* not a match -- move on to the next dirent */
+ consumed += cur->d_reclen;
+ } while (consumed < totalbytes);
+
+ pos = new_pos;
+ } while (1);
+}
+
+/*===========================================================================*
+ * canonical_path *
+ *===========================================================================*/
+PUBLIC int canonical_path(orig_path, canon_path, rfp)
+char *orig_path;
+char canon_path[PATH_MAX+1]; /* should have length PATH_MAX+1 */
+struct fproc *rfp;
+{
+ int len = 0;
+ int r, symloop = 0;
+ struct vnode *dir_vp, *parent_dir;
+ struct vmnt *dir_vmp, *parent_vmp;
+ char component[NAME_MAX+1];
+ char link_path[PATH_MAX+1];
+ char temp_path[PATH_MAX+1];
+ struct lookup resolve;
+
+ dir_vp = NULL;
+ strncpy(temp_path, orig_path, PATH_MAX);
+
+ do {
+ if (dir_vp) {
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dir_vp);
+ }
+
+ /* Resolve to the last directory holding the file */
+ lookup_init(&resolve, temp_path, PATH_NOFLAGS, &dir_vmp, &dir_vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+ if ((dir_vp = last_dir(&resolve, rfp)) == NULL) return(err_code);
+
+ /* dir_vp points to dir and resolve path now contains only the
+ * filename.
+ */
+ strcpy(canon_path, resolve.l_path); /* Store file name */
+
+ /* check if the file is a symlink, if so resolve it */
+ r = rdlink_direct(canon_path, link_path, rfp);
+ if (r <= 0) {
+ strcpy(temp_path, canon_path);
+ break;
+ }
+
+ /* encountered a symlink -- loop again */
+ strcpy(temp_path, link_path);
+
+ symloop++;
+ } while (symloop < SYMLOOP_MAX);
+
+ if (symloop >= SYMLOOP_MAX) {
+ if (dir_vp) {
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dir_vp);
+ }
+ return(ELOOP);
+ }
+
+ while(dir_vp != rfp->fp_rd) {
+
+ strcpy(temp_path, "..");
+
+ /* check if we're at the root node of the file system */
+ if (dir_vp->v_vmnt->m_root_node == dir_vp) {
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dir_vp);
+ dir_vp = dir_vp->v_vmnt->m_mounted_on;
+ dir_vmp = dir_vp->v_vmnt;
+ /* Lock outside assert(); the calls would vanish under NDEBUG. */
+ r = lock_vmnt(dir_vmp, VMNT_READ);
+ assert(r == OK);
+ r = lock_vnode(dir_vp, VNODE_READ);
+ assert(r == OK);
+ dup_vnode(dir_vp);
+ }
+
+ lookup_init(&resolve, temp_path, PATH_NOFLAGS, &parent_vmp,
+ &parent_dir);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if ((parent_dir = advance(dir_vp, &resolve, rfp)) == NULL) {
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dir_vp);
+ return(err_code);
+ }
+
+ /* now retrieve the name of this directory within its parent */
+ if (get_name(parent_dir, dir_vp, component) != OK) {
+ unlock_vnode(parent_dir);
+ unlock_vmnt(parent_vmp);
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(parent_dir);
+ put_vnode(dir_vp);
+ return(ENOENT);
+ }
+
+ len += strlen(component) + 1;
+ if (len > PATH_MAX) {
+ /* adding the component to canon_path would exceed PATH_MAX */
+ unlock_vnode(parent_dir);
+ unlock_vmnt(parent_vmp);
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(parent_dir);
+ put_vnode(dir_vp);
+ return(ENOMEM);
+ }
+
+ /* store result of component in canon_path */
+
+ /* first make space by moving the contents of canon_path to
+ * the right. Move strlen + 1 bytes to include the terminating '\0'.
+ */
+ memmove(canon_path+strlen(component)+1, canon_path,
+ strlen(canon_path) + 1);
+
+ /* Copy component into canon_path */
+ memmove(canon_path, component, strlen(component));
+
+ /* Put slash into place */
+ canon_path[strlen(component)] = '/';
+
+ /* Store parent_dir result, and continue the loop once more */
+ unlock_vnode(dir_vp);
+ unlock_vmnt(dir_vmp);
+ put_vnode(dir_vp);
+ dir_vp = parent_dir;
+ }
+
+ unlock_vnode(dir_vp);
+ unlock_vmnt(parent_vmp);
+
+ put_vnode(dir_vp);
+
+ /* add the leading slash */
+ if (strlen(canon_path) >= PATH_MAX) return(ENAMETOOLONG);
+ memmove(canon_path+1, canon_path, strlen(canon_path));
+ canon_path[0] = '/';
+
+ return(OK);
+}
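+
+/* Example run (illustrative): for orig_path "/home/ast/sock" with no symlinks
+ * involved, last_dir() leaves "sock" in canon_path; the loop then walks ".."
+ * upwards, prepending "ast/" and "home/" via get_name(), and the final
+ * memmove() adds the leading slash, yielding "/home/ast/sock" again -- now
+ * guaranteed canonical.
+ */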
+
+/*===========================================================================*
+ * check_perms *
+ *===========================================================================*/
+PRIVATE int check_perms(ep, io_gr, pathlen)
+endpoint_t ep;
+cp_grant_id_t io_gr;
+size_t pathlen;
+{
+ int r, slot;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ struct fproc *rfp;
+ char orig_path[PATH_MAX+1];
+ char canon_path[PATH_MAX+1];
+ char temp_path[PATH_MAX+1];
+ struct lookup resolve;
+
+ if (isokendpt(ep, &slot) != OK) return(EINVAL);
+ if (pathlen < UNIX_PATH_MAX || pathlen > PATH_MAX) return(EINVAL);
+
+ rfp = &(fproc[slot]);
+ memset(canon_path, '\0', PATH_MAX+1);
+
+ r = sys_safecopyfrom(PFS_PROC_NR, io_gr, (vir_bytes) 0,
+ (vir_bytes) temp_path, pathlen, D);
+ if (r != OK) return(r);
+
+ temp_path[pathlen] = '\0';
+
+ /* save path from pfs before permissions checking modifies it */
+ memcpy(orig_path, temp_path, PATH_MAX+1);
+
+ /* get the canonical path to the socket file */
+ if ((r = canonical_path(orig_path, canon_path, rfp)) != OK)
+ return(r);
+
+ if (strlen(canon_path) >= pathlen) return(ENAMETOOLONG);
+
+ /* copy canon_path back to PFS */
+ r = sys_safecopyto(PFS_PROC_NR, (cp_grant_id_t) io_gr, (vir_bytes) 0,
+ (vir_bytes) canon_path, strlen(canon_path)+1,
+ D);
+ if (r != OK) return(r);
+
+ /* reload user_fullpath for permissions checking */
+ memcpy(temp_path, orig_path, PATH_MAX+1);
+ lookup_init(&resolve, temp_path, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code);
+
+ /* check permissions */
+ r = forbidden(vp, (R_BIT | W_BIT));
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+
+ put_vnode(vp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_check_perms *
+ *===========================================================================*/
+PUBLIC int do_check_perms(void)
+{
+ return check_perms(m_in.USER_ENDPT, (cp_grant_id_t) m_in.IO_GRANT,
+ (size_t) m_in.COUNT);
+}
--- /dev/null
+#ifndef __VFS_PATH_H__
+#define __VFS_PATH_H__
+
+struct lookup {
+ char *l_path; /* Path to lookup */
+ int l_flags; /* VFS/FS flags (see <minix/vfsif.h>) */
+ tll_access_t l_vmnt_lock; /* Lock to obtain on vmnt */
+ tll_access_t l_vnode_lock; /* Lock to obtain on vnode */
+ struct vmnt **l_vmp; /* vmnt object that was locked */
+ struct vnode **l_vnode; /* vnode object that was locked */
+};
+
+#endif
--- /dev/null
+/* This file deals with the suspension and revival of processes. A process can
+ * be suspended because it wants to read or write from a pipe and can't, or
+ * because it wants to read or write from a special file and can't. When a
+ * process can't continue it is suspended, and revived later when it is able
+ * to continue.
+ *
+ * The entry points into this file are
+ * do_pipe: perform the PIPE system call
+ * pipe_check: check to see that a read or write on a pipe is feasible now
+ * suspend: suspend a process that cannot do a requested read or write
+ * release: check to see if a suspended process can be released and do
+ * it
+ * revive: mark a suspended process as able to run again
+ * unsuspend_by_endpt: revive all processes blocking on a given process
+ * do_unpause: a signal has been sent to a process; see if it is suspended
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <signal.h>
+#include <assert.h>
+#include <minix/callnr.h>
+#include <minix/endpoint.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include "param.h"
+#include "select.h"
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+
+
+/*===========================================================================*
+ * do_pipe *
+ *===========================================================================*/
+PUBLIC int do_pipe()
+{
+/* Perform the pipe(fil_des) system call. */
+
+ register struct fproc *rfp;
+ int r;
+ struct filp *fil_ptr0, *fil_ptr1;
+ int fil_des[2]; /* reply goes here */
+ struct vnode *vp;
+ struct vmnt *vmp;
+ struct node_details res;
+
+ /* See if a free vnode is available */
+ if ((vp = get_free_vnode()) == NULL) return(err_code);
+ lock_vnode(vp, VNODE_OPCL);
+
+ /* Get a lock on PFS */
+ if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
+ lock_vmnt(vmp, VMNT_WRITE);
+
+ /* Acquire two file descriptors. */
+ rfp = fp;
+ if ((r = get_fd(0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) {
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ return(r);
+ }
+ rfp->fp_filp[fil_des[0]] = fil_ptr0;
+ FD_SET(fil_des[0], &rfp->fp_filp_inuse);
+ fil_ptr0->filp_count = 1; /* mark filp in use */
+ if ((r = get_fd(0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) {
+ rfp->fp_filp[fil_des[0]] = NULL;
+ FD_CLR(fil_des[0], &rfp->fp_filp_inuse);
+ fil_ptr0->filp_count = 0; /* mark filp free */
+ unlock_filp(fil_ptr0);
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ return(r);
+ }
+ rfp->fp_filp[fil_des[1]] = fil_ptr1;
+ FD_SET(fil_des[1], &rfp->fp_filp_inuse);
+ fil_ptr1->filp_count = 1;
+
+ /* Create a named pipe inode on PipeFS */
+ r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE,
+ NO_DEV, &res);
+
+ if (r != OK) {
+ rfp->fp_filp[fil_des[0]] = NULL;
+ FD_CLR(fil_des[0], &rfp->fp_filp_inuse);
+ fil_ptr0->filp_count = 0;
+ rfp->fp_filp[fil_des[1]] = NULL;
+ FD_CLR(fil_des[1], &rfp->fp_filp_inuse);
+ fil_ptr1->filp_count = 0;
+ unlock_filp(fil_ptr1);
+ unlock_filp(fil_ptr0);
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ return(r);
+ }
+
+ /* Fill in vnode */
+ vp->v_fs_e = res.fs_e;
+ vp->v_mapfs_e = res.fs_e;
+ vp->v_inode_nr = res.inode_nr;
+ vp->v_mapinode_nr = res.inode_nr;
+ vp->v_mode = res.fmode;
+ vp->v_pipe = I_PIPE;
+ vp->v_pipe_rd_pos= 0;
+ vp->v_pipe_wr_pos= 0;
+ vp->v_fs_count = 1;
+ vp->v_mapfs_count = 1;
+ vp->v_ref_count = 1;
+ vp->v_size = 0;
+ vp->v_vmnt = NULL;
+ vp->v_dev = NO_DEV;
+
+ /* Fill in filp objects */
+ fil_ptr0->filp_vno = vp;
+ dup_vnode(vp);
+ fil_ptr1->filp_vno = vp;
+ fil_ptr0->filp_flags = O_RDONLY;
+ fil_ptr1->filp_flags = O_WRONLY;
+
+ m_out.reply_i1 = fil_des[0];
+ m_out.reply_i2 = fil_des[1];
+
+ unlock_filps(fil_ptr0, fil_ptr1);
+ unlock_vmnt(vmp);
+
+ return(OK);
+}
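+
+/* Usage sketch (illustrative; this is where the ordinary POSIX library
+ * wrapper ends up):
+ *
+ *	int fd[2];
+ *	if (pipe(fd) < 0) ...	handle error
+ *
+ * On success fd[0] is open read-only and fd[1] write-only, both referring
+ * to the same anonymous PFS inode created by req_newnode() above.
+ */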
+
+
+/*===========================================================================*
+ * map_vnode *
+ *===========================================================================*/
+PUBLIC int map_vnode(vp, map_to_fs_e)
+struct vnode *vp;
+endpoint_t map_to_fs_e;
+{
+ int r;
+ struct vmnt *vmp;
+ struct node_details res;
+
+ if(vp->v_mapfs_e != NONE) return(OK); /* Already mapped; nothing to do. */
+
+ if ((vmp = find_vmnt(map_to_fs_e)) == NULL)
+ panic("Can't map to unknown endpoint");
+ if (lock_vmnt(vmp, VMNT_WRITE) == EBUSY)
+ vmp = NULL; /* Already locked, do not unlock */
+
+ /* Create a temporary mapping of this inode to another FS. Read and write
+ * operations on data will be handled by that FS. The rest by the 'original'
+ * FS that holds the inode. */
+ if ((r = req_newnode(map_to_fs_e, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE,
+ vp->v_dev, &res)) == OK) {
+ vp->v_mapfs_e = res.fs_e;
+ vp->v_mapinode_nr = res.inode_nr;
+ vp->v_mapfs_count = 1;
+ }
+
+ if (vmp) unlock_vmnt(vmp);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * pipe_check *
+ *===========================================================================*/
+PUBLIC int pipe_check(vp, rw_flag, oflags, bytes, position, notouch)
+register struct vnode *vp; /* the inode of the pipe */
+int rw_flag; /* READING or WRITING */
+int oflags; /* flags set by open or fcntl */
+register int bytes; /* bytes to be read or written (all chunks) */
+u64_t position; /* current file position */
+int notouch; /* check only */
+{
+/* Pipes are a little different. If a process reads from an empty pipe for
+ * which a writer still exists, suspend the reader. If the pipe is empty
+ * and there is no writer, return 0 bytes. If a process is writing to a
+ * pipe and no one is reading from it, give a broken pipe error.
+ */
+ off_t pos;
+ int r = OK;
+
+ if (ex64hi(position) != 0)
+ panic("pipe_check: position too large in pipe");
+ pos = ex64lo(position);
+
+ /* If reading, check for empty pipe. */
+ if (rw_flag == READING) {
+ if (pos >= vp->v_size) {
+ /* Process is reading from an empty pipe. */
+ if (find_filp(vp, W_BIT) != NULL) {
+ /* Writer exists */
+ if (oflags & O_NONBLOCK)
+ r = EAGAIN;
+ else
+ r = SUSPEND;
+
+ /* If need be, activate sleeping writers. */
+ if (susp_count > 0)
+ release(vp, WRITE, susp_count);
+ }
+ return(r);
+ }
+ return(bytes);
+ }
+
+ /* Process is writing to a pipe. */
+ if (find_filp(vp, R_BIT) == NULL) {
+ /* Process is writing, but there is no reader. Tell kernel to generate
+ * a SIGPIPE signal. */
+ if (!notouch) sys_kill(fp->fp_endpoint, SIGPIPE);
+
+ return(EPIPE);
+ }
+
+ /* Calculate how many bytes can be written. */
+ if (pos + bytes > PIPE_BUF) {
+ if (oflags & O_NONBLOCK) {
+ if (bytes <= PIPE_BUF) {
+ /* Write has to be atomic */
+ return(EAGAIN);
+ }
+
+ /* Compute available space */
+ bytes = PIPE_BUF - pos;
+
+ if (bytes > 0) {
+ /* Do a partial write. Need to wakeup reader */
+ if (!notouch)
+ release(vp, READ, susp_count);
+ return(bytes);
+ } else {
+ /* Pipe is full */
+ return(EAGAIN);
+ }
+ }
+
+ if (bytes > PIPE_BUF) {
+ /* Compute available space */
+ bytes = PIPE_BUF - pos;
+
+ if (bytes > 0) {
+ /* Do a partial write. Need to wakeup reader
+ * since we'll suspend ourself in read_write()
+ */
+ if (!notouch)
+ release(vp, READ, susp_count);
+ return(bytes);
+ }
+ }
+
+ /* Pipe is full */
+ return(SUSPEND);
+ }
+
+ /* Writing to an empty pipe. Search for suspended reader. */
+ if (pos == 0 && !notouch)
+ release(vp, READ, susp_count);
+
+ /* Requested amount fits */
+ return(bytes);
+}
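+
+/* Worked example (illustrative numbers; suppose PIPE_BUF were 4096): a
+ * writer at pos = 4000 asking for bytes = 200 has pos + bytes > PIPE_BUF.
+ * With O_NONBLOCK set, bytes <= PIPE_BUF means the write must be atomic and
+ * cannot fit now, so the result is EAGAIN; without O_NONBLOCK the writer
+ * SUSPENDs until readers drain the pipe. For bytes = 5000 (> PIPE_BUF)
+ * atomicity is not required: the caller gets a partial write of
+ * PIPE_BUF - pos = 96 bytes and sleeping readers are released.
+ */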
+
+
+/*===========================================================================*
+ * suspend *
+ *===========================================================================*/
+PUBLIC void suspend(int why)
+{
+/* Take measures to suspend the processing of the present system call.
+ * Store the parameters to be used upon resuming in the process table.
+ * (Actually they are not used when a process is waiting for an I/O device,
+ * but they are needed for pipes, and it is not worth making the distinction.)
+ * The SUSPEND pseudo error should be returned after calling suspend().
+ */
+
+#if DO_SANITYCHECKS
+ if (why == FP_BLOCKED_ON_PIPE)
+ panic("suspend: called for FP_BLOCKED_ON_PIPE");
+
+ if(fp_is_blocked(fp))
+ panic("suspend: called for suspended process");
+
+ if(why == FP_BLOCKED_ON_NONE)
+ panic("suspend: called for FP_BLOCKED_ON_NONE");
+#endif
+
+ if (why == FP_BLOCKED_ON_POPEN)
+ /* #procs susp'ed on pipe*/
+ susp_count++;
+
+ fp->fp_blocked_on = why;
+ assert(fp->fp_grant == GRANT_INVALID || !GRANT_VALID(fp->fp_grant));
+ fp->fp_block_fd = m_in.fd;
+ fp->fp_block_callnr = call_nr;
+ fp->fp_flags &= ~FP_SUSP_REOPEN; /* Clear this flag. The caller
+ * can set it when needed.
+ */
+ if (why == FP_BLOCKED_ON_LOCK) {
+ fp->fp_buffer = (char *) m_in.name1; /* third arg to fcntl() */
+ fp->fp_nbytes = m_in.request; /* second arg to fcntl() */
+ } else {
+ fp->fp_buffer = m_in.buffer; /* for reads and writes */
+ fp->fp_nbytes = m_in.nbytes;
+ }
+}
+
+/*===========================================================================*
+ * wait_for *
+ *===========================================================================*/
+PUBLIC void wait_for(endpoint_t who)
+{
+ if(who == NONE || who == ANY)
+ panic("suspend on NONE or ANY");
+ suspend(FP_BLOCKED_ON_OTHER);
+ fp->fp_task = who;
+}
+
+
+/*===========================================================================*
+ * pipe_suspend *
+ *===========================================================================*/
+PUBLIC void pipe_suspend(rw_flag, fd_nr, buf, size)
+int rw_flag;
+int fd_nr;
+char *buf;
+size_t size;
+{
+/* Take measures to suspend the processing of the present system call.
+ * Store the parameters to be used upon resuming in the process table.
+ * (Actually they are not used when a process is waiting for an I/O device,
+ * but they are needed for pipes, and it is not worth making the distinction.)
+ * The SUSPEND pseudo error should be returned after calling suspend().
+ */
+#if DO_SANITYCHECKS
+ if(fp_is_blocked(fp))
+ panic("pipe_suspend: called for suspended process");
+#endif
+
+ susp_count++; /* #procs susp'ed on pipe*/
+ fp->fp_blocked_on = FP_BLOCKED_ON_PIPE;
+ assert(!GRANT_VALID(fp->fp_grant));
+ fp->fp_block_fd = fd_nr;
+ fp->fp_block_callnr = ((rw_flag == READING) ? READ : WRITE);
+ fp->fp_buffer = buf;
+ fp->fp_nbytes = size;
+}
+
+
+/*===========================================================================*
+ * unsuspend_by_endpt *
+ *===========================================================================*/
+PUBLIC void unsuspend_by_endpt(endpoint_t proc_e)
+{
+/* Revive processes waiting for drivers (SUSPENDed) that have disappeared with
+ * return code EAGAIN.
+ */
+ struct fproc *rp;
+
+ for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) {
+ if (rp->fp_pid == PID_FREE) continue;
+ if (rp->fp_blocked_on == FP_BLOCKED_ON_OTHER && rp->fp_task == proc_e)
+ revive(rp->fp_endpoint, EAGAIN);
+ }
+
+ /* Revive processes waiting in drivers on select()s with EAGAIN too */
+ select_unsuspend_by_endpt(proc_e);
+
+ return;
+}
+
+
+/*===========================================================================*
+ * release *
+ *===========================================================================*/
+PUBLIC void release(vp, op, count)
+register struct vnode *vp; /* inode of pipe */
+int op; /* READ, WRITE, OPEN or CREAT */
+int count; /* max number of processes to release */
+{
+/* Check to see if any process is hanging on the pipe whose vnode is pointed
+ * to by 'vp'. If one is, and it was trying to perform the call indicated by
+ * 'op', release it.
+ */
+
+ register struct fproc *rp;
+ struct filp *f;
+ int selop;
+
+ /* Trying to perform the call also includes SELECTing on it with that
+ * operation.
+ */
+ if (op == READ || op == WRITE) {
+ if (op == READ)
+ selop = SEL_RD;
+ else
+ selop = SEL_WR;
+
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
+ if (f->filp_count < 1 || !(f->filp_pipe_select_ops & selop) ||
+ f->filp_vno != vp)
+ continue;
+ select_callback(f, selop);
+ f->filp_pipe_select_ops &= ~selop;
+ }
+ }
+
+ /* Search the proc table. */
+ for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) {
+ if (rp->fp_pid != PID_FREE && fp_is_blocked(rp) &&
+ !(rp->fp_flags & FP_REVIVED) && rp->fp_block_callnr == op &&
+ rp->fp_filp[rp->fp_block_fd] != NULL &&
+ rp->fp_filp[rp->fp_block_fd]->filp_vno == vp) {
+ revive(rp->fp_endpoint, 0);
+ susp_count--; /* keep track of who is suspended */
+ if(susp_count < 0)
+ panic("susp_count now negative: %d", susp_count);
+ if (--count == 0) return;
+ }
+ }
+}
+
+
+/*===========================================================================*
+ * revive *
+ *===========================================================================*/
+PUBLIC void revive(proc_nr_e, returned)
+int proc_nr_e; /* process to revive */
+int returned; /* if hanging on task, how many bytes read */
+{
+/* Revive a previously blocked process. When a process hangs on tty, this
+ * is the way it is eventually released.
+ */
+ register struct fproc *rfp;
+ int blocked_on;
+ int fd_nr, slot;
+ struct filp *fil_ptr;
+
+ if (proc_nr_e == NONE || isokendpt(proc_nr_e, &slot) != OK) return;
+
+ rfp = &fproc[slot];
+ if (!fp_is_blocked(rfp) || (rfp->fp_flags & FP_REVIVED)) return;
+
+ /* The 'reviving' flag only applies to pipes. Processes waiting for TTY get
+ * a message right away. The revival process is different for TTY and pipes.
+ * For select and TTY revival, the work is already done, for pipes it is not:
+ * the proc must be restarted so it can try again.
+ */
+ blocked_on = rfp->fp_blocked_on;
+ if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_LOCK) {
+ /* Revive a process suspended on a pipe or lock. */
+ rfp->fp_flags |= FP_REVIVED;
+ reviving++; /* process was waiting on pipe or lock */
+ } else if (blocked_on == FP_BLOCKED_ON_DOPEN) {
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+ fd_nr = rfp->fp_block_fd;
+ if (returned < 0) {
+ fil_ptr = rfp->fp_filp[fd_nr];
+ lock_filp(fil_ptr, VNODE_OPCL);
+ rfp->fp_filp[fd_nr] = NULL;
+ FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+ if (fil_ptr->filp_count != 1) {
+ panic("VFS: revive: bad count in filp: %d",
+ fil_ptr->filp_count);
+ }
+ fil_ptr->filp_count = 0;
+ unlock_filp(fil_ptr);
+ put_vnode(fil_ptr->filp_vno);
+ fil_ptr->filp_vno = NULL;
+ reply(proc_nr_e, returned);
+ } else {
+ reply(proc_nr_e, fd_nr);
+ }
+ } else {
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+ if (blocked_on == FP_BLOCKED_ON_POPEN) {
+ /* process blocked in open or create */
+ reply(proc_nr_e, rfp->fp_block_fd);
+ } else if (blocked_on == FP_BLOCKED_ON_SELECT) {
+ reply(proc_nr_e, returned);
+ } else {
+ /* Revive a process suspended on TTY or other device.
+ * Pretend it wants only what there is.
+ */
+ rfp->fp_nbytes = returned;
+ /* If a grant has been issued by FS for this I/O, revoke
+ * it again now that I/O is done.
+ */
+ if (GRANT_VALID(rfp->fp_grant)) {
+ if(cpf_revoke(rfp->fp_grant)) {
+ panic("VFS: revoke failed for grant: %d",
+ rfp->fp_grant);
+ }
+ rfp->fp_grant = GRANT_INVALID;
+ }
+ reply(proc_nr_e, returned); /* unblock the process */
+ }
+ }
+}
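+
+/* Summary of revive() per blocking reason (sketch):
+ *	FP_BLOCKED_ON_PIPE/LOCK: mark FP_REVIVED; the call is retried later
+ *	FP_BLOCKED_ON_DOPEN: reply with the fd or, on error, clean up the filp
+ *	FP_BLOCKED_ON_POPEN: reply with the blocked fd
+ *	FP_BLOCKED_ON_SELECT: reply with the select() result
+ *	other (device I/O): revoke any grant and reply with the byte count
+ */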
+
+
+/*===========================================================================*
+ * unpause *
+ *===========================================================================*/
+PUBLIC void unpause(proc_nr_e)
+int proc_nr_e;
+{
+/* A signal has been sent to a user who is paused on the file system.
+ * Abort the system call with the EINTR error code.
+ */
+
+ register struct fproc *rfp, *org_fp;
+ int slot, blocked_on, fild, status = EINTR, major_dev, minor_dev;
+ struct filp *f;
+ dev_t dev;
+ message mess;
+ int wasreviving = 0;
+
+ if (isokendpt(proc_nr_e, &slot) != OK) {
+ printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e);
+ return;
+ }
+
+ rfp = &fproc[slot];
+ if (!fp_is_blocked(rfp)) return;
+ blocked_on = rfp->fp_blocked_on;
+
+ if (rfp->fp_flags & FP_REVIVED) {
+ rfp->fp_flags &= ~FP_REVIVED;
+ reviving--;
+ wasreviving = 1;
+ }
+
+ switch (blocked_on) {
+ case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */
+ break;
+
+ case FP_BLOCKED_ON_LOCK:/* process trying to set a lock with FCNTL */
+ break;
+
+ case FP_BLOCKED_ON_SELECT:/* process blocking on select() */
+ select_forget(proc_nr_e);
+ break;
+
+ case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */
+ break;
+
+ case FP_BLOCKED_ON_DOPEN:/* process trying to open a device */
+ /* Don't cancel OPEN. Just wait until the open completes. */
+ return;
+
+ case FP_BLOCKED_ON_OTHER:/* process trying to do device I/O (e.g. tty)*/
+ if (rfp->fp_flags & FP_SUSP_REOPEN) {
+ /* Process is suspended while waiting for a reopen.
+ * Just reply EINTR.
+ */
+ rfp->fp_flags &= ~FP_SUSP_REOPEN;
+ status = EINTR;
+ break;
+ }
+
+ fild = rfp->fp_block_fd;
+ if (fild < 0 || fild >= OPEN_MAX)
+ panic("file descriptor out-of-range");
+ f = rfp->fp_filp[fild];
+ dev = (dev_t) f->filp_vno->v_sdev; /* device hung on */
+ major_dev = major(dev);
+ minor_dev = minor(dev);
+ mess.TTY_LINE = minor_dev;
+ mess.USER_ENDPT = rfp->fp_ioproc;
+ mess.IO_GRANT = (char *) rfp->fp_grant;
+
+ /* Tell kernel R or W. Mode is from current call, not open. */
+ mess.COUNT = rfp->fp_block_callnr == READ ? R_BIT : W_BIT;
+ mess.m_type = CANCEL;
+
+ org_fp = fp;
+ fp = rfp; /* hack - ctty_io uses fp */
+ (*dmap[major_dev].dmap_io)(rfp->fp_task, &mess);
+ fp = org_fp;
+ status = mess.REP_STATUS;
+ if (status == SUSPEND)
+ return; /* Process will be revived at a
+ * later time.
+ */
+
+ if (status == EAGAIN) status = EINTR;
+ if (GRANT_VALID(rfp->fp_grant)) {
+ (void) cpf_revoke(rfp->fp_grant);
+ rfp->fp_grant = GRANT_INVALID;
+ }
+ break;
+ default :
+ panic("VFS: unknown block reason: %d", blocked_on);
+ }
+
+ rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+
+ if ((blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_POPEN)&&
+ !wasreviving) {
+ susp_count--;
+ }
+
+ reply(proc_nr_e, status); /* signal interrupted call */
+}
+
+#if DO_SANITYCHECKS
+/*===========================================================================*
+ * check_pipe *
+ *===========================================================================*/
+PUBLIC int check_pipe(void)
+{
+/* Integrity check; verify that susp_count equals what the fproc table thinks
+ * is suspended on a pipe */
+ struct fproc *rfp;
+ int count = 0;
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if (rfp->fp_pid == PID_FREE) continue;
+ if ( !(rfp->fp_flags & FP_REVIVED) &&
+ (rfp->fp_blocked_on == FP_BLOCKED_ON_PIPE ||
+ rfp->fp_blocked_on == FP_BLOCKED_ON_POPEN)) {
+ count++;
+ }
+ }
+
+ if (count != susp_count) {
+ printf("check_pipe: count %d susp_count %d\n", count, susp_count);
+ return(0);
+ }
+
+ return(1);
+}
+#endif
--- /dev/null
+/* This file deals with protection in the file system. It contains the code
+ * for four system calls that relate to protection.
+ *
+ * The entry points into this file are
+ * do_chmod: perform the CHMOD and FCHMOD system calls
+ * do_chown: perform the CHOWN and FCHOWN system calls
+ * do_umask: perform the UMASK system call
+ * do_access: perform the ACCESS system call
+ */
+
+#include "fs.h"
+#include <unistd.h>
+#include <minix/callnr.h>
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+
+/*===========================================================================*
+ * do_chmod *
+ *===========================================================================*/
+PUBLIC int do_chmod()
+{
+/* Perform the chmod(name, mode) and fchmod(fd, mode) system calls. */
+
+ struct filp *flp;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ int r;
+ mode_t new_mode;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ flp = NULL;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_WRITE;
+
+ if (call_nr == CHMOD) {
+ /* Temporarily open the file */
+ if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+ } else { /* call_nr == FCHMOD */
+ /* File is already opened; get a pointer to vnode from filp. */
+ if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
+ return(err_code);
+ vp = flp->filp_vno;
+ dup_vnode(vp);
+ }
+
+ /* Only the owner or the super_user may change the mode of a file.
+ * No one may change the mode of a file on a read-only file system.
+ */
+ if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
+ r = EPERM;
+ else
+ r = read_only(vp);
+
+ if (r == OK) {
+ /* Now make the change. Clear setgid bit if file is not in caller's
+ * group */
+ if (fp->fp_effuid != SU_UID && vp->v_gid != fp->fp_effgid)
+ m_in.mode &= ~I_SET_GID_BIT;
+
+ r = req_chmod(vp->v_fs_e, vp->v_inode_nr, m_in.mode, &new_mode);
+ if (r == OK)
+ vp->v_mode = new_mode;
+ }
+
+ if (call_nr == CHMOD) {
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ } else { /* FCHMOD */
+ unlock_filp(flp);
+ }
+
+ put_vnode(vp);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * do_chown *
+ *===========================================================================*/
+PUBLIC int do_chown()
+{
+/* Perform the chown(path, owner, group) and fchown(fd, owner, group) system
+ * calls. */
+ struct filp *flp;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ int r;
+ uid_t uid;
+ gid_t gid;
+ mode_t new_mode;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ flp = NULL;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_WRITE;
+
+ if (call_nr == CHOWN) {
+ /* Temporarily open the file. */
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+ } else { /* call_nr == FCHOWN */
+ /* File is already opened; get a pointer to the vnode from filp. */
+ if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
+ return(err_code);
+ vp = flp->filp_vno;
+ dup_vnode(vp);
+ }
+
+ r = read_only(vp);
+ if (r == OK) {
+ /* FS is R/W. Whether call is allowed depends on ownership, etc. */
+ /* The super user can do anything, so check permissions only if we're
+ a regular user. */
+ if (fp->fp_effuid != SU_UID) {
+ /* Regular users can only change groups of their own files. */
+ if (vp->v_uid != fp->fp_effuid) r = EPERM;
+ if (vp->v_uid != m_in.owner) r = EPERM; /* no giving away */
+ if (fp->fp_effgid != m_in.group) r = EPERM;
+ }
+ }
+
+ if (r == OK) {
+ /* Do not change uid/gid if new uid/gid is -1. */
+ uid = (m_in.owner == (uid_t)-1 ? vp->v_uid : m_in.owner);
+ gid = (m_in.group == (gid_t)-1 ? vp->v_gid : m_in.group);
+ if ((r = req_chown(vp->v_fs_e, vp->v_inode_nr, uid, gid,
+ &new_mode)) == OK) {
+ vp->v_uid = uid;
+ vp->v_gid = gid;
+ vp->v_mode = new_mode;
+ }
+ }
+
+ if (call_nr == CHOWN) {
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ } else { /* FCHOWN */
+ unlock_filp(flp);
+ }
+
+ put_vnode(vp);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * do_umask *
+ *===========================================================================*/
+PUBLIC int do_umask()
+{
+/* Perform the umask(co_mode) system call. */
+ register mode_t r;
+
+ r = ~fp->fp_umask; /* set 'r' to complement of old mask */
+ fp->fp_umask = ~(m_in.co_mode & RWX_MODES);
+ return(r); /* return complement of old mask */
+}
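+
+/* Example (illustrative): after umask(022), fp_umask holds the complement
+ * ~(022 & RWX_MODES), so a later creation with mode 0666 yields
+ * 0666 & fp_umask = 0644. The value returned, ~fp_umask, is the previously
+ * set mask, as umask(2) requires.
+ */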
+
+
+/*===========================================================================*
+ * do_access *
+ *===========================================================================*/
+PUBLIC int do_access()
+{
+/* Perform the access(name, mode) system call. */
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* First check to see if the mode is correct. */
+ if ( (m_in.mode & ~(R_OK | W_OK | X_OK)) != 0 && m_in.mode != F_OK)
+ return(EINVAL);
+
+ /* Temporarily open the file. */
+ if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ r = forbidden(vp, m_in.mode);
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+
+ put_vnode(vp);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * forbidden *
+ *===========================================================================*/
+PUBLIC int forbidden(struct vnode *vp, mode_t access_desired)
+{
+/* Given a pointer to a vnode, 'vp', and the access desired, determine
+ * if the access is allowed, and if not why not. The routine looks up the
+ * caller's uid in the 'fproc' table. If access is allowed, OK is returned;
+ * if it is forbidden, EACCES is returned.
+ */
+
+ register mode_t bits, perm_bits;
+ uid_t uid;
+ gid_t gid;
+ int r, shift;
+
+ if (vp->v_uid == (uid_t) -1 || vp->v_gid == (gid_t) -1) return(EACCES);
+
+ /* Isolate the relevant rwx bits from the mode. */
+ bits = vp->v_mode;
+ uid = (call_nr == ACCESS ? fp->fp_realuid : fp->fp_effuid);
+ gid = (call_nr == ACCESS ? fp->fp_realgid : fp->fp_effgid);
+
+ if (uid == SU_UID) {
+ /* Grant read and write permission. Grant search permission for
+ * directories. Grant execute permission (for non-directories) if
+ * and only if one of the 'X' bits is set.
+ */
+ if ( (bits & I_TYPE) == I_DIRECTORY ||
+ bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
+ perm_bits = R_BIT | W_BIT | X_BIT;
+ else
+ perm_bits = R_BIT | W_BIT;
+ } else {
+ if (uid == vp->v_uid) shift = 6; /* owner */
+ else if (gid == vp->v_gid) shift = 3; /* group */
+ else if (in_group(fp, vp->v_gid) == OK) shift = 3; /* suppl. groups */
+ else shift = 0; /* other */
+ perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
+ }
+
+ /* If access desired is not a subset of what is allowed, it is refused. */
+ r = OK;
+ if ((perm_bits | access_desired) != perm_bits) r = EACCES;
+
+ /* Check to see if someone is trying to write on a file system that is
+ * mounted read-only.
+ */
+ if (r == OK)
+ if (access_desired & W_BIT)
+ r = read_only(vp);
+
+ return(r);
+}
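+
+/* Worked example (illustrative): for a file with mode 0754 (rwxr-xr--),
+ * the owner gets shift 6, so perm_bits = 7 and any of R/W/X is granted;
+ * a group member gets shift 3, so perm_bits = 5 and W_BIT is refused with
+ * EACCES; everyone else gets shift 0 and perm_bits = 4, read access only.
+ */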
+
+/*===========================================================================*
+ * read_only *
+ *===========================================================================*/
+PUBLIC int read_only(vp)
+struct vnode *vp; /* ptr to vnode whose file system is to be checked */
+{
+/* Check to see if the file system on which the vnode 'vp' resides is mounted
+ * read only. If so, return EROFS, else return OK.
+ */
+ return((vp->v_vmnt->m_flags & VMNT_READONLY) ? EROFS : OK);
+}
--- /dev/null
+#ifndef __VFS_PROTO_H__
+#define __VFS_PROTO_H__
+
+/* Function prototypes. */
+
+#include "timers.h"
+#include "request.h"
+#include "tll.h"
+#include "threads.h"
+#include <minix/rs.h>
+
+/* Structs used in prototypes must be declared as such first. */
+struct filp;
+struct fproc;
+struct vmnt;
+struct vnode;
+struct lookup;
+struct worker_thread;
+struct job;
+
+typedef struct filp * filp_id_t;
+
+/* comm.c */
+_PROTOTYPE(int fs_sendrec, (endpoint_t fs_e, message *reqm) );
+_PROTOTYPE(void fs_sendmore, (struct vmnt *vmp) );
+_PROTOTYPE(void send_work, (void) );
+
+/* device.c */
+_PROTOTYPE( int dev_open, (dev_t dev, endpoint_t proc_e, int flags) );
+_PROTOTYPE( int dev_reopen, (dev_t dev, int filp_no, int flags) );
+_PROTOTYPE( int dev_close, (dev_t dev, int filp_no) );
+_PROTOTYPE( int dev_io, (int op, dev_t dev, endpoint_t proc_e, void *buf,
+ u64_t pos, size_t bytes, int flags, int suspend_reopen) );
+_PROTOTYPE( int gen_opcl, (int op, dev_t dev, endpoint_t task_nr, int flags));
+_PROTOTYPE( int gen_io, (int task_nr, message *mess_ptr) );
+_PROTOTYPE( int asyn_io, (int task_nr, message *mess_ptr) );
+_PROTOTYPE( int no_dev, (int op, dev_t dev, int proc, int flags) );
+_PROTOTYPE( int no_dev_io, (int, message *) );
+_PROTOTYPE( int tty_opcl, (int op, dev_t dev, endpoint_t proc, int flags));
+_PROTOTYPE( int ctty_opcl, (int op, dev_t dev, endpoint_t proc, int flags));
+_PROTOTYPE( int clone_opcl, (int op, dev_t dev, int proc, int flags) );
+_PROTOTYPE( int ctty_io, (int task_nr, message *mess_ptr) );
+_PROTOTYPE( int do_ioctl, (void) );
+_PROTOTYPE( void pm_setsid, (int proc_e) );
+_PROTOTYPE( void dev_status, (message *) );
+_PROTOTYPE( void dev_up, (int major) );
+_PROTOTYPE( endpoint_t find_suspended_ep, (endpoint_t driver,
+ cp_grant_id_t g) );
+_PROTOTYPE( void reopen_reply, (void) );
+_PROTOTYPE( void open_reply, (void) );
+
+/* dmap.c */
+_PROTOTYPE( int do_mapdriver, (void) );
+_PROTOTYPE( void init_dmap, (void) );
+_PROTOTYPE( int dmap_driver_match, (endpoint_t proc, int major) );
+_PROTOTYPE( void dmap_endpt_up, (int proc_nr) );
+_PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr) );
+_PROTOTYPE( struct dmap *get_dmap, (endpoint_t proc_e) );
+_PROTOTYPE( int map_service, (struct rprocpub *rpub) );
+_PROTOTYPE( int map_driver, (const char *label, int major, endpoint_t proc_nr,
+ int dev_style, int flags) );
+
+/* exec.c */
+_PROTOTYPE( int pm_exec, (int proc_e, char *path, vir_bytes path_len,
+ char *frame, vir_bytes frame_len, vir_bytes *pc));
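+/* Sanity check: the block special file lock must currently be free; take it
+ * (trylock must succeed) and release it again. */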
+#define check_bsf_lock() do { \
+ assert(mutex_trylock(&bsf_lock) == 0); \
+ unlock_bsf(); \
+ } while(0)
+
+/* filedes.c */
+_PROTOTYPE( void check_filp_locks, (void) );
+_PROTOTYPE( void check_filp_locks_by_me, (void) );
+_PROTOTYPE( void init_filps, (void) );
+_PROTOTYPE( struct filp *find_filp, (struct vnode *vp, mode_t bits) );
+_PROTOTYPE( int get_fd, (int start, mode_t bits, int *k,
+ struct filp **fpt) );
+_PROTOTYPE( struct filp *get_filp, (int fild, tll_access_t locktype) );
+_PROTOTYPE( struct filp *get_filp2, (struct fproc *rfp, int fild,
+ tll_access_t locktype) );
+_PROTOTYPE( void lock_filp, (struct filp *filp, tll_access_t locktype) );
+_PROTOTYPE( void unlock_filp, (struct filp *filp) );
+_PROTOTYPE( void unlock_filps, (struct filp *filp1, struct filp *filp2) );
+_PROTOTYPE( int invalidate, (struct filp *) );
+_PROTOTYPE( int do_verify_fd, (void) );
+_PROTOTYPE( int set_filp, (filp_id_t sfilp) );
+_PROTOTYPE( int do_set_filp, (void) );
+_PROTOTYPE( int copy_filp, (endpoint_t to_ep, filp_id_t cfilp) );
+_PROTOTYPE( int do_copy_filp, (void) );
+_PROTOTYPE( int put_filp, (filp_id_t pfilp) );
+_PROTOTYPE( int do_put_filp, (void) );
+_PROTOTYPE( int cancel_fd, (endpoint_t ep, int fd) );
+_PROTOTYPE( int do_cancel_fd, (void) );
+_PROTOTYPE( void close_filp, (struct filp *fp) );
+
+/* fscall.c */
+_PROTOTYPE( void nested_fs_call, (message *m) );
+
+/* link.c */
+_PROTOTYPE( int do_link, (void) );
+_PROTOTYPE( int do_unlink, (void) );
+_PROTOTYPE( int do_rename, (void) );
+_PROTOTYPE( int do_truncate, (void) );
+_PROTOTYPE( int do_ftruncate, (void) );
+_PROTOTYPE( int truncate_vnode, (struct vnode *vp, off_t newsize) );
+_PROTOTYPE( int rdlink_direct, (char *orig_path, char *link_path,
+ struct fproc *rfp) );
+
+/* lock.c */
+_PROTOTYPE( int lock_op, (struct filp *f, int req) );
+_PROTOTYPE( void lock_revive, (void) );
+
+/* main.c */
+_PROTOTYPE( int main, (void) );
+_PROTOTYPE( void reply, (int whom, int result) );
+_PROTOTYPE( void lock_proc, (struct fproc *rfp, int force_lock) );
+_PROTOTYPE( void unlock_proc, (struct fproc *rfp) );
+_PROTOTYPE( void *do_dummy, (void *arg) );
+
+/* misc.c */
+_PROTOTYPE( int do_dup, (void) );
+_PROTOTYPE( void pm_exit, (int proc) );
+_PROTOTYPE( int do_fcntl, (void) );
+_PROTOTYPE( void pm_fork, (int pproc, int cproc, int cpid) );
+_PROTOTYPE( void pm_setgid, (int proc_e, int egid, int rgid) );
+_PROTOTYPE( void pm_setuid, (int proc_e, int euid, int ruid) );
+_PROTOTYPE( void pm_setgroups, (int proc_e, int ngroups, gid_t *addr) );
+_PROTOTYPE( int do_sync, (void) );
+_PROTOTYPE( int do_fsync, (void) );
+_PROTOTYPE( void pm_reboot, (void) );
+_PROTOTYPE( int do_svrctl, (void) );
+_PROTOTYPE( int do_getsysinfo, (void) );
+_PROTOTYPE( int pm_dumpcore, (int proc_e, struct mem_map *seg_ptr) );
+_PROTOTYPE( void ds_event, (void) );
+
+/* mount.c */
+_PROTOTYPE( int do_fsready, (void) );
+_PROTOTYPE( int do_mount, (void) );
+_PROTOTYPE( int do_umount, (void) );
+_PROTOTYPE( void mount_pfs, (void) );
+_PROTOTYPE( int mount_fs, (dev_t dev, char fullpath[PATH_MAX+1],
+ endpoint_t fs_e, int rdonly,
+ char mount_label[LABEL_MAX]) );
+_PROTOTYPE( int unmount, (dev_t dev, char *label) );
+_PROTOTYPE( void unmount_all, (void) );
+
+/* open.c */
+_PROTOTYPE( int do_close, (void) );
+_PROTOTYPE( int close_fd, (struct fproc *rfp, int fd_nr) );
+_PROTOTYPE( void close_reply, (void) );
+_PROTOTYPE( int do_creat, (void) );
+_PROTOTYPE( int do_lseek, (void) );
+_PROTOTYPE( int do_llseek, (void) );
+_PROTOTYPE( int do_mknod, (void) );
+_PROTOTYPE( int do_mkdir, (void) );
+_PROTOTYPE( int do_open, (void) );
+_PROTOTYPE( int do_slink, (void) );
+_PROTOTYPE( int do_vm_open, (void) );
+_PROTOTYPE( int do_vm_close, (void) );
+
+/* path.c */
+_PROTOTYPE( struct vnode *advance, (struct vnode *dirp, struct lookup *resolve,
+ struct fproc *rfp) );
+_PROTOTYPE( struct vnode *eat_path, (struct lookup *resolve,
+ struct fproc *rfp) );
+_PROTOTYPE( struct vnode *last_dir, (struct lookup *resolve,
+ struct fproc *rfp) );
+_PROTOTYPE( void lookup_init, (struct lookup *resolve, char *path, int flags,
+ struct vmnt **vmp, struct vnode **vp) );
+_PROTOTYPE( int get_name, (struct vnode *dirp, struct vnode *entry,
+ char *_name) );
+_PROTOTYPE( int canonical_path, (char *orig_path, char *canon_path,
+ struct fproc *rfp) );
+_PROTOTYPE( int do_check_perms, (void) );
+
+/* pipe.c */
+_PROTOTYPE( int do_pipe, (void) );
+_PROTOTYPE( int map_vnode, (struct vnode *vp, endpoint_t fs_e) );
+_PROTOTYPE( void unpause, (int proc_nr_e) );
+_PROTOTYPE( int pipe_check, (struct vnode *vp, int rw_flag,
+ int oflags, int bytes, u64_t position, int notouch) );
+_PROTOTYPE( void release, (struct vnode *vp, int call_nr, int count) );
+_PROTOTYPE( void revive, (int proc_nr, int bytes) );
+_PROTOTYPE( void suspend, (int task) );
+_PROTOTYPE( void pipe_suspend, (int rw_flag, int fd_nr, char *buf,
+ size_t size) );
+_PROTOTYPE( void unsuspend_by_endpt, (endpoint_t) );
+_PROTOTYPE( void wait_for, (endpoint_t) );
+#if DO_SANITYCHECKS
+_PROTOTYPE( int check_pipe, (void) );
+#endif
+
+/* protect.c */
+_PROTOTYPE( int do_access, (void) );
+_PROTOTYPE( int do_chmod, (void) );
+_PROTOTYPE( int do_chown, (void) );
+_PROTOTYPE( int do_umask, (void) );
+_PROTOTYPE( int forbidden, (struct vnode *vp, mode_t access_desired) );
+_PROTOTYPE( int read_only, (struct vnode *vp) );
+
+/* read.c */
+_PROTOTYPE( int do_read, (void) );
+_PROTOTYPE( int do_getdents, (void) );
+_PROTOTYPE( void lock_bsf, (void) );
+_PROTOTYPE( void unlock_bsf, (void) );
+_PROTOTYPE( int read_write, (int rw_flag) );
+_PROTOTYPE( int rw_pipe, (int rw_flag, endpoint_t usr,
+ int fd_nr, struct filp *f, char *buf, size_t req_size) );
+
+/* request.c */
+_PROTOTYPE( int req_breadwrite, (endpoint_t fs_e, endpoint_t user_e,
+ dev_t dev, u64_t pos, unsigned int num_of_bytes,
+ char *user_addr, int rw_flag,
+ u64_t *new_posp, unsigned int *cum_iop) );
+_PROTOTYPE( int req_chmod, (int fs_e, ino_t inode_nr, mode_t rmode,
+ mode_t *new_modep) );
+_PROTOTYPE( int req_chown, (endpoint_t fs_e, ino_t inode_nr,
+ uid_t newuid, gid_t newgid, mode_t *new_modep) );
+_PROTOTYPE( int req_create, (int fs_e, ino_t inode_nr, int omode,
+ uid_t uid, gid_t gid, char *path, node_details_t *res) );
+_PROTOTYPE( int req_flush, (endpoint_t fs_e, dev_t dev) );
+_PROTOTYPE( int req_fstatfs, (int fs_e, int who_e, char *buf) );
+_PROTOTYPE( int req_statvfs, (int fs_e, int who_e, char *buf) );
+_PROTOTYPE( int req_ftrunc, (endpoint_t fs_e, ino_t inode_nr,
+ off_t start, off_t end) );
+_PROTOTYPE( int req_getdents, (endpoint_t fs_e, ino_t inode_nr,
+ u64_t pos, char *buf, size_t size,
+ u64_t *new_pos, int direct) );
+_PROTOTYPE( int req_inhibread, (endpoint_t fs_e, ino_t inode_nr) );
+_PROTOTYPE( int req_link, (endpoint_t fs_e, ino_t link_parent,
+ char *lastc, ino_t linked_file) );
+_PROTOTYPE( int req_lookup, (endpoint_t fs_e, ino_t dir_ino, ino_t root_ino,
+ uid_t uid, gid_t gid, struct lookup *resolve,
+ lookup_res_t *res, struct fproc *rfp) );
+_PROTOTYPE( int req_mkdir, (endpoint_t fs_e, ino_t inode_nr,
+ char *lastc, uid_t uid, gid_t gid, mode_t dmode) );
+_PROTOTYPE( int req_mknod, (endpoint_t fs_e, ino_t inode_nr,
+ char *lastc, uid_t uid, gid_t gid,
+ mode_t dmode, dev_t dev) );
+_PROTOTYPE( int req_mountpoint, (endpoint_t fs_e, ino_t inode_nr) );
+_PROTOTYPE( int req_newnode, (endpoint_t fs_e, uid_t uid,
+ gid_t gid, mode_t dmode,
+ dev_t dev, struct node_details *res) );
+_PROTOTYPE( int req_putnode, (int fs_e, ino_t inode_nr, int count) );
+_PROTOTYPE( int req_rdlink, (endpoint_t fs_e, ino_t inode_nr,
+ endpoint_t who_e, char *buf, size_t len,
+ int direct) );
+_PROTOTYPE( int req_readsuper, (endpoint_t fs_e, char *driver_name,
+ dev_t dev, int readonly, int isroot,
+ struct node_details *res_nodep) );
+_PROTOTYPE( int req_readwrite, (endpoint_t fs_e, ino_t inode_nr,
+ u64_t pos, int rw_flag,
+ endpoint_t user_e, char *user_addr,
+ unsigned int num_of_bytes, u64_t *new_posp,
+ unsigned int *cum_iop) );
+_PROTOTYPE( int req_rename, (endpoint_t fs_e, ino_t old_dir,
+ char *old_name, ino_t new_dir, char *new_name) );
+_PROTOTYPE( int req_rmdir, (endpoint_t fs_e, ino_t inode_nr,
+ char *lastc) );
+_PROTOTYPE(int req_slink, (endpoint_t fs_e, ino_t inode_nr, char *lastc,
+ endpoint_t who_e, char *path_addr,
+ unsigned short path_length, uid_t uid, gid_t gid) );
+_PROTOTYPE( int req_stat, (int fs_e, ino_t inode_nr, int who_e,
+ char *buf, int pos, int stat_version) );
+_PROTOTYPE( int req_sync, (endpoint_t fs_e) );
+_PROTOTYPE( int req_unlink, (endpoint_t fs_e, ino_t inode_nr,
+ char *lastc) );
+_PROTOTYPE( int req_unmount, (endpoint_t fs_e) );
+_PROTOTYPE( int req_utime, (endpoint_t fs_e, ino_t inode_nr,
+ time_t actime, time_t modtime) );
+_PROTOTYPE( int req_newdriver, (endpoint_t fs_e, dev_t dev,
+ endpoint_t driver_e) );
+
+/* stadir.c */
+_PROTOTYPE( int do_chdir, (void) );
+_PROTOTYPE( int do_fchdir, (void) );
+_PROTOTYPE( int do_chroot, (void) );
+_PROTOTYPE( int do_fstat, (void) );
+_PROTOTYPE( int do_stat, (void) );
+_PROTOTYPE( int do_fstatfs, (void) );
+_PROTOTYPE( int do_statvfs, (void) );
+_PROTOTYPE( int do_fstatvfs, (void) );
+_PROTOTYPE( int do_rdlink, (void) );
+_PROTOTYPE( int do_lstat, (void) );
+
+/* time.c */
+_PROTOTYPE( int do_utime, (void) );
+
+/* tll.c */
+_PROTOTYPE( void tll_downgrade, (tll_t *tllp) );
+_PROTOTYPE( int tll_haspendinglock, (tll_t *tllp) );
+_PROTOTYPE( void tll_init, (tll_t *tllp) );
+_PROTOTYPE( int tll_islocked, (tll_t *tllp) );
+_PROTOTYPE( int tll_lock, (tll_t *tllp, tll_access_t locktype) );
+_PROTOTYPE( int tll_locked_by_me, (tll_t *tllp) );
+_PROTOTYPE( void tll_lockstat, (tll_t *tllp) );
+_PROTOTYPE( int tll_unlock, (tll_t *tllp) );
+_PROTOTYPE( void tll_upgrade, (tll_t *tllp) );
+
+/* utility.c */
+_PROTOTYPE( time_t clock_time, (void) );
+_PROTOTYPE( unsigned conv2, (int norm, int w) );
+_PROTOTYPE( long conv4, (int norm, long x) );
+_PROTOTYPE( int fetch_name, (char *path, int len, int flag, char *dest) );
+_PROTOTYPE( int no_sys, (void) );
+_PROTOTYPE( int isokendpt_f, (char *f, int l, endpoint_t e, int *p, int ft));
+_PROTOTYPE( int in_group, (struct fproc *rfp, gid_t grp) );
+
+#define okendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 1)
+#define isokendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 0)
+
+/* vmnt.c */
+_PROTOTYPE( void check_vmnt_locks, (void) );
+_PROTOTYPE( void check_vmnt_locks_by_me, (struct fproc *rfp) );
+_PROTOTYPE( struct vmnt *get_free_vmnt, (void) );
+_PROTOTYPE( struct vmnt *find_vmnt, (endpoint_t fs_e) );
+_PROTOTYPE( struct vmnt *get_locked_vmnt, (struct fproc *rfp) );
+_PROTOTYPE( void init_vmnts, (void) );
+_PROTOTYPE( int lock_vmnt, (struct vmnt *vp, tll_access_t locktype) );
+_PROTOTYPE( void unlock_vmnt, (struct vmnt *vp) );
+
+/* vnode.c */
+_PROTOTYPE( void check_vnode_locks, (void) );
+_PROTOTYPE( void check_vnode_locks_by_me, (struct fproc *rfp) );
+_PROTOTYPE( struct vnode *get_free_vnode, (void) );
+_PROTOTYPE( struct vnode *find_vnode, (int fs_e, int numb) );
+_PROTOTYPE( void init_vnodes, (void) );
+_PROTOTYPE( int is_vnode_locked, (struct vnode *vp) );
+_PROTOTYPE( int lock_vnode, (struct vnode *vp, tll_access_t locktype) );
+_PROTOTYPE( void unlock_vnode, (struct vnode *vp) );
+_PROTOTYPE( void dup_vnode, (struct vnode *vp) );
+_PROTOTYPE( void put_vnode, (struct vnode *vp) );
+_PROTOTYPE( void vnode_clean_refs, (struct vnode *vp) );
+#if DO_SANITYCHECKS
+_PROTOTYPE( int check_vrefs, (void) );
+#endif
+
+/* write.c */
+_PROTOTYPE( int do_write, (void) );
+
+/* gcov.c */
+_PROTOTYPE( int do_gcov_flush, (void) );
+#if ! USE_COVERAGE
+#define do_gcov_flush no_sys
+#endif
+
+/* select.c */
+_PROTOTYPE( int do_select, (void) );
+_PROTOTYPE( void init_select, (void) );
+_PROTOTYPE( void select_callback, (struct filp *, int ops) );
+_PROTOTYPE( void select_forget, (endpoint_t proc_e) );
+_PROTOTYPE( void select_reply1, (endpoint_t driver_e, int minor, int status));
+_PROTOTYPE( void select_reply2, (endpoint_t driver_e, int minor, int status));
+_PROTOTYPE( void select_timeout_check, (timer_t *) );
+_PROTOTYPE( void select_unsuspend_by_endpt, (endpoint_t proc) );
+
+/* worker.c */
+_PROTOTYPE( int worker_available, (void) );
+_PROTOTYPE( struct worker_thread *worker_get, (thread_t worker_tid) );
+_PROTOTYPE( struct job *worker_getjob, (thread_t worker_tid) );
+_PROTOTYPE( void worker_init, (struct worker_thread *worker) );
+_PROTOTYPE( struct worker_thread *worker_self, (void) );
+_PROTOTYPE( void worker_start, (void *(*func)(void *arg)) );
+_PROTOTYPE( void worker_signal, (struct worker_thread *worker) );
+_PROTOTYPE( void worker_wait, (void) );
+_PROTOTYPE( void sys_worker_start, (void *(*func)(void *arg)) );
+_PROTOTYPE( void dl_worker_start, (void *(*func)(void *arg)) );
+#endif
--- /dev/null
+/* This file contains the heart of the mechanism used to read (and write)
+ * files. Read and write requests are split up into chunks that do not cross
+ * block boundaries. Each chunk is then processed in turn. Reads on special
+ * files are also detected and handled.
+ *
+ * The entry points into this file are
+ * do_read: perform the READ system call by calling read_write
+ * do_getdents: read entries from a directory (GETDENTS)
+ * read_write: actually do the work of READ and WRITE
+ *
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <unistd.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include "file.h"
+#include "fproc.h"
+#include "param.h"
+#include <dirent.h>
+#include <assert.h>
+#include <minix/vfsif.h>
+#include "vnode.h"
+#include "vmnt.h"
+
+
+/*===========================================================================*
+ * do_read *
+ *===========================================================================*/
+PUBLIC int do_read()
+{
+ return(read_write(READING));
+}
+
+
+/*===========================================================================*
+ * lock_bsf *
+ *===========================================================================*/
+PUBLIC void lock_bsf(void)
+{
+ message org_m_in;
+ struct fproc *org_fp;
+ struct worker_thread *org_self;
+
+ if (mutex_trylock(&bsf_lock) == 0)
+ return;
+
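+ /* Blocking on the mutex below lets other worker threads run; they may
+ * change the global per-request state (m_in, fp, self), so save it now
+ * and restore it once we hold the lock. */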
+ org_m_in = m_in;
+ org_fp = fp;
+ org_self = self;
+ assert(mutex_lock(&bsf_lock) == 0);
+ m_in = org_m_in;
+ fp = org_fp;
+ self = org_self;
+}
+
+/*===========================================================================*
+ * unlock_bsf *
+ *===========================================================================*/
+PUBLIC void unlock_bsf(void)
+{
+ assert(mutex_unlock(&bsf_lock) == 0);
+}
+
+/*===========================================================================*
+ * read_write *
+ *===========================================================================*/
+PUBLIC int read_write(rw_flag)
+int rw_flag; /* READING or WRITING */
+{
+/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
+ register struct filp *f;
+ register struct vnode *vp;
+ u64_t position, res_pos, new_pos;
+ unsigned int cum_io, cum_io_incr, res_cum_io;
+ int op, oflags, r, block_spec, char_spec, regular;
+ tll_access_t locktype;
+ mode_t mode_word;
+
+ /* If the file descriptor is valid, get the vnode, size and mode. */
+ if (m_in.nbytes < 0) return(EINVAL);
+ locktype = (rw_flag == READING) ? VNODE_READ : VNODE_WRITE;
+ if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code);
+ if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
+ unlock_filp(f);
+ return(f->filp_mode == FILP_CLOSED ? EIO : EBADF);
+ }
+ if (m_in.nbytes == 0) {
+ unlock_filp(f);
+ return(0); /* so char special files need not check for 0 */
+ }
+
+ position = f->filp_pos;
+ oflags = f->filp_flags;
+ vp = f->filp_vno;
+ r = OK;
+ cum_io = 0;
+
+ if (vp->v_pipe == I_PIPE) {
+ if (fp->fp_cum_io_partial != 0) {
+ panic("VFS: read_write: fp_cum_io_partial not clear");
+ }
+ r = rw_pipe(rw_flag, who_e, m_in.fd, f, m_in.buffer, m_in.nbytes);
+ unlock_filp(f);
+ return(r);
+ }
+
+ op = (rw_flag == READING ? VFS_DEV_READ : VFS_DEV_WRITE);
+ mode_word = vp->v_mode & I_TYPE;
+ regular = mode_word == I_REGULAR;
+
+ if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) {
+ if (vp->v_sdev == NO_DEV)
+ panic("VFS: read_write tries to access char dev NO_DEV");
+ }
+
+ if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) {
+ if (vp->v_sdev == NO_DEV)
+ panic("VFS: read_write tries to access block dev NO_DEV");
+ }
+
+ if (char_spec) { /* Character special files. */
+ dev_t dev;
+ int suspend_reopen;
+
+ suspend_reopen = (f->filp_state != FS_NORMAL);
+ dev = (dev_t) vp->v_sdev;
+
+ r = dev_io(op, dev, who_e, m_in.buffer, position, m_in.nbytes, oflags,
+ suspend_reopen);
+ if (r >= 0) {
+ cum_io = r;
+ position = add64ul(position, r);
+ r = OK;
+ }
+ } else if (block_spec) { /* Block special files. */
+ lock_bsf();
+
+ r = req_breadwrite(vp->v_bfs_e, who_e, vp->v_sdev, position,
+ m_in.nbytes, m_in.buffer, rw_flag, &res_pos, &res_cum_io);
+ if (r == OK) {
+ position = res_pos;
+ cum_io += res_cum_io;
+ }
+
+ unlock_bsf();
+ } else { /* Regular files */
+ if (rw_flag == WRITING && block_spec == 0) {
+ /* Check for O_APPEND flag. */
+ if (oflags & O_APPEND) position = cvul64(vp->v_size);
+ }
+
+ /* Issue request */
+ r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position, rw_flag, who_e,
+ m_in.buffer, m_in.nbytes, &new_pos, &cum_io_incr);
+
+ if (r >= 0) {
+ if (ex64hi(new_pos))
+ panic("read_write: bad new pos");
+
+ position = new_pos;
+ cum_io += cum_io_incr;
+ }
+ }
+
+ /* On write, update file size and access time. */
+ if (rw_flag == WRITING) {
+ if (regular || mode_word == I_DIRECTORY) {
+ if (cmp64ul(position, vp->v_size) > 0) {
+ if (ex64hi(position) != 0) {
+ panic("read_write: file size too big ");
+ }
+ vp->v_size = ex64lo(position);
+ }
+ }
+ }
+
+ f->filp_pos = position;
+ unlock_filp(f);
+
+ if (r == OK) return(cum_io);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * do_getdents *
+ *===========================================================================*/
+PUBLIC int do_getdents()
+{
+/* Perform the getdents(fd, buf, size) system call. */
+ int r = OK;
+ u64_t new_pos;
+ register struct filp *rfilp;
+
+ /* Is the file descriptor valid? */
+ if ( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+ if (!(rfilp->filp_mode & R_BIT))
+ r = EBADF;
+ else if ((rfilp->filp_vno->v_mode & I_TYPE) != I_DIRECTORY)
+ r = EBADF;
+
+ if (r == OK) {
+ if (ex64hi(rfilp->filp_pos) != 0)
+ panic("do_getdents: can't handle large offsets");
+
+ r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
+ rfilp->filp_pos, m_in.buffer, m_in.nbytes,&new_pos,0);
+
+ if (r > 0) rfilp->filp_pos = new_pos;
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
+
+
+/*===========================================================================*
+ * rw_pipe *
+ *===========================================================================*/
+PUBLIC int rw_pipe(rw_flag, usr_e, fd_nr, f, buf, req_size)
+int rw_flag; /* READING or WRITING */
+endpoint_t usr_e;
+int fd_nr;
+struct filp *f;
+char *buf;
+size_t req_size;
+{
+ int r, oflags, partial_pipe = 0;
+ size_t size, cum_io, cum_io_incr;
+ struct vnode *vp;
+ u64_t position, new_pos;
+
+ /* Must make sure we're operating on locked filp and vnode */
+ assert(tll_islocked(&f->filp_vno->v_lock));
+ assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
+
+ oflags = f->filp_flags;
+ vp = f->filp_vno;
+ position = cvu64((rw_flag == READING) ? vp->v_pipe_rd_pos :
+ vp->v_pipe_wr_pos);
+ /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
+ cum_io = fp->fp_cum_io_partial;
+
+ r = pipe_check(vp, rw_flag, oflags, req_size, position, 0);
+ if (r <= 0) {
+ if (r == SUSPEND) pipe_suspend(rw_flag, fd_nr, buf, req_size);
+ return(r);
+ }
+
+ size = r;
+ if (size < req_size) partial_pipe = 1;
+
+ /* Truncate the read request to the amount of data in the pipe. */
+ if((rw_flag == READING) &&
+ cmp64ul(add64ul(position, size), vp->v_size) > 0) {
+ /* Position always should fit in an off_t (LONG_MAX). */
+ off_t pos32;
+
+ assert(cmp64ul(position, LONG_MAX) <= 0);
+ pos32 = cv64ul(position);
+ assert(pos32 >= 0);
+ assert(pos32 <= LONG_MAX);
+ size = vp->v_size - pos32;
+ }
+
+ if (vp->v_mapfs_e == 0)
+ panic("unmapped pipe");
+
+ r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
+ buf, size, &new_pos, &cum_io_incr);
+
+ if (r >= 0) {
+ if (ex64hi(new_pos))
+ panic("rw_pipe: bad new pos");
+
+ position = new_pos;
+ cum_io += cum_io_incr;
+ buf += cum_io_incr;
+ req_size -= cum_io_incr;
+ }
+
+ /* On write, update file size and access time. */
+ if (rw_flag == WRITING) {
+ if (cmp64ul(position, vp->v_size) > 0) {
+ if (ex64hi(position) != 0) {
+ panic("read_write: file size too big for v_size");
+ }
+ vp->v_size = ex64lo(position);
+ }
+ } else {
+ if (cmp64ul(position, vp->v_size) >= 0) {
+ /* Reset pipe pointers */
+ vp->v_size = 0;
+ vp->v_pipe_rd_pos= 0;
+ vp->v_pipe_wr_pos= 0;
+ position = cvu64(0);
+ }
+ }
+
+ if (rw_flag == READING)
+ vp->v_pipe_rd_pos= cv64ul(position);
+ else
+ vp->v_pipe_wr_pos= cv64ul(position);
+
+ if (r == OK) {
+ if (partial_pipe) {
+ /* A partial write on a pipe with O_NONBLOCK set
+ * returns the byte count written so far. */
+ if (!(oflags & O_NONBLOCK)) {
+ /* partial write on pipe with req_size > PIPE_BUF,
+ * non-atomic
+ */
+ fp->fp_cum_io_partial = cum_io;
+ pipe_suspend(rw_flag, fd_nr, buf, req_size);
+ return(SUSPEND);
+ }
+ }
+ fp->fp_cum_io_partial = 0;
+ return(cum_io);
+ }
+
+ return(r);
+}
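+
+/* Sketch of a large blocking write (illustrative): req_size > PIPE_BUF with
+ * O_NONBLOCK clear writes what fits, accumulates the byte count in
+ * fp_cum_io_partial and SUSPENDs. Each time readers drain the pipe the call
+ * is revived and rw_pipe() runs again with the advanced buf and reduced
+ * req_size, until the grand total can be returned to the user.
+ */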
--- /dev/null
+/* This file contains the wrapper functions for issuing a request to, and
+ * receiving a response from, an FS process.
+ * Each function builds a request message according to its
+ * parameters, calls the low-level fs_sendrec, and copies
+ * back the response.
+ * The low-level fs_sendrec handles recovery from a dead
+ * driver and reissues the request.
+ */
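+
+/* Common shape of every wrapper below (sketch):
+ *
+ *	message m;
+ *	m.m_type = REQ_<call>;		fill in REQ_ fields; user buffers
+ *					are passed as grants (cpf_grant_*)
+ *	r = fs_sendrec(fs_e, &m);	synchronous round trip to the FS
+ *	cpf_revoke(grant_id);		revoke any grants made
+ *	...				copy results out of the RES_ fields
+ */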
+
+#include "fs.h"
+#include <string.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/statvfs.h>
+#include <minix/vfsif.h>
+#include <minix/com.h>
+#include <minix/const.h>
+#include <minix/endpoint.h>
+#include <minix/u64.h>
+#include <unistd.h>
+#include <minix/vfsif.h>
+#include "fproc.h"
+#include "vmnt.h"
+#include "vnode.h"
+#include "path.h"
+#include "param.h"
+
+
+/*===========================================================================*
+ * req_breadwrite *
+ *===========================================================================*/
+PUBLIC int req_breadwrite(
+ endpoint_t fs_e,
+ endpoint_t user_e,
+ dev_t dev,
+ u64_t pos,
+ unsigned int num_of_bytes,
+ char *user_addr,
+ int rw_flag,
+ u64_t *new_posp,
+ unsigned int *cum_iop
+)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ message m;
+
+ grant_id = cpf_grant_magic(fs_e, user_e, (vir_bytes) user_addr, num_of_bytes,
+ (rw_flag == READING ? CPF_WRITE : CPF_READ));
+ if(grant_id == -1)
+ panic("req_breadwrite: cpf_grant_magic failed");
+
+ /* Fill in request message */
+ m.m_type = rw_flag == READING ? REQ_BREAD : REQ_BWRITE;
+ m.REQ_DEV2 = dev;
+ m.REQ_GRANT = grant_id;
+ m.REQ_SEEK_POS_LO = ex64lo(pos);
+ m.REQ_SEEK_POS_HI = ex64hi(pos);
+ m.REQ_NBYTES = num_of_bytes;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+ if (r != OK) return(r);
+
+ /* Fill in response structure */
+ *new_posp = make64(m.RES_SEEK_POS_LO, m.RES_SEEK_POS_HI);
+ *cum_iop = m.RES_NBYTES;
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * req_chmod *
+ *===========================================================================*/
+PUBLIC int req_chmod(
+ int fs_e,
+ ino_t inode_nr,
+ mode_t rmode,
+ mode_t *new_modep
+)
+{
+ message m;
+ int r;
+
+ /* Fill in request message */
+ m.m_type = REQ_CHMOD;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_MODE = rmode;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+
+ /* Copy back actual mode. */
+ *new_modep = m.RES_MODE;
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_chown *
+ *===========================================================================*/
+PUBLIC int req_chown(
+ endpoint_t fs_e,
+ ino_t inode_nr,
+ uid_t newuid,
+ gid_t newgid,
+ mode_t *new_modep
+)
+{
+ message m;
+ int r;
+
+ /* Fill in request message */
+ m.m_type = REQ_CHOWN;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_UID = newuid;
+ m.REQ_GID = newgid;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+
+ /* Return new mode to caller. */
+ *new_modep = m.RES_MODE;
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_create *
+ *===========================================================================*/
+PUBLIC int req_create(
+ int fs_e,
+ ino_t inode_nr,
+ int omode,
+ uid_t uid,
+ gid_t gid,
+ char *path,
+ node_details_t *res
+)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ size_t len;
+ message m;
+
+ if (path[0] == '/')
+ panic("req_create: filename starts with '/'");
+
+ len = strlen(path) + 1;
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) path, len, CPF_READ);
+ if (grant_id == -1)
+ panic("req_create: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_CREATE;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_MODE = omode;
+ m.REQ_UID = uid;
+ m.REQ_GID = gid;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+ if (r != OK) return(r);
+
+ /* Fill in response structure */
+ res->fs_e = m.m_source;
+ res->inode_nr = m.RES_INODE_NR;
+ res->fmode = m.RES_MODE;
+ res->fsize = m.RES_FILE_SIZE_LO;
+ res->uid = m.RES_UID;
+ res->gid = m.RES_GID;
+ res->dev = m.RES_DEV;
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * req_flush *
+ *===========================================================================*/
+PUBLIC int req_flush(endpoint_t fs_e, dev_t dev)
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_FLUSH;
+ m.REQ_DEV = dev;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_fstatfs *
+ *===========================================================================*/
+PUBLIC int req_fstatfs(int fs_e, int proc_e, char *buf)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ message m;
+
+ grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf, sizeof(struct statfs),
+ CPF_WRITE);
+ if(grant_id == -1)
+ panic("req_fstatfs: cpf_grant_magic failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_FSTATFS;
+ m.REQ_GRANT = grant_id;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_statvfs *
+ *===========================================================================*/
+PUBLIC int req_statvfs(int fs_e, int proc_e, char *buf)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ message m;
+
+ grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf, sizeof(struct statvfs),
+ CPF_WRITE);
+ if(grant_id == -1)
+ panic("req_statvfs: cpf_grant_magic failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_STATVFS;
+ m.REQ_GRANT = grant_id;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_ftrunc *
+ *===========================================================================*/
+PUBLIC int req_ftrunc(endpoint_t fs_e, ino_t inode_nr, off_t start, off_t end)
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_FTRUNC;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_TRC_START_LO = start;
+ m.REQ_TRC_START_HI = 0; /* Not used for now, so clear it. */
+ m.REQ_TRC_END_LO = end;
+ m.REQ_TRC_END_HI = 0; /* Not used for now, so clear it. */
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_getdents *
+ *===========================================================================*/
+PUBLIC int req_getdents(
+ endpoint_t fs_e,
+ ino_t inode_nr,
+ u64_t pos,
+ char *buf,
+ size_t size,
+ u64_t *new_pos,
+ int direct
+)
+{
+ int r;
+ message m;
+ cp_grant_id_t grant_id;
+
+ if (direct) {
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, size,
+ CPF_WRITE);
+ } else {
+ grant_id = cpf_grant_magic(fs_e, who_e, (vir_bytes) buf, size,
+ CPF_WRITE);
+ }
+
+ if (grant_id < 0)
+ panic("req_getdents: cpf_grant_direct/cpf_grant_magic failed: %d",
+ grant_id);
+
+ m.m_type = REQ_GETDENTS;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_GRANT = grant_id;
+ m.REQ_MEM_SIZE = size;
+ m.REQ_SEEK_POS_LO = ex64lo(pos);
+ m.REQ_SEEK_POS_HI = 0; /* Not used for now, so clear it. */
+
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ if (r == OK) {
+ *new_pos = cvul64(m.RES_SEEK_POS_LO);
+ r = m.RES_NBYTES;
+ }
+
+ return(r);
+}
+
+/*===========================================================================*
+ * req_inhibread *
+ *===========================================================================*/
+PUBLIC int req_inhibread(endpoint_t fs_e, ino_t inode_nr)
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_INHIBREAD;
+ m.REQ_INODE_NR = inode_nr;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_link *
+ *===========================================================================*/
+PUBLIC int req_link(
+ endpoint_t fs_e,
+ ino_t link_parent,
+ char *lastc,
+ ino_t linked_file
+)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ const size_t len = strlen(lastc) + 1;
+ message m;
+
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ);
+ if(grant_id == -1)
+ panic("req_link: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_LINK;
+ m.REQ_INODE_NR = linked_file;
+ m.REQ_DIR_INO = link_parent;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_lookup *
+ *===========================================================================*/
+PUBLIC int req_lookup(
+ endpoint_t fs_e,
+ ino_t dir_ino,
+ ino_t root_ino,
+ uid_t uid,
+ gid_t gid,
+ struct lookup *resolve,
+ lookup_res_t *res,
+ struct fproc *rfp
+)
+{
+ int r;
+ size_t len;
+ cp_grant_id_t grant_id=0, grant_id2=0;
+ message m;
+ vfs_ucred_t credentials;
+ int flags;
+
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) resolve->l_path, PATH_MAX+1,
+ CPF_READ | CPF_WRITE);
+ if(grant_id == -1)
+ panic("req_lookup: cpf_grant_direct failed");
+
+ flags = resolve->l_flags;
+ len = strlen(resolve->l_path) + 1;
+
+ m.m_type = REQ_LOOKUP;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+ m.REQ_PATH_SIZE = PATH_MAX + 1;
+ m.REQ_DIR_INO = dir_ino;
+ m.REQ_ROOT_INO = root_ino;
+
+ if(rfp->fp_ngroups > 0) { /* Is the process member of multiple groups? */
+ /* In that case the FS has to copy the uid/gid credentials */
+ int i;
+
+ /* Set credentials */
+ credentials.vu_uid = rfp->fp_effuid;
+ credentials.vu_gid = rfp->fp_effgid;
+ credentials.vu_ngroups = rfp->fp_ngroups;
+ for (i = 0; i < rfp->fp_ngroups; i++)
+ credentials.vu_sgroups[i] = rfp->fp_sgroups[i];
+
+ grant_id2 = cpf_grant_direct(fs_e, (vir_bytes) &credentials,
+ sizeof(credentials), CPF_READ);
+ if(grant_id2 == -1)
+ panic("req_lookup: cpf_grant_direct failed");
+
+ m.REQ_GRANT2 = grant_id2;
+	m.REQ_UCRED_SIZE = sizeof(credentials);
+ flags |= PATH_GET_UCRED;
+ } else {
+ /* When there's only one gid, we can send it directly */
+ m.REQ_UID = uid;
+ m.REQ_GID = gid;
+ flags &= ~PATH_GET_UCRED;
+ }
+
+ m.REQ_FLAGS = flags;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+ if(rfp->fp_ngroups > 0) cpf_revoke(grant_id2);
+
+ /* Fill in response according to the return value */
+ res->fs_e = m.m_source;
+
+ switch (r) {
+ case OK:
+ res->inode_nr = m.RES_INODE_NR;
+ res->fmode = m.RES_MODE;
+ res->fsize = m.RES_FILE_SIZE_LO;
+ res->dev = m.RES_DEV;
+	res->uid = m.RES_UID;
+	res->gid = m.RES_GID;
+ break;
+ case EENTERMOUNT:
+ res->inode_nr = m.RES_INODE_NR;
+ res->char_processed = m.RES_OFFSET;
+ res->symloop = m.RES_SYMLOOP;
+ break;
+ case ELEAVEMOUNT:
+ res->char_processed = m.RES_OFFSET;
+ res->symloop = m.RES_SYMLOOP;
+ break;
+ case ESYMLINK:
+ res->char_processed = m.RES_OFFSET;
+ res->symloop = m.RES_SYMLOOP;
+ break;
+ default:
+ break;
+ }
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_mkdir *
+ *===========================================================================*/
+PUBLIC int req_mkdir(
+ endpoint_t fs_e,
+ ino_t inode_nr,
+ char *lastc,
+ uid_t uid,
+ gid_t gid,
+ mode_t dmode
+)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ size_t len;
+ message m;
+
+ len = strlen(lastc) + 1;
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ);
+ if(grant_id == -1)
+ panic("req_mkdir: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_MKDIR;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_MODE = dmode;
+ m.REQ_UID = uid;
+ m.REQ_GID = gid;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_mknod *
+ *===========================================================================*/
+PUBLIC int req_mknod(
+ endpoint_t fs_e,
+ ino_t inode_nr,
+ char *lastc,
+ uid_t uid,
+ gid_t gid,
+ mode_t dmode,
+ dev_t dev
+)
+{
+ int r;
+ size_t len;
+ cp_grant_id_t grant_id;
+ message m;
+
+ len = strlen(lastc) + 1;
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ);
+ if(grant_id == -1)
+ panic("req_mknod: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_MKNOD;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_MODE = dmode;
+ m.REQ_DEV = dev;
+ m.REQ_UID = uid;
+ m.REQ_GID = gid;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_mountpoint *
+ *===========================================================================*/
+PUBLIC int req_mountpoint(endpoint_t fs_e, ino_t inode_nr)
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_MOUNTPOINT;
+ m.REQ_INODE_NR = inode_nr;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_newnode *
+ *===========================================================================*/
+PUBLIC int req_newnode(
+ endpoint_t fs_e,
+ uid_t uid,
+ gid_t gid,
+ mode_t dmode,
+ dev_t dev,
+ struct node_details *res
+)
+{
+ int r;
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_NEWNODE;
+ m.REQ_MODE = dmode;
+ m.REQ_DEV = dev;
+ m.REQ_UID = uid;
+ m.REQ_GID = gid;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+
+ res->fs_e = m.m_source;
+ res->inode_nr = m.RES_INODE_NR;
+ res->fmode = m.RES_MODE;
+ res->fsize = m.RES_FILE_SIZE_LO;
+ res->dev = m.RES_DEV;
+ res->uid = m.RES_UID;
+ res->gid = m.RES_GID;
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_newdriver *
+ *===========================================================================*/
+PUBLIC int req_newdriver(
+ endpoint_t fs_e,
+ dev_t dev,
+ endpoint_t driver_e
+)
+{
+/* Note: this is the only request function that does not use the fs_sendrec
+ * internal routine: we must avoid triggering the dead-driver recovery
+ * mechanism here, because this function is itself called during that
+ * recovery.
+ */
+ message m;
+ int r;
+
+ /* Fill in request message */
+ m.m_type = REQ_NEW_DRIVER;
+ m.REQ_DEV = dev;
+ m.REQ_DRIVER_E = driver_e;
+
+ /* Issue request */
+ if((r = sendrec(fs_e, &m)) != OK) {
+ printf("%s:%d VFS req_newdriver: error sending message %d to %d\n",
+ __FILE__, __LINE__, r, fs_e);
+ util_stacktrace();
+ return(r);
+ }
+
+ return(OK);
+}
+
+
+
+/*===========================================================================*
+ * req_putnode *
+ *===========================================================================*/
+PUBLIC int req_putnode(fs_e, inode_nr, count)
+endpoint_t fs_e;
+ino_t inode_nr;
+int count;
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_PUTNODE;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_COUNT = count;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_rdlink *
+ *===========================================================================*/
+PUBLIC int req_rdlink(fs_e, inode_nr, proc_e, buf, len, direct)
+endpoint_t fs_e;
+ino_t inode_nr;
+endpoint_t proc_e;
+char *buf;
+size_t len;
+int direct; /* set to 1 to use direct grants instead of magic grants */
+{
+ message m;
+ int r;
+ cp_grant_id_t grant_id;
+
+ if (direct) {
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, len, CPF_WRITE);
+ } else {
+ grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf, len,
+ CPF_WRITE);
+ }
+ if(grant_id == -1)
+ panic("req_rdlink: cpf_grant_magic failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_RDLINK;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_GRANT = grant_id;
+ m.REQ_MEM_SIZE = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ if(r == OK) r = m.RES_NBYTES;
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_readsuper *
+ *===========================================================================*/
+PUBLIC int req_readsuper(
+ endpoint_t fs_e,
+ char *label,
+ dev_t dev,
+ int readonly,
+ int isroot,
+ struct node_details *res_nodep
+)
+{
+ int r;
+ cp_grant_id_t grant_id;
+ size_t len;
+ message m;
+
+ len = strlen(label)+1;
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) label, len, CPF_READ);
+ if (grant_id == -1)
+ panic("req_readsuper: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_READSUPER;
+ m.REQ_FLAGS = 0;
+ if(readonly) m.REQ_FLAGS |= REQ_RDONLY;
+ if(isroot) m.REQ_FLAGS |= REQ_ISROOT;
+ m.REQ_GRANT = grant_id;
+ m.REQ_DEV = dev;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ if(r == OK) {
+ /* Fill in response structure */
+ res_nodep->fs_e = m.m_source;
+ res_nodep->inode_nr = m.RES_INODE_NR;
+ res_nodep->fmode = m.RES_MODE;
+ res_nodep->fsize = m.RES_FILE_SIZE_LO;
+ res_nodep->uid = m.RES_UID;
+ res_nodep->gid = m.RES_GID;
+ }
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_readwrite *
+ *===========================================================================*/
+PUBLIC int req_readwrite(fs_e, inode_nr, pos, rw_flag, user_e,
+ user_addr, num_of_bytes, new_posp, cum_iop)
+endpoint_t fs_e;
+ino_t inode_nr;
+u64_t pos;
+int rw_flag;
+endpoint_t user_e;
+char *user_addr;
+unsigned int num_of_bytes;
+u64_t *new_posp;
+unsigned int *cum_iop;
+{
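+/* Transfer data between a user process and an FS: grant the FS access to
+ * the caller's buffer (write access when reading, read access when writing)
+ * and request a transfer of up to num_of_bytes starting at position pos.
+ */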
+ int r;
+ cp_grant_id_t grant_id;
+ message m;
+
+ if (ex64hi(pos) != 0)
+ panic("req_readwrite: pos too large");
+
+ grant_id = cpf_grant_magic(fs_e, user_e, (vir_bytes) user_addr, num_of_bytes,
+ (rw_flag==READING ? CPF_WRITE:CPF_READ));
+ if (grant_id == -1)
+ panic("req_readwrite: cpf_grant_magic failed");
+
+ /* Fill in request message */
+ m.m_type = rw_flag == READING ? REQ_READ : REQ_WRITE;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_GRANT = grant_id;
+ m.REQ_SEEK_POS_LO = ex64lo(pos);
+ m.REQ_SEEK_POS_HI = 0; /* Not used for now, so clear it. */
+ m.REQ_NBYTES = num_of_bytes;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ if (r == OK) {
+ /* Fill in response structure */
+ *new_posp = cvul64(m.RES_SEEK_POS_LO);
+ *cum_iop = m.RES_NBYTES;
+ }
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_rename *
+ *===========================================================================*/
+PUBLIC int req_rename(fs_e, old_dir, old_name, new_dir, new_name)
+endpoint_t fs_e;
+ino_t old_dir;
+char *old_name;
+ino_t new_dir;
+char *new_name;
+{
+ int r;
+ cp_grant_id_t gid_old, gid_new;
+ size_t len_old, len_new;
+ message m;
+
+ len_old = strlen(old_name) + 1;
+ gid_old = cpf_grant_direct(fs_e, (vir_bytes) old_name, len_old, CPF_READ);
+ if(gid_old == -1)
+ panic("req_rename: cpf_grant_direct failed");
+
+ len_new = strlen(new_name) + 1;
+ gid_new = cpf_grant_direct(fs_e, (vir_bytes) new_name, len_new, CPF_READ);
+ if(gid_new == -1)
+ panic("req_rename: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_RENAME;
+ m.REQ_REN_OLD_DIR = old_dir;
+ m.REQ_REN_NEW_DIR = new_dir;
+ m.REQ_REN_GRANT_OLD = gid_old;
+ m.REQ_REN_LEN_OLD = len_old;
+ m.REQ_REN_GRANT_NEW = gid_new;
+ m.REQ_REN_LEN_NEW = len_new;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(gid_old);
+ cpf_revoke(gid_new);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_rmdir *
+ *===========================================================================*/
+PUBLIC int req_rmdir(fs_e, inode_nr, lastc)
+endpoint_t fs_e;
+ino_t inode_nr;
+char *lastc;
+{
+ int r;
+ cp_grant_id_t grant_id;
+ size_t len;
+ message m;
+
+ len = strlen(lastc) + 1;
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ);
+ if(grant_id == -1)
+ panic("req_rmdir: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_RMDIR;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_slink *
+ *===========================================================================*/
+PUBLIC int req_slink(
+ endpoint_t fs_e,
+ ino_t inode_nr,
+ char *lastc,
+ endpoint_t proc_e,
+ char *path_addr,
+ unsigned short path_length,
+ uid_t uid,
+ gid_t gid
+)
+{
+ int r;
+ size_t len;
+ cp_grant_id_t gid_name, gid_buf;
+ message m;
+
+ len = strlen(lastc) + 1;
+ gid_name = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ);
+ if(gid_name == -1)
+ panic("req_slink: cpf_grant_direct failed");
+
+ gid_buf = cpf_grant_magic(fs_e, proc_e, (vir_bytes) path_addr, path_length,
+ CPF_READ);
+ if(gid_buf == -1) {
+ cpf_revoke(gid_name);
+ panic("req_slink: cpf_grant_magic failed");
+ }
+
+ /* Fill in request message */
+ m.m_type = REQ_SLINK;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_UID = uid;
+ m.REQ_GID = gid;
+ m.REQ_GRANT = gid_name;
+ m.REQ_PATH_LEN = len;
+ m.REQ_GRANT3 = gid_buf;
+ m.REQ_MEM_SIZE = path_length;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(gid_name);
+ cpf_revoke(gid_buf);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_stat *
+ *===========================================================================*/
+PUBLIC int req_stat(fs_e, inode_nr, proc_e, buf, pos, stat_version)
+endpoint_t fs_e;
+ino_t inode_nr;
+endpoint_t proc_e;
+char *buf;
+int pos;
+int stat_version;
+{
+ cp_grant_id_t grant_id;
+ int r;
+ message m;
+ struct stat sb;
+ struct minix_prev_stat old_sb; /* for backward compatibility */
+
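+  /* If the result must be post-processed here (adjusting st_size by a pipe
+   * read position, or converting to the old stat layout), have the FS fill
+   * a local buffer; otherwise grant it direct access to the caller's buffer.
+   */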
+ if (pos != 0 || stat_version != 0)
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) &sb,
+ sizeof(struct stat), CPF_WRITE);
+ else
+ grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+ sizeof(struct stat), CPF_WRITE);
+
+ if (grant_id < 0)
+ panic("req_stat: cpf_grant_* failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_STAT;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_GRANT = grant_id;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ if (r != OK || (pos == 0 && stat_version == 0))
+ return(r);
+
+ if (pos != 0)
+ sb.st_size -= pos;
+ if (stat_version == 0) {
+ r = sys_vircopy(SELF, D, (vir_bytes) &sb, proc_e, D, (vir_bytes) buf,
+ sizeof(struct stat));
+ return(r);
+ }
+
+  /* The user expects the old struct stat layout; there is just one
+   * previous version at the moment. */
+ assert(stat_version == 1);
+
+/* XXX: while the st_Xtime macros are in effect we have to undefine them,
+ * because minix_prev_stat declares st_atime/st_ctime/st_mtime as plain
+ * fields.
+ */
+#undef st_atime
+#undef st_ctime
+#undef st_mtime
+
+/* Copy field by field because of the st_gid type mismatch and the
+ * difference in field order after st_atime.
+ */
+ old_sb.st_dev = sb.st_dev;
+ old_sb.st_ino = sb.st_ino;
+ old_sb.st_mode = sb.st_mode;
+ old_sb.st_nlink = sb.st_nlink;
+ old_sb.st_uid = sb.st_uid;
+ old_sb.st_gid = sb.st_gid;
+ old_sb.st_rdev = sb.st_rdev;
+ old_sb.st_size = sb.st_size;
+#if defined(_NETBSD_SOURCE)
+ old_sb.st_atime = sb.st_atimespec.tv_sec;
+ old_sb.st_mtime = sb.st_mtimespec.tv_sec;
+ old_sb.st_ctime = sb.st_ctimespec.tv_sec;
+#else
+ old_sb.st_atime = sb.st_atime;
+ old_sb.st_mtime = sb.st_mtime;
+ old_sb.st_ctime = sb.st_ctime;
+#endif
+
+ r = sys_vircopy(SELF, D, (vir_bytes) &old_sb, proc_e, D, (vir_bytes) buf,
+ sizeof(struct minix_prev_stat));
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_sync *
+ *===========================================================================*/
+PUBLIC int req_sync(fs_e)
+endpoint_t fs_e;
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_SYNC;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_unlink *
+ *===========================================================================*/
+PUBLIC int req_unlink(fs_e, inode_nr, lastc)
+endpoint_t fs_e;
+ino_t inode_nr;
+char *lastc;
+{
+ cp_grant_id_t grant_id;
+ size_t len;
+ int r;
+ message m;
+
+ len = strlen(lastc) + 1;
+ grant_id = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ);
+ if(grant_id == -1)
+ panic("req_unlink: cpf_grant_direct failed");
+
+ /* Fill in request message */
+ m.m_type = REQ_UNLINK;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_GRANT = grant_id;
+ m.REQ_PATH_LEN = len;
+
+ /* Send/rec request */
+ r = fs_sendrec(fs_e, &m);
+ cpf_revoke(grant_id);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * req_unmount *
+ *===========================================================================*/
+PUBLIC int req_unmount(fs_e)
+endpoint_t fs_e;
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_UNMOUNT;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ * req_utime *
+ *===========================================================================*/
+PUBLIC int req_utime(fs_e, inode_nr, actime, modtime)
+endpoint_t fs_e;
+ino_t inode_nr;
+time_t actime;
+time_t modtime;
+{
+ message m;
+
+ /* Fill in request message */
+ m.m_type = REQ_UTIME;
+ m.REQ_INODE_NR = inode_nr;
+ m.REQ_ACTIME = actime;
+ m.REQ_MODTIME = modtime;
+
+ /* Send/rec request */
+ return fs_sendrec(fs_e, &m);
+}
--- /dev/null
+#ifndef __VFS_REQUEST_H__
+#define __VFS_REQUEST_H__
+
+/* Low level request messages are built and sent by wrapper functions.
+ * This file contains the request and response structures for accessing
+ * those wrapper functions.
+ */
+
+#include <sys/types.h>
+
+/* Structure for response that contains inode details */
+typedef struct node_details {
+ endpoint_t fs_e;
+ ino_t inode_nr;
+ mode_t fmode;
+ off_t fsize;
+ uid_t uid;
+ gid_t gid;
+
+ /* For char/block special files */
+ dev_t dev;
+} node_details_t;
+
+/* Structure for a lookup response */
+typedef struct lookup_res {
+ endpoint_t fs_e;
+ ino_t inode_nr;
+ mode_t fmode;
+ off_t fsize;
+ uid_t uid;
+ gid_t gid;
+ /* For char/block special files */
+ dev_t dev;
+
+ /* Fields used for handling mount point and symbolic links */
+ int char_processed;
+ unsigned char symloop;
+} lookup_res_t;
+
+
+#endif
--- /dev/null
+/* Implement entry point to select system call.
+ *
+ * The entry points into this file are
+ * do_select: perform the SELECT system call
+ * select_callback: notify select system of possible fd operation
+ * select_unsuspend_by_endpt: cancel a blocking select on exiting driver
+ */
+
+#include "fs.h"
+#include <sys/time.h>
+#include <sys/select.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include <string.h>
+#include <assert.h>
+
+#include "select.h"
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include "vnode.h"
+
+/* max. number of simultaneously pending select() calls */
+#define MAXSELECTS 25
+#define FROM_PROC 0
+#define TO_PROC 1
+
+PRIVATE struct selectentry {
+ struct fproc *requestor; /* slot is free iff this is NULL */
+ endpoint_t req_endpt;
+ fd_set readfds, writefds, errorfds;
+ fd_set ready_readfds, ready_writefds, ready_errorfds;
+ fd_set *vir_readfds, *vir_writefds, *vir_errorfds;
+ struct filp *filps[OPEN_MAX];
+ int type[OPEN_MAX];
+ int nfds, nreadyfds;
+ int error;
+ char block;
+ clock_t expiry;
+ timer_t timer; /* if expiry > 0 */
+} selecttab[MAXSELECTS];
+
+FORWARD _PROTOTYPE(int copy_fdsets, (struct selectentry *se, int nfds,
+ int direction) );
+FORWARD _PROTOTYPE(int do_select_request, (struct selectentry *se, int fd,
+ int *ops) );
+FORWARD _PROTOTYPE(void filp_status, (struct filp *fp, int status) );
+FORWARD _PROTOTYPE(int is_deferred, (struct selectentry *se) );
+FORWARD _PROTOTYPE(void restart_proc, (struct selectentry *se) );
+FORWARD _PROTOTYPE(void ops2tab, (int ops, int fd, struct selectentry *e));
+FORWARD _PROTOTYPE(int is_regular_file, (struct filp *f) );
+FORWARD _PROTOTYPE(int is_pipe, (struct filp *f) );
+FORWARD _PROTOTYPE(int is_supported_major, (struct filp *f) );
+FORWARD _PROTOTYPE(void select_lock_filp, (struct filp *f, int ops) );
+FORWARD _PROTOTYPE(int select_request_async, (struct filp *f, int *ops,
+ int block) );
+FORWARD _PROTOTYPE(int select_request_file, (struct filp *f, int *ops,
+ int block) );
+FORWARD _PROTOTYPE(int select_request_major, (struct filp *f, int *ops,
+ int block) );
+FORWARD _PROTOTYPE(int select_request_pipe, (struct filp *f, int *ops,
+ int block) );
+FORWARD _PROTOTYPE(int select_request_sync, (struct filp *f, int *ops,
+ int block) );
+FORWARD _PROTOTYPE(void select_cancel_all, (struct selectentry *e) );
+FORWARD _PROTOTYPE(void select_cancel_filp, (struct filp *f) );
+FORWARD _PROTOTYPE(void select_return, (struct selectentry *) );
+FORWARD _PROTOTYPE(void select_restart_filps, (void) );
+FORWARD _PROTOTYPE(int tab2ops, (int fd, struct selectentry *e) );
+FORWARD _PROTOTYPE(void wipe_select, (struct selectentry *s) );
+
+PRIVATE struct fdtype {
+ int (*select_request)(struct filp *, int *ops, int block);
+ int (*type_match)(struct filp *f);
+} fdtypes[] = {
+ { select_request_major, is_supported_major },
+ { select_request_file, is_regular_file },
+ { select_request_pipe, is_pipe },
+};
+#define SEL_FDS (sizeof(fdtypes) / sizeof(fdtypes[0]))
+PRIVATE int select_majors[] = { /* List of majors that support selecting on */
+ TTY_MAJOR,
+ INET_MAJOR,
+ UDS_MAJOR,
+ LOG_MAJOR,
+};
+#define SEL_MAJORS (sizeof(select_majors) / sizeof(select_majors[0]))
+
+/*===========================================================================*
+ * do_select *
+ *===========================================================================*/
+PUBLIC int do_select(void)
+{
+/* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
+ * call. First we copy the arguments and verify their sanity. Then we check
+ * whether there are file descriptors that satisfy the select call right off the
+ * bat. If so, or if there are no ready file descriptors but the process
+ * requested to return immediately, we return the result. Otherwise we set a
+ * timeout and wait for either the file descriptors to become ready or the
+ * timer to go off. If no timeout value was provided, we wait indefinitely. */
+
+ int r, nfds, do_timeout = 0, fd, s;
+ struct timeval timeout;
+ struct selectentry *se;
+
+ nfds = m_in.SEL_NFDS;
+
+ /* Sane amount of file descriptors? */
+ if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);
+
+ /* Find a slot to store this select request */
+ for (s = 0; s < MAXSELECTS; s++)
+ if (selecttab[s].requestor == NULL) /* Unused slot */
+ break;
+ if (s >= MAXSELECTS) return(ENOSPC);
+
+ se = &selecttab[s];
+ wipe_select(se); /* Clear results of previous usage */
+ se->req_endpt = who_e;
+ se->vir_readfds = (fd_set *) m_in.SEL_READFDS;
+ se->vir_writefds = (fd_set *) m_in.SEL_WRITEFDS;
+ se->vir_errorfds = (fd_set *) m_in.SEL_ERRORFDS;
+
+ /* Copy fdsets from the process */
+ if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) return(r);
+
+ /* Did the process set a timeout value? If so, retrieve it. */
+ if (m_in.SEL_TIMEOUT != NULL) {
+ do_timeout = 1;
+ r = sys_vircopy(who_e, D, (vir_bytes) m_in.SEL_TIMEOUT, SELF, D,
+ (vir_bytes) &timeout, sizeof(timeout));
+ if (r != OK) return(r);
+ }
+
+ /* No nonsense in the timeval */
+ if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0))
+ return(EINVAL);
+
+ /* If there is no timeout, we block forever. Otherwise, we block up to the
+ * specified time interval.
+ */
+ if (!do_timeout) /* No timeout value set */
+ se->block = 1;
+ else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
+ se->block = 1;
+ else /* timeout set as (0,0) - this effects a poll */
+ se->block = 0;
+ se->expiry = 0; /* no timer set (yet) */
+
+ /* Verify that file descriptors are okay to select on */
+ for (fd = 0; fd < nfds; fd++) {
+ struct filp *f;
+ int type, ops;
+
+ /* Because the select() interface implicitly includes file descriptors
+ * you might not want to select on, we have to figure out whether we're
+ * interested in them. Typically, these file descriptors include fd's
+ * inherited from the parent proc and file descriptors that have been
+ * close()d, but had a lower fd than one in the current set.
+ */
+ if (!(ops = tab2ops(fd, se)))
+ continue; /* No operations set; nothing to do for this fd */
+
+ /* Get filp belonging to this fd */
+ f = se->filps[fd] = get_filp(fd, VNODE_READ);
+ if (f == NULL) {
+ if (err_code == EBADF)
+ r = err_code;
+ else /* File descriptor is 'ready' to return EIO */
+ r = EINTR;
+
+ return(r);
+ }
+
+ /* Check file types. According to POSIX 2008:
+ * "The pselect() and select() functions shall support regular files,
+ * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
+ * behavior of pselect() and select() on file descriptors that refer to
+ * other types of file is unspecified."
+ *
+ * In our case, terminal and pseudo-terminal devices are handled by the
+ * TTY major and sockets by either INET major (socket type AF_INET) or
+ * PFS major (socket type AF_UNIX). PFS acts as an FS when it handles
+ * pipes and as a driver when it handles sockets. Additionally, we
+ * support select on the LOG major to handle kernel logging, which is
+ * beyond the POSIX spec. */
+
+ se->type[fd] = -1;
+ for (type = 0; type < SEL_FDS; type++) {
+ if (fdtypes[type].type_match(f)) {
+ se->type[fd] = type;
+ se->nfds = fd+1;
+ se->filps[fd]->filp_selectors++;
+ break;
+ }
+ }
+ unlock_filp(f);
+ if (se->type[fd] == -1) /* Type not found */
+ return(EBADF);
+ }
+
+ /* Check all file descriptors in the set whether one is 'ready' now */
+ for (fd = 0; fd < nfds; fd++) {
+ int ops, r;
+ struct filp *f;
+
+ /* Again, check for involuntarily selected fd's */
+ if (!(ops = tab2ops(fd, se)))
+ continue; /* No operations set; nothing to do for this fd */
+
+	/* Test filp for select operations if that hasn't been done already;
+	 * e.g., two processes sharing a filp may both select on it. */
+ f = se->filps[fd];
+ select_lock_filp(f, f->filp_select_ops | ops);
+ if ((f->filp_select_ops & ops) != ops) {
+ int wantops;
+
+ wantops = (f->filp_select_ops |= ops);
+ r = do_select_request(se, fd, &wantops);
+ unlock_filp(f);
+ if (r != SEL_OK) {
+ if (r == SEL_DEFERRED) continue;
+ else break; /* Error or bogus return code; abort */
+ }
+
+ /* The select request above might have turned on/off some
+ * operations because they were 'ready' or not meaningful.
+		 * Either way, we might have a result, and it needs to be
+		 * stored in the select table entry. */
+ if (wantops & ops) ops2tab(wantops, fd, se);
+ } else {
+ unlock_filp(f);
+ }
+ }
+
+ if ((se->nreadyfds > 0 || !se->block) && !is_deferred(se)) {
+ /* fd's were found that were ready to go right away, and/or
+ * we were instructed not to block at all. Must return
+ * immediately.
+ */
+ r = copy_fdsets(se, se->nfds, TO_PROC);
+ select_cancel_all(se);
+
+ if (r != OK)
+ return(r);
+ else if (se->error != OK)
+ return(se->error);
+
+ return(se->nreadyfds);
+ }
+
+  /* Convert timeval to ticks and set the timer. */
+ if (do_timeout) {
+ int ticks;
+ /* Open Group:
+ * "If the requested timeout interval requires a finer
+ * granularity than the implementation supports, the
+ * actual timeout interval shall be rounded up to the next
+ * supported value."
+ */
+#define USECPERSEC 1000000
+ while(timeout.tv_usec >= USECPERSEC) {
+		/* this avoids overflow in the multiplication by system_hz below */
+ timeout.tv_usec -= USECPERSEC;
+ timeout.tv_sec++;
+ }
+ ticks = timeout.tv_sec * system_hz +
+ (timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
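+	/* E.g., assuming system_hz = 60: a timeout of 1.5 s yields
+	 * 1*60 + (500000*60 + 999999)/1000000 = 90 ticks. */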
+ se->expiry = ticks;
+ set_timer(&se->timer, ticks, select_timeout_check, s);
+ }
+
+ /* If we're blocking, the table entry is now valid */
+ se->requestor = fp;
+
+ /* process now blocked */
+ suspend(FP_BLOCKED_ON_SELECT);
+ return(SUSPEND);
+}
+
+/*===========================================================================*
+ * is_deferred *
+ *===========================================================================*/
+PRIVATE int is_deferred(struct selectentry *se)
+{
+/* Find out whether this select has pending initial replies */
+
+ int fd;
+ struct filp *f;
+
+ for (fd = 0; fd < se->nfds; fd++) {
+ if ((f = se->filps[fd]) == NULL) continue;
+ if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+
+/*===========================================================================*
+ * is_regular_file *
+ *===========================================================================*/
+PRIVATE int is_regular_file(struct filp *f)
+{
+ return(f && f->filp_vno && (f->filp_vno->v_mode & I_TYPE) == I_REGULAR);
+}
+
+/*===========================================================================*
+ * is_pipe *
+ *===========================================================================*/
+PRIVATE int is_pipe(struct filp *f)
+{
+/* Recognize either anonymous pipe or named pipe (FIFO) */
+ return(f && f->filp_vno && (f->filp_vno->v_mode & I_TYPE) == I_NAMED_PIPE);
+}
+
+/*===========================================================================*
+ * is_supported_major *
+ *===========================================================================*/
+PRIVATE int is_supported_major(struct filp *f)
+{
+/* See if this filp is a handle on a device on which we support select() */
+ int m;
+
+ if (!(f && f->filp_vno)) return(FALSE);
+ if ((f->filp_vno->v_mode & I_TYPE) != I_CHAR_SPECIAL) return(FALSE);
+
+ for (m = 0; m < SEL_MAJORS; m++)
+ if (major(f->filp_vno->v_sdev) == select_majors[m])
+ return(TRUE);
+
+ return(FALSE);
+}
+
+/*===========================================================================*
+ * select_request_async *
+ *===========================================================================*/
+PRIVATE int select_request_async(struct filp *f, int *ops, int block)
+{
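+/* Ask an asynchronous driver which of the requested operations are ready.
+ * Only one select request per major device can be outstanding; if the
+ * driver is still busy with one, mark this filp FSF_UPDATE and defer, so
+ * that select_restart_filps() reissues the request once the reply arrives.
+ */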
+ int r, rops, major;
+ struct dmap *dp;
+
+ rops = *ops;
+
+ if (!block && (f->filp_select_flags & FSF_BLOCKED)) {
+ /* This filp is blocked waiting for a reply, but we don't want to
+ * block ourselves. Unless we're awaiting the initial reply, these
+ * operations won't be ready */
+ if (!(f->filp_select_flags & FSF_BUSY)) {
+ if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
+ rops &= ~SEL_RD;
+ if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
+ rops &= ~SEL_WR;
+ if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
+ rops &= ~SEL_ERR;
+ if (!(rops & (SEL_RD|SEL_WR|SEL_ERR))) {
+ /* Nothing left to do */
+ *ops = 0;
+ return(SEL_OK);
+ }
+ }
+ }
+
+ f->filp_select_flags |= FSF_UPDATE;
+ if (block) {
+ rops |= SEL_NOTIFY;
+ if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK;
+ if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK;
+ if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK;
+ }
+
+ if (f->filp_select_flags & FSF_BUSY)
+ return(SEL_DEFERRED);
+
+ major = major(f->filp_vno->v_sdev);
+ if (major < 0 || major >= NR_DEVICES) return(SEL_ERROR);
+ dp = &dmap[major];
+ if (dp->dmap_sel_filp)
+ return(SEL_DEFERRED);
+
+ f->filp_select_flags &= ~FSF_UPDATE;
+ r = dev_io(VFS_DEV_SELECT, f->filp_vno->v_sdev, rops, NULL,
+ cvu64(0), 0, 0, FALSE);
+ if (r < 0 && r != SUSPEND)
+ return(SEL_ERROR);
+
+ if (r != SUSPEND)
+ panic("select_request_asynch: expected SUSPEND got: %d", r);
+
+ dp->dmap_sel_filp = f;
+ f->filp_select_flags |= FSF_BUSY;
+
+ return(SEL_DEFERRED);
+}
+
+/*===========================================================================*
+ * select_request_file *
+ *===========================================================================*/
+PRIVATE int select_request_file(struct filp *f, int *ops, int block)
+{
+ /* Files are always ready, so output *ops is input *ops */
+ return(SEL_OK);
+}
+
+/*===========================================================================*
+ * select_request_major *
+ *===========================================================================*/
+PRIVATE int select_request_major(struct filp *f, int *ops, int block)
+{
+ int major, r;
+
+ major = major(f->filp_vno->v_sdev);
+ if (major < 0 || major >= NR_DEVICES) return(SEL_ERROR);
+
+ if (dmap[major].dmap_style == STYLE_DEVA ||
+ dmap[major].dmap_style == STYLE_CLONE_A)
+ r = select_request_async(f, ops, block);
+ else
+ r = select_request_sync(f, ops, block);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * select_request_sync *
+ *===========================================================================*/
+PRIVATE int select_request_sync(struct filp *f, int *ops, int block)
+{
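+/* Synchronous drivers answer a select query right away: dev_io returns the
+ * set of ready operations, or a negative error code.
+ */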
+ int rops;
+
+ rops = *ops;
+ if (block) rops |= SEL_NOTIFY;
+ *ops = dev_io(VFS_DEV_SELECT, f->filp_vno->v_sdev, rops, NULL,
+ cvu64(0), 0, 0, FALSE);
+ if (*ops < 0)
+ return(SEL_ERROR);
+
+ return(SEL_OK);
+}
+
+/*===========================================================================*
+ * select_request_pipe *
+ *===========================================================================*/
+PRIVATE int select_request_pipe(struct filp *f, int *ops, int block)
+{
+ int orig_ops, r = 0, err;
+
+ orig_ops = *ops;
+
+ if ((*ops & (SEL_RD|SEL_ERR))) {
+ err = pipe_check(f->filp_vno, READING, 0, 1, f->filp_pos, 1);
+
+ if (err != SUSPEND)
+ r |= SEL_RD;
+ if (err < 0 && err != SUSPEND)
+ r |= SEL_ERR;
+ if (err == SUSPEND && !(f->filp_mode & R_BIT)) {
+ /* A "meaningless" read select, therefore ready
+ * for reading and no error set. */
+ r |= SEL_RD;
+ r &= ~SEL_ERR;
+ }
+ }
+
+ if ((*ops & (SEL_WR|SEL_ERR))) {
+ err = pipe_check(f->filp_vno, WRITING, 0, 1, f->filp_pos, 1);
+
+ if (err != SUSPEND)
+ r |= SEL_WR;
+ if (err < 0 && err != SUSPEND)
+ r |= SEL_ERR;
+ if (err == SUSPEND && !(f->filp_mode & W_BIT)) {
+ /* A "meaningless" write select, therefore ready
+			 * for writing and no error set. */
+ r |= SEL_WR;
+ r &= ~SEL_ERR;
+ }
+ }
+
+ /* Some options we collected might not be requested. */
+ *ops = r & orig_ops;
+
+ if (!*ops && block)
+ f->filp_pipe_select_ops |= orig_ops;
+
+ return(SEL_OK);
+}
+
+/*===========================================================================*
+ * tab2ops *
+ *===========================================================================*/
+PRIVATE int tab2ops(int fd, struct selectentry *e)
+{
+ int ops = 0;
+ if (FD_ISSET(fd, &e->readfds)) ops |= SEL_RD;
+ if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
+ if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;
+
+ return(ops);
+}
+
+
+/*===========================================================================*
+ * ops2tab *
+ *===========================================================================*/
+PRIVATE void ops2tab(int ops, int fd, struct selectentry *e)
+{
+ if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) &&
+ !FD_ISSET(fd, &e->ready_readfds)) {
+ FD_SET(fd, &e->ready_readfds);
+ e->nreadyfds++;
+ }
+
+ if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) &&
+ !FD_ISSET(fd, &e->ready_writefds)) {
+ FD_SET(fd, &e->ready_writefds);
+ e->nreadyfds++;
+ }
+
+ if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) &&
+ !FD_ISSET(fd, &e->ready_errorfds)) {
+ FD_SET(fd, &e->ready_errorfds);
+ e->nreadyfds++;
+ }
+}
+
+
+/*===========================================================================*
+ * copy_fdsets *
+ *===========================================================================*/
+PRIVATE int copy_fdsets(struct selectentry *se, int nfds, int direction)
+{
+ int r;
+ size_t fd_setsize;
+ endpoint_t src_e, dst_e;
+ fd_set *src_fds, *dst_fds;
+
+ if (nfds < 0 || nfds > OPEN_MAX)
+ panic("select copy_fdsets: nfds wrong: %d", nfds);
+
+ /* Only copy back as many bits as the user expects. */
+#ifdef __NBSD_LIBC
+ fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
+#else
+ fd_setsize = (size_t) (_FDSETWORDS(nfds) * _FDSETBITSPERWORD/8);
+#endif
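+  /* E.g., with 32-bit fd mask words, nfds = 33 rounds up to two words, so
+   * only 8 bytes are copied instead of a full fd_set. */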
+
+ /* Set source and destination endpoints */
+ src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
+ dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
+
+ /* read set */
+ src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
+ dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
+ if (se->vir_readfds) {
+ r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D,
+ (vir_bytes) dst_fds, fd_setsize);
+ if (r != OK) return(r);
+ }
+
+ /* write set */
+ src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
+ dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
+ if (se->vir_writefds) {
+ r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D,
+ (vir_bytes) dst_fds, fd_setsize);
+ if (r != OK) return(r);
+ }
+
+ /* error set */
+ src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
+ dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
+ if (se->vir_errorfds) {
+ r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D,
+ (vir_bytes) dst_fds, fd_setsize);
+ if (r != OK) return(r);
+ }
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * select_cancel_all *
+ *===========================================================================*/
+PRIVATE void select_cancel_all(struct selectentry *se)
+{
+/* Cancel select. Decrease select usage and cancel timer */
+
+ int fd;
+ struct filp *f;
+
+ /* Always await results of asynchronous requests */
+ assert(!is_deferred(se));
+
+ for (fd = 0; fd < se->nfds; fd++) {
+ if ((f = se->filps[fd]) == NULL) continue;
+ se->filps[fd] = NULL;
+ select_cancel_filp(f);
+ }
+
+ if (se->expiry > 0) {
+ cancel_timer(&se->timer);
+ se->expiry = 0;
+ }
+
+ se->requestor = NULL;
+}
+
+/*===========================================================================*
+ * select_cancel_filp *
+ *===========================================================================*/
+PRIVATE void select_cancel_filp(struct filp *f)
+{
+/* Reduce number of select users of this filp */
+
+ assert(f);
+ assert(f->filp_selectors >= 0);
+ if (f->filp_selectors == 0) return;
+
+ select_lock_filp(f, f->filp_select_ops);
+
+ f->filp_selectors--;
+ if (f->filp_selectors == 0) {
+ /* No one selecting on this filp anymore, forget about select state */
+ f->filp_select_ops = 0;
+ f->filp_select_flags = 0;
+ f->filp_pipe_select_ops = 0;
+ }
+
+ unlock_filp(f);
+}
+
+/*===========================================================================*
+ * select_return *
+ *===========================================================================*/
+PRIVATE void select_return(struct selectentry *se)
+{
+ int r, r1;
+
+ assert(!is_deferred(se)); /* Not done yet, first wait for async reply */
+
+ select_cancel_all(se);
+ r1 = copy_fdsets(se, se->nfds, TO_PROC);
+ if (r1 != OK)
+ r = r1;
+ else if (se->error != OK)
+ r = se->error;
+ else
+ r = se->nreadyfds;
+
+ revive(se->req_endpt, r);
+}
+
+
+/*===========================================================================*
+ * select_callback *
+ *===========================================================================*/
+PUBLIC void select_callback(struct filp *f, int status)
+{
+ filp_status(f, status);
+}
+
+/*===========================================================================*
+ * init_select *
+ *===========================================================================*/
+PUBLIC void init_select(void)
+{
+ int s;
+
+ for (s = 0; s < MAXSELECTS; s++)
+ init_timer(&selecttab[s].timer);
+}
+
+
+/*===========================================================================*
+ * select_forget *
+ *===========================================================================*/
+PUBLIC void select_forget(endpoint_t proc_e)
+{
+/* Something has happened (e.g. signal delivered that interrupts select()).
+ * Totally forget about the select(). */
+
+ int slot;
+ struct selectentry *se;
+
+ for (slot = 0; slot < MAXSELECTS; slot++) {
+ se = &selecttab[slot];
+ if (se->requestor != NULL && se->req_endpt == proc_e)
+ break;
+ }
+
+ if (slot >= MAXSELECTS) return; /* Entry not found */
+ se->error = EINTR;
+ if (is_deferred(se)) return; /* Still awaiting initial reply */
+
+ select_cancel_all(se);
+}
+
+
+/*===========================================================================*
+ * select_timeout_check *
+ *===========================================================================*/
+PUBLIC void select_timeout_check(timer_t *timer)
+{
+ int s;
+ struct selectentry *se;
+
+ s = tmr_arg(timer)->ta_int;
+ if (s < 0 || s >= MAXSELECTS) return; /* Entry does not exist */
+
+ se = &selecttab[s];
+ if (se->requestor == NULL) return;
+ fp = se->requestor;
+ if (se->expiry <= 0) return; /* Strange, did we even ask for a timeout? */
+ se->expiry = 0;
+ if (is_deferred(se)) return; /* Wait for initial replies to DEV_SELECT */
+ select_return(se);
+}
+
+
+/*===========================================================================*
+ * select_unsuspend_by_endpt *
+ *===========================================================================*/
+PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e)
+{
+/* Revive blocked processes when a driver has disappeared */
+
+ int fd, s, major;
+ struct selectentry *se;
+ struct filp *f;
+
+ for (s = 0; s < MAXSELECTS; s++) {
+ int wakehim = 0;
+ se = &selecttab[s];
+ if (se->requestor == NULL) continue;
+
+ for (fd = 0; fd < se->nfds; fd++) {
+ if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL)
+ continue;
+
+ major = major(f->filp_vno->v_sdev);
+ if (dmap_driver_match(proc_e, major)) {
+ se->filps[fd] = NULL;
+ se->error = EINTR;
+ select_cancel_filp(f);
+ wakehim = 1;
+ }
+ }
+
+ if (wakehim && !is_deferred(se))
+ select_return(se);
+ }
+}
+
+
+/*===========================================================================*
+ * select_reply1 *
+ *===========================================================================*/
+PUBLIC void select_reply1(driver_e, minor, status)
+endpoint_t driver_e;
+int minor;
+int status;
+{
+/* Handle reply to DEV_SELECT request */
+
+ int major;
+ dev_t dev;
+ struct filp *f;
+ struct dmap *dp;
+ struct vnode *vp;
+
+ /* Figure out which device is replying */
+ if ((dp = get_dmap(driver_e)) == NULL) {
+ printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
+ __FILE__, __LINE__, driver_e);
+ return;
+ }
+ major = dp-dmap;
+ dev = makedev(major, minor);
+
+ /* Get filp belonging to character special file */
+ if ((f = dp->dmap_sel_filp) == NULL) {
+ printf("VFS (%s:%d): major %d was not expecting a DEV_SELECT reply\n",
+ __FILE__, __LINE__, major);
+ return;
+ }
+
+ /* Is the filp still in use and busy waiting for a reply? The owner might
+ * have vanished before the driver was able to reply. */
+ if (f->filp_count >= 1 && (f->filp_select_flags & FSF_BUSY)) {
+ /* Find vnode and check we got a reply from the device we expected */
+ vp = f->filp_vno;
+ assert(vp != NULL);
+ assert((vp->v_mode & I_TYPE) == I_CHAR_SPECIAL); /* Must be char. special */
+ if (vp->v_sdev != dev) {
+ printf("VFS (%s:%d): expected reply from dev %d not %d\n",
+ __FILE__, __LINE__, vp->v_sdev, dev);
+ return;
+ }
+ }
+
+ select_lock_filp(f, f->filp_select_ops);
+
+ /* No longer waiting for a reply from this device */
+ f->filp_select_flags &= ~FSF_BUSY;
+ dp->dmap_sel_filp = NULL;
+
+ /* The select call is done now, except when
+ * - another process started a select on the same filp with possibly a
+ * different set of operations.
+ * - a process does a select on the same filp but using different file
+ * descriptors.
+ * - the select has a timeout. Upon receiving this reply the operations might
+ * not be ready yet, so we want to wait for that to ultimately happen.
+ * Therefore we need to keep remembering what the operations are. */
+ if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
+ f->filp_select_ops = 0; /* done selecting */
+ else if (!(f->filp_select_flags & FSF_UPDATE))
+ f->filp_select_ops &= ~status; /* there may be operations pending */
+
+ /* Tell filp owners about result unless we need to wait longer */
+ if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
+ if (status > 0) { /* operations ready */
+ if (status & SEL_RD) f->filp_select_flags &= ~FSF_RD_BLOCK;
+ if (status & SEL_WR) f->filp_select_flags &= ~FSF_WR_BLOCK;
+ if (status & SEL_ERR) f->filp_select_flags &= ~FSF_ERR_BLOCK;
+ } else if (status < 0) { /* error */
+ f->filp_select_flags &= ~FSF_BLOCKED; /* No longer blocking */
+ }
+
+ unlock_filp(f);
+ filp_status(f, status); /* Tell filp owners about the results */
+ } else {
+ unlock_filp(f);
+ }
+
+ select_restart_filps();
+}
+
+
+/*===========================================================================*
+ * select_reply2 *
+ *===========================================================================*/
+PUBLIC void select_reply2(driver_e, minor, status)
+endpoint_t driver_e;
+int minor;
+int status;
+{
+/* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when
+ * the select request is 'blocking' until an operation becomes ready. */
+ int major, slot, fd;
+ dev_t dev;
+ struct filp *f;
+ struct dmap *dp;
+ struct vnode *vp;
+ struct selectentry *se;
+
+ if (status == 0) {
+ printf("VFS (%s:%d): weird status (%d) to report\n",
+ __FILE__, __LINE__, status);
+ return;
+ }
+
+ /* Figure out which device is replying */
+ if ((dp = get_dmap(driver_e)) == NULL) {
+ printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
+ __FILE__, __LINE__, driver_e);
+ return;
+ }
+ major = dp-dmap;
+ dev = makedev(major, minor);
+
+ /* Find all file descriptors selecting for this device */
+ for (slot = 0; slot < MAXSELECTS; slot++) {
+ se = &selecttab[slot];
+ if (se->requestor == NULL) continue; /* empty slot */
+
+ for (fd = 0; fd < se->nfds; fd++) {
+ if ((f = se->filps[fd]) == NULL) continue;
+ if ((vp = f->filp_vno) == NULL) continue;
+ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+ if (vp->v_sdev != dev) continue;
+
+ select_lock_filp(f, f->filp_select_ops);
+ if (status > 0) { /* Operations ready */
+ /* Clear the replied bits from the request
+ * mask unless FSF_UPDATE is set.
+ */
+ if (!(f->filp_select_flags & FSF_UPDATE))
+ f->filp_select_ops &= ~status;
+ if (status & SEL_RD)
+ f->filp_select_flags &= ~FSF_RD_BLOCK;
+ if (status & SEL_WR)
+ f->filp_select_flags &= ~FSF_WR_BLOCK;
+ if (status & SEL_ERR)
+ f->filp_select_flags &= ~FSF_ERR_BLOCK;
+
+ ops2tab(status, fd, se);
+ } else {
+ f->filp_select_flags &= ~FSF_BLOCKED;
+ ops2tab(SEL_RD|SEL_WR|SEL_ERR, fd, se);
+ }
+ unlock_filp(f);
+ if (se->nreadyfds > 0) restart_proc(se);
+ }
+ }
+
+ select_restart_filps();
+}
+
+/*===========================================================================*
+ * select_restart_filps *
+ *===========================================================================*/
+PRIVATE void select_restart_filps()
+{
+ int fd, slot;
+ struct filp *f;
+ struct vnode *vp;
+ struct selectentry *se;
+
+ /* Locate filps that can be restarted */
+ for (slot = 0; slot < MAXSELECTS; slot++) {
+ se = &selecttab[slot];
+ if (se->requestor == NULL) continue; /* empty slot */
+
+ /* Only 'deferred' processes are eligible to restart */
+ if (!is_deferred(se)) continue;
+
+	/* Find filps that are not waiting for a reply, but have an updated
+	 * status (i.e., another select on the same filp, with possibly a
+	 * different set of operations, is to be done), and thus require the
+	 * select request to be sent again.
+	 */
+ for (fd = 0; fd < se->nfds; fd++) {
+ int r, wantops, ops;
+ if ((f = se->filps[fd]) == NULL) continue;
+ if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
+ continue; /* initial reply */
+ if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */
+ continue; /* 'update' state */
+
+ wantops = ops = f->filp_select_ops;
+ select_lock_filp(f, ops);
+ vp = f->filp_vno;
+ assert((vp->v_mode & I_TYPE) == I_CHAR_SPECIAL);
+ r = do_select_request(se, fd, &wantops);
+ unlock_filp(f);
+ if (r != SEL_OK) {
+ if (r == SEL_DEFERRED) continue;
+ else break; /* Error or bogus return code; abort */
+ }
+ if (wantops & ops) ops2tab(wantops, fd, se);
+ }
+ }
+}
+
+/*===========================================================================*
+ * do_select_request *
+ *===========================================================================*/
+PRIVATE int do_select_request(se, fd, ops)
+struct selectentry *se;
+int fd;
+int *ops;
+{
+/* Perform actual select request for file descriptor fd */
+
+ int r, type;
+ struct filp *f;
+
+ type = se->type[fd];
+ f = se->filps[fd];
+ r = fdtypes[type].select_request(f, ops, se->block);
+ if (r != SEL_OK && r != SEL_DEFERRED) {
+ se->error = EINTR;
+ se->block = 0; /* Stop blocking to return asap */
+ if (!is_deferred(se)) select_cancel_all(se);
+ }
+
+ return(r);
+}
+
+/*===========================================================================*
+ * filp_status *
+ *===========================================================================*/
+PRIVATE void filp_status(f, status)
+struct filp *f;
+int status;
+{
+/* Tell processes that need to know about the status of this filp */
+ int fd, slot;
+ struct selectentry *se;
+
+ for (slot = 0; slot < MAXSELECTS; slot++) {
+ se = &selecttab[slot];
+ if (se->requestor == NULL) continue; /* empty slot */
+
+ for (fd = 0; fd < se->nfds; fd++) {
+ if (se->filps[fd] != f) continue;
+ if (status < 0)
+ ops2tab(SEL_RD|SEL_WR|SEL_ERR, fd, se);
+ else
+ ops2tab(status, fd, se);
+ restart_proc(se);
+ }
+ }
+}
+
+/*===========================================================================*
+ * restart_proc *
+ *===========================================================================*/
+PRIVATE void restart_proc(se)
+struct selectentry *se;
+{
+/* Tell process about select results (if any) unless there are still results
+ * pending. */
+
+ if ((se->nreadyfds > 0 || !se->block) && !is_deferred(se))
+ select_return(se);
+}
+
+/*===========================================================================*
+ * wipe_select *
+ *===========================================================================*/
+PRIVATE void wipe_select(struct selectentry *se)
+{
+ se->nfds = 0;
+ se->nreadyfds = 0;
+ se->error = OK;
+ se->block = 0;
+ memset(se->filps, 0, sizeof(se->filps));
+
+ FD_ZERO(&se->readfds);
+ FD_ZERO(&se->writefds);
+ FD_ZERO(&se->errorfds);
+ FD_ZERO(&se->ready_readfds);
+ FD_ZERO(&se->ready_writefds);
+ FD_ZERO(&se->ready_errorfds);
+}
+
+/*===========================================================================*
+ * select_lock_filp *
+ *===========================================================================*/
+PRIVATE void select_lock_filp(struct filp *f, int ops)
+{
+/* Lock a filp and vnode based on which operations are requested */
+  tll_access_t locktype;
+
+ locktype = VNODE_READ; /* By default */
+
+ if (ops & (SEL_WR|SEL_ERR))
+ /* Selecting for error or writing requires exclusive access */
+ locktype = VNODE_WRITE;
+
+ lock_filp(f, locktype);
+}
--- /dev/null
+#ifndef __VFS_SELECT_H__
+#define __VFS_SELECT_H__
+
+/* return codes for the select_request_* functions */
+#define SEL_OK 0 /* ready */
+#define SEL_ERROR 1 /* failed */
+#define SEL_DEFERRED 2 /* request is sent to driver */
+
+#endif
--- /dev/null
+/* This file contains the code for performing the system calls relating to
+ * status and directories.
+ *
+ * The entry points into this file are
+ *  do_fchdir: perform the FCHDIR system call
+ *  do_chdir: perform the CHDIR system call
+ *  do_chroot: perform the CHROOT system call
+ *  do_lstat: perform the LSTAT system call
+ *  do_stat: perform the STAT system call
+ *  do_fstat: perform the FSTAT system call
+ *  do_fstatfs: perform the FSTATFS system call
+ *  do_statvfs: perform the STATVFS system call
+ *  do_fstatvfs: perform the FSTATVFS system call
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include <string.h>
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include <minix/vfsif.h>
+#include <minix/callnr.h>
+#include "vnode.h"
+#include "vmnt.h"
+
+FORWARD _PROTOTYPE( int change, (struct vnode **iip, char *name_ptr, int len));
+FORWARD _PROTOTYPE( int change_into, (struct vnode **iip, struct vnode *vp));
+
+/*===========================================================================*
+ * do_fchdir *
+ *===========================================================================*/
+PUBLIC int do_fchdir()
+{
+ /* Change directory on already-opened fd. */
+ struct filp *rfilp;
+ int r;
+
+ /* Is the file descriptor valid? */
+ if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+ r = change_into(&fp->fp_wd, rfilp->filp_vno);
+ unlock_filp(rfilp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_chdir *
+ *===========================================================================*/
+PUBLIC int do_chdir()
+{
+/* Perform the chdir(name) system call. */
+
+ return change(&fp->fp_wd, m_in.name, m_in.name_length);
+}
+
+/*===========================================================================*
+ * do_chroot *
+ *===========================================================================*/
+PUBLIC int do_chroot()
+{
+/* Perform the chroot(name) system call. */
+
+ if (!super_user) return(EPERM); /* only su may chroot() */
+ return change(&fp->fp_rd, m_in.name, m_in.name_length);
+}
+
+/*===========================================================================*
+ * change *
+ *===========================================================================*/
+PRIVATE int change(iip, name_ptr, len)
+struct vnode **iip; /* pointer to the inode pointer for the dir */
+char *name_ptr; /* pointer to the directory name to change to */
+int len; /* length of the directory name string */
+{
+/* Do the actual work for chdir() and chroot(). */
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+ int r;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+  /* Try to resolve the directory name */
+ if (fetch_name(name_ptr, len, M3, fullpath) != OK) return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+ r = change_into(iip, vp);
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+ put_vnode(vp);
+ return(r);
+}
+
+/*===========================================================================*
+ * change_into *
+ *===========================================================================*/
+PRIVATE int change_into(iip, vp)
+struct vnode **iip; /* pointer to the inode pointer for the dir */
+struct vnode *vp; /* this is what the inode has to become */
+{
+ int r;
+
+ if (*iip == vp) return(OK); /* Nothing to do */
+
+ /* It must be a directory and also be searchable */
+ if ((vp->v_mode & I_TYPE) != I_DIRECTORY)
+ r = ENOTDIR;
+ else
+	r = forbidden(vp, X_BIT); /* Check if dir is searchable */
+ if (r != OK) return(r);
+
+ /* Everything is OK. Make the change. */
+ put_vnode(*iip); /* release the old directory */
+ dup_vnode(vp);
+ *iip = vp; /* acquire the new one */
+ return(OK);
+}
+
+/*===========================================================================*
+ * do_stat *
+ *===========================================================================*/
+PUBLIC int do_stat()
+{
+/* Perform the stat(name, buf) system call. */
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+ int old_stat = 0;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if (call_nr == PREV_STAT)
+ old_stat = 1;
+
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+ r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat);
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+
+ put_vnode(vp);
+ return r;
+}
+
+/*===========================================================================*
+ * do_fstat *
+ *===========================================================================*/
+PUBLIC int do_fstat()
+{
+/* Perform the fstat(fd, buf) system call. */
+ register struct filp *rfilp;
+ int r;
+ int pipe_pos = 0;
+ int old_stat = 0;
+
+ if (call_nr == PREV_FSTAT)
+ old_stat = 1;
+
+ /* Is the file descriptor valid? */
+ if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* If we read from a pipe, send position too */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ if (rfilp->filp_mode & R_BIT)
+ if (ex64hi(rfilp->filp_pos) != 0) {
+ panic("do_fstat: bad position in pipe");
+ }
+ pipe_pos = ex64lo(rfilp->filp_pos);
+ }
+
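+  /* req_stat subtracts this position from st_size, so the process sees how
+   * many bytes are still to be read from the pipe. */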
+ r = req_stat(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
+ who_e, m_in.buffer, pipe_pos, old_stat);
+
+ unlock_filp(rfilp);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * do_fstatfs *
+ *===========================================================================*/
+PUBLIC int do_fstatfs()
+{
+/* Perform the fstatfs(fd, buf) system call. */
+ struct filp *rfilp;
+ int r;
+
+ /* Is the file descriptor valid? */
+ if( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+ r = req_fstatfs(rfilp->filp_vno->v_fs_e, who_e, m_in.buffer);
+
+ unlock_filp(rfilp);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * do_statvfs *
+ *===========================================================================*/
+PUBLIC int do_statvfs()
+{
+/* Perform the statvfs(name, buf) system call. */
+ int r;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if (fetch_name(m_in.STATVFS_NAME, m_in.STATVFS_LEN, M1, fullpath) != OK)
+ return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+ r = req_statvfs(vp->v_fs_e, who_e, m_in.STATVFS_BUF);
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+
+ put_vnode(vp);
+ return r;
+}
+
+/*===========================================================================*
+ * do_fstatvfs *
+ *===========================================================================*/
+PUBLIC int do_fstatvfs()
+{
+/* Perform the fstatvfs(fd, buf) system call. */
+ register struct filp *rfilp;
+ int r;
+
+ /* Is the file descriptor valid? */
+ if ((rfilp = get_filp(m_in.FSTATVFS_FD, VNODE_READ)) == NULL)
+ return(err_code);
+
+ r = req_statvfs(rfilp->filp_vno->v_fs_e, who_e, m_in.FSTATVFS_BUF);
+
+ unlock_filp(rfilp);
+
+ return(r);
+}
+
+/*===========================================================================*
+ * do_lstat *
+ *===========================================================================*/
+PUBLIC int do_lstat()
+{
+/* Perform the lstat(name, buf) system call. */
+ struct vnode *vp;
+ struct vmnt *vmp;
+ int r;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+ int old_stat = 0;
+
+ lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_READ;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ if (call_nr == PREV_LSTAT)
+ old_stat = 1;
+ if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+ return(err_code);
+
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+ r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat);
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+
+ put_vnode(vp);
+ return(r);
+}
--- /dev/null
+/* This file contains the table used to map system call numbers onto the
+ * routines that perform them.
+ */
+
+#define _TABLE
+
+#include "fs.h"
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include "file.h"
+#include "fproc.h"
+#include "lock.h"
+#include "vnode.h"
+#include "vmnt.h"
+
+PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = {
+ no_sys, /* 0 = unused */
+ no_sys, /* 1 = (exit) */
+ no_sys, /* 2 = (fork) */
+ do_read, /* 3 = read */
+ do_write, /* 4 = write */
+ do_open, /* 5 = open */
+ do_close, /* 6 = close */
+ no_sys, /* 7 = wait */
+ do_creat, /* 8 = creat */
+ do_link, /* 9 = link */
+ do_unlink, /* 10 = unlink */
+ no_sys, /* 11 = waitpid */
+ do_chdir, /* 12 = chdir */
+ no_sys, /* 13 = time */
+ do_mknod, /* 14 = mknod */
+ do_chmod, /* 15 = chmod */
+ do_chown, /* 16 = chown */
+ no_sys, /* 17 = break */
+ do_stat, /* 18 = stat (prev)*/
+ do_lseek, /* 19 = lseek */
+ no_sys, /* 20 = getpid */
+ do_mount, /* 21 = mount */
+ do_umount, /* 22 = umount */
+ no_sys, /* 23 = (setuid) */
+ no_sys, /* 24 = getuid */
+ no_sys, /* 25 = (stime) */
+ no_sys, /* 26 = ptrace */
+ no_sys, /* 27 = alarm */
+ do_fstat, /* 28 = fstat (prev)*/
+ no_sys, /* 29 = pause */
+ do_utime, /* 30 = utime */
+ no_sys, /* 31 = (stty) */
+ no_sys, /* 32 = (gtty) */
+ do_access, /* 33 = access */
+ no_sys, /* 34 = (nice) */
+ no_sys, /* 35 = (ftime) */
+ do_sync, /* 36 = sync */
+ no_sys, /* 37 = kill */
+ do_rename, /* 38 = rename */
+ do_mkdir, /* 39 = mkdir */
+ do_unlink, /* 40 = rmdir */
+ do_dup, /* 41 = dup */
+ do_pipe, /* 42 = pipe */
+ no_sys, /* 43 = times */
+ no_sys, /* 44 = (prof) */
+ do_slink, /* 45 = symlink */
+ no_sys, /* 46 = (setgid)*/
+ no_sys, /* 47 = getgid */
+ no_sys, /* 48 = (signal)*/
+ do_rdlink, /* 49 = readlink*/
+ do_lstat, /* 50 = lstat (prev)*/
+ no_sys, /* 51 = (acct) */
+ no_sys, /* 52 = (phys) */
+ no_sys, /* 53 = (lock) */
+ do_ioctl, /* 54 = ioctl */
+ do_fcntl, /* 55 = fcntl */
+ no_sys, /* 56 = (mpx) */
+ do_fsready, /* 57 = FS proc ready */
+ no_sys, /* 58 = unused */
+ no_sys, /* 59 = (execve)*/
+ do_umask, /* 60 = umask */
+ do_chroot, /* 61 = chroot */
+ no_sys, /* 62 = (setsid)*/
+ no_sys, /* 63 = (getpgrp)*/
+ no_sys, /* 64 = (itimer)*/
+ do_stat, /* 65 = stat */
+ do_fstat, /* 66 = fstat */
+ do_lstat, /* 67 = lstat */
+ no_sys, /* 68 = unused */
+ no_sys, /* 69 = unused */
+ no_sys, /* 70 = unused */
+ no_sys, /* 71 = (sigaction) */
+ no_sys, /* 72 = (sigsuspend) */
+ no_sys, /* 73 = (sigpending) */
+ no_sys, /* 74 = (sigprocmask) */
+ no_sys, /* 75 = (sigreturn) */
+ no_sys, /* 76 = (reboot) */
+ do_svrctl, /* 77 = svrctl */
+ no_sys, /* 78 = (sysuname) */
+ do_getsysinfo, /* 79 = getsysinfo */
+ do_getdents, /* 80 = getdents */
+ do_llseek, /* 81 = llseek */
+ do_fstatfs, /* 82 = fstatfs */
+ do_statvfs, /* 83 = statvfs */
+ do_fstatvfs, /* 84 = fstatvfs */
+ do_select, /* 85 = select */
+ do_fchdir, /* 86 = fchdir */
+ do_fsync, /* 87 = fsync */
+ no_sys, /* 88 = (getpriority) */
+ no_sys, /* 89 = (setpriority) */
+ no_sys, /* 90 = (gettimeofday) */
+ no_sys, /* 91 = (seteuid) */
+ no_sys, /* 92 = (setegid) */
+ do_truncate, /* 93 = truncate */
+ do_ftruncate, /* 94 = ftruncate */
+ do_chmod, /* 95 = fchmod */
+ do_chown, /* 96 = fchown */
+ no_sys, /* 97 = (getsysinfo_up) */
+ no_sys, /* 98 = (sprofile) */
+ no_sys, /* 99 = (cprofile) */
+ /* THE MINIX3 ABI ENDS HERE */
+ no_sys, /* 100 = (exec_newmem) */
+ no_sys, /* 101 = (srv_fork) */
+ no_sys, /* 102 = (exec_restart) */
+ no_sys, /* 103 = (procstat) */
+ no_sys, /* 104 = (getprocnr) */
+ no_sys, /* 105 = unused */
+ no_sys, /* 106 = unused */
+ no_sys, /* 107 = (getepinfo) */
+ no_sys, /* 108 = (adddma) */
+ no_sys, /* 109 = (deldma) */
+ no_sys, /* 110 = (getdma) */
+ no_sys, /* 111 = (srv_kill) */
+ do_gcov_flush, /* 112 = gcov_flush */
+ no_sys, /* 113 = (getsid) */
+};
+/* This should not fail with "array size is negative": */
+extern int dummy[sizeof(call_vec) == NCALLS * sizeof(call_vec[0]) ? 1 : -1];
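+/* The 'dummy' declaration above is a compile-time assertion: the array size
+ * is 1 when call_vec has exactly NCALLS entries and -1 (a compile error)
+ * otherwise. A minimal reusable form of the same trick, shown only as an
+ * illustrative sketch (the macro name CT_ASSERT is ours, not part of this
+ * patch):
+ *
+ *	#define CT_ASSERT(cond) extern int ct_assert_dummy[(cond) ? 1 : -1]
+ *	CT_ASSERT(sizeof(call_vec) == NCALLS * sizeof(call_vec[0]));
+ */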
+
+PUBLIC _PROTOTYPE (int (*pfs_call_vec[]), (void) ) = {
+
+ no_sys, /* 0 */
+ do_check_perms, /* 1 */
+ do_verify_fd, /* 2 */
+ do_set_filp, /* 3 */
+ do_copy_filp, /* 4 */
+ do_put_filp, /* 5 */
+ do_cancel_fd /* 6 */
+};
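+
+/* Illustrative sketch (not part of this patch) of how such a call table is
+ * dispatched from a server's main loop, assuming call_nr holds the call
+ * number taken from the incoming message:
+ *
+ *	int r;
+ *	if (call_nr < 0 || call_nr >= NCALLS)
+ *		r = ENOSYS;
+ *	else
+ *		r = (*call_vec[call_nr])();
+ */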
--- /dev/null
+#ifndef __VFS_WORKERS_H__
+#define __VFS_WORKERS_H__
+#include <minix/mthread.h>
+#include "job.h"
+
+#define thread_t mthread_thread_t
+#define mutex_t mthread_mutex_t
+#define cond_t mthread_cond_t
+#define attr_t mthread_attr_t
+
+#define threads_init mthread_init
+#define yield mthread_yield
+#define yield_all mthread_yield_all
+
+#define mutex_init mthread_mutex_init
+#define mutex_destroy mthread_mutex_destroy
+#define mutex_lock mthread_mutex_lock
+#define mutex_trylock mthread_mutex_trylock
+#define mutex_unlock mthread_mutex_unlock
+
+#define cond_init mthread_cond_init
+#define cond_destroy mthread_cond_destroy
+#define cond_wait mthread_cond_wait
+#define cond_signal mthread_cond_signal
+
+struct worker_thread {
+ thread_t w_tid;
+ mutex_t w_event_mutex;
+ cond_t w_event;
+ struct job w_job;
+ struct fproc *w_fp;
+ struct worker_thread *w_next;
+};
+
+#endif
--- /dev/null
+/* This file takes care of those system calls that deal with time.
+ *
+ * The entry points into this file are
+ * do_utime: perform the UTIME system call
+ */
+
+#include "fs.h"
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include "vnode.h"
+#include <minix/vfsif.h>
+#include "vmnt.h"
+
+/*===========================================================================*
+ * do_utime *
+ *===========================================================================*/
+PUBLIC int do_utime()
+{
+/* Perform the utime(name, timep) system call. */
+ register int len;
+ int r;
+ time_t actime, modtime;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Adjust for case of 'timep' being NULL;
+ * utime_strlen then holds the actual size: strlen(name)+1 */
+ len = m_in.utime_length;
+ if (len == 0) len = m_in.utime_strlen;
+
+ /* Temporarily open the file */
+ if (fetch_name(m_in.utime_file, len, M1, fullpath) != OK) return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ /* Only the owner of a file or the super user can change its timestamps. */
+ r = OK;
+ if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) r = EPERM;
+ if (m_in.utime_length == 0 && r != OK) r = forbidden(vp, W_BIT);
+ if (read_only(vp) != OK) r = EROFS; /* Not even su can touch if R/O */
+ if (r == OK) {
+ /* Issue request */
+ if(m_in.utime_length == 0) {
+ actime = modtime = clock_time();
+ } else {
+ actime = m_in.utime_actime;
+ modtime = m_in.utime_modtime;
+ }
+ r = req_utime(vp->v_fs_e, vp->v_inode_nr, actime, modtime);
+ }
+
+ unlock_vnode(vp);
+ unlock_vmnt(vmp);
+
+ put_vnode(vp);
+ return(r);
+}
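+
+/* Illustrative note: a caller doing utime(path, NULL) reaches do_utime()
+ * with utime_length == 0; both timestamps are then set to the current time
+ * and write permission on the file suffices. With an explicit utimbuf, the
+ * caller must own the file or be the superuser, as checked above. */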
--- /dev/null
+/* This file contains the implementation of the three-level-lock. */
+
+#include "fs.h"
+#include "glo.h"
+#include "tll.h"
+#include "threads.h"
+#include <assert.h>
+
+FORWARD _PROTOTYPE( int tll_append, (tll_t *tllp, tll_access_t locktype));
+
+PRIVATE int tll_append(tll_t *tllp, tll_access_t locktype)
+{
+ struct worker_thread *queue;
+
+ assert(self != NULL);
+ assert(tllp != NULL);
+ assert(locktype != TLL_NONE);
+
+ /* Read-only and write-only requests go to the write queue. Read-serialized
+ * requests go to the serial queue. Then we wait for an event to signal it's
+ * our turn to go. */
+ queue = NULL;
+ if (locktype == TLL_READ || locktype == TLL_WRITE) {
+ if (tllp->t_write == NULL)
+ tllp->t_write = self;
+ else
+ queue = tllp->t_write;
+ } else {
+ if (tllp->t_serial == NULL)
+ tllp->t_serial = self;
+ else
+ queue = tllp->t_serial;
+ }
+
+ if (queue != NULL) { /* Traverse to end of queue */
+ while (queue->w_next != NULL) queue = queue->w_next;
+ queue->w_next = self;
+ }
+ self->w_next = NULL; /* End of queue */
+
+ /* Now wait for the event that signals it's our turn */
+ worker_wait();
+
+ tllp->t_current = locktype;
+ tllp->t_status &= ~TLL_PEND;
+ tllp->t_owner = self;
+
+ if (tllp->t_current == TLL_READ) {
+ tllp->t_readonly++;
+ tllp->t_owner = NULL;
+ }
+
+ if (verbose) {
+ printf("got lock on tllp=%p with type %d (self=%p)\n", tllp,
+ locktype, self);
+ }
+
+ /* Due to the way upgrading and downgrading works, read-only requests are
+ * scheduled to run after a downgraded lock is released (because they are
+ * queued on the write-only queue which has priority). This results from the
+ * fact that the downgrade operation cannot know whether the next locktype on
+ * the write-only queue is really write-only or actually read-only. However,
+ * that means that read-serialized requests stay queued, while they could run
+ * simultaneously with read-only requests. See if there are any and grant
+ * the head request access */
+ if (tllp->t_current == TLL_READ && tllp->t_serial != NULL) {
+ tllp->t_owner = tllp->t_serial;
+ tllp->t_serial = tllp->t_serial->w_next;
+ tllp->t_owner->w_next = NULL;
+ assert(!(tllp->t_status & TLL_PEND));
+ tllp->t_status |= TLL_PEND;
+ worker_signal(tllp->t_owner);
+ }
+
+ return(OK);
+}
+
+PUBLIC void tll_downgrade(tll_t *tllp)
+{
+/* Downgrade three-level-lock tll from write-only to read-serialized, or from
+ * read-serialized to read-only. Caveat: as we can't know whether the next
+ * lock type on the write queue is actually read-only or write-only, we can't
+ * grant access to that type. It will be granted access once we unlock. Also,
+ * because we apply write-bias, we can't grant access to read-serialized
+ * either, unless nothing is queued on the write-only queue. */
+
+ assert(self != NULL);
+ assert(tllp != NULL);
+ assert(tllp->t_owner == self);
+
+ switch(tllp->t_current) {
+ case TLL_WRITE: tllp->t_current = TLL_READSER; break;
+ case TLL_READSER:
+ /* If nothing is queued on write-only, but there is a pending lock
+ * requesting read-serialized, grant it and keep the lock type. */
+ if (tllp->t_write == NULL && tllp->t_serial != NULL) {
+ tllp->t_owner = tllp->t_serial;
+ tllp->t_serial = tllp->t_serial->w_next; /* Remove head */
+ tllp->t_owner->w_next = NULL;
+ assert(!(tllp->t_status & TLL_PEND));
+ tllp->t_status |= TLL_PEND;
+ worker_signal(tllp->t_owner);
+ } else {
+ tllp->t_current = TLL_READ;
+ tllp->t_owner = NULL;
+ }
+ tllp->t_readonly++; /* Either way, there's one more read-only lock */
+ break;
+ default: panic("VFS: Incorrect lock state");
+ }
+}
+
+PUBLIC void tll_init(tll_t *tllp)
+{
+/* Initialize three-level-lock tll */
+ assert(tllp != NULL);
+
+ tllp->t_current = TLL_NONE;
+ tllp->t_readonly = 0;
+ tllp->t_status = TLL_DFLT;
+ tllp->t_write = NULL;
+ tllp->t_serial = NULL;
+ tllp->t_owner = NULL;
+}
+
+PUBLIC int tll_islocked(tll_t *tllp)
+{
+ return(tllp->t_current != TLL_NONE);
+}
+
+PUBLIC int tll_locked_by_me(tll_t *tllp)
+{
+ assert(self != NULL);
+ return(tllp->t_owner == self && !(tllp->t_status & TLL_PEND));
+}
+
+PUBLIC int tll_lock(tll_t *tllp, tll_access_t locktype)
+{
+/* Try to lock three-level-lock tll with type locktype */
+
+ assert(self != NULL);
+ assert(tllp != NULL);
+ assert(locktype != TLL_NONE);
+
+ self->w_next = NULL;
+
+ if (locktype != TLL_READ && locktype != TLL_READSER && locktype != TLL_WRITE)
+ panic("Invalid lock type %d\n", locktype);
+
+ /* If this lock has pending lock requests, we wait */
+ if (tllp->t_status & TLL_PEND)
+ return tll_append(tllp, locktype);
+
+ /* If we already own this lock don't lock it again and return immediately */
+ if (tllp->t_owner == self) {
+ assert(tllp->t_status == TLL_DFLT);
+ return(EBUSY);
+ }
+
+ /* If this lock is not held by anyone, locktype is granted right away */
+ if (tllp->t_current == TLL_NONE) {
+ tllp->t_current = locktype;
+ if (tllp->t_current == TLL_READ)
+ tllp->t_readonly = 1;
+ else { /* Record owner if locktype is read-serialized or write-only */
+ tllp->t_owner = self;
+ }
+ return(OK);
+ }
+
+ /* If the current lock is write-only, we have to wait for that lock to be
+ * released (regardless of the value of locktype). */
+ if (tllp->t_current == TLL_WRITE)
+ return tll_append(tllp, locktype);
+
+ /* However, if it's not and we're requesting a write-only lock, we have to
+ * wait until the last read access is released (additional read requests
+ * arriving after this write-only request are to be queued) */
+ if (locktype == TLL_WRITE)
+ return tll_append(tllp, locktype);
+
+ /* We have to queue read and read-serialized requests if we have a write-only
+ * request queued ("write bias") or when a read-serialized lock is trying to
+ * upgrade to write-only. The current lock for this tll is either read or
+ * read-serialized. */
+ if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR))
+ return tll_append(tllp, locktype);
+
+ /* If this lock is in read-serialized mode, we can allow read requests and
+ * queue read-serialized requests */
+ if (tllp->t_current == TLL_READSER) {
+ if (locktype == TLL_READ) {
+ tllp->t_readonly++;
+ return(OK);
+ } else
+ return tll_append(tllp, locktype);
+ }
+
+ /* Finally, if the current lock is read-only, we can change it to
+ * read-serialized if necessary without a problem. */
+ tllp->t_current = locktype; /* Either read-only or read-serialized */
+ if (tllp->t_current == TLL_READ) { /* We now have an additional reader */
+ tllp->t_readonly++;
+ tllp->t_owner = NULL;
+ } else {
+ assert(tllp->t_current != TLL_WRITE);
+ tllp->t_owner = self; /* We now have a new owner */
+ self->w_next = NULL;
+ }
+
+ return(OK);
+}
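+
+/* Summary (illustrative) of the tll_lock() decisions above, current lock
+ * state versus requested lock type:
+ *
+ *	current \ request   TLL_READ     TLL_READSER    TLL_WRITE
+ *	TLL_NONE            grant        grant+own      grant+own
+ *	TLL_READ            grant (*)    grant+own (*)  queue
+ *	TLL_READSER         grant (*)    queue          queue
+ *	TLL_WRITE           queue        queue          queue
+ *
+ * (*) queued anyway when a write-only request is already waiting or an
+ * upgrade is pending (write bias). All requests queue while TLL_PEND is
+ * set, and relocking a lock we already own returns EBUSY. */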
+
+PUBLIC int tll_haspendinglock(tll_t *tllp)
+{
+/* Is someone trying to obtain a lock? */
+ assert(tllp != NULL);
+
+ /* Someone is trying to obtain a lock if either the write/read-only queue or
+ * the read-serialized queue is not empty. */
+ return(tllp->t_write != NULL || tllp->t_serial != NULL);
+}
+
+PUBLIC int tll_unlock(tll_t *tllp)
+{
+/* Unlock a previously locked three-level-lock tll */
+ int signal_owner = 0;
+
+ assert(self != NULL);
+ assert(tllp != NULL);
+
+ if (tllp->t_owner == NULL || tllp->t_owner != self) {
+ /* This unlock must have been done by a read-only lock */
+ tllp->t_readonly--;
+ assert(tllp->t_readonly >= 0);
+
+ /* If a read-serialized lock is trying to upgrade and there are no more
+ * read-only locks, the lock can now be upgraded to write-only */
+ if ((tllp->t_status & TLL_UPGR) && tllp->t_readonly == 0)
+ signal_owner = 1;
+ }
+
+ if(tllp->t_owner == self || (tllp->t_owner == NULL && tllp->t_readonly == 0)){
+ /* Let another read-serialized or write-only request obtain access.
+ * Write-only has priority, but only after the last read-only access
+ * has left. Read-serialized access will only be granted if there is
+ * no pending write-only access request. */
+ struct worker_thread *new_owner;
+ new_owner = NULL;
+ tllp->t_owner = NULL; /* Remove owner of lock */
+
+ if (tllp->t_write != NULL) {
+ if (tllp->t_readonly == 0) {
+ new_owner = tllp->t_write;
+ tllp->t_write = tllp->t_write->w_next;
+ }
+ } else if (tllp->t_serial != NULL) {
+ new_owner = tllp->t_serial;
+ tllp->t_serial = tllp->t_serial->w_next;
+ }
+
+ /* New owner is head of queue or NULL if no proc is available */
+ if (new_owner != NULL) {
+ tllp->t_owner = new_owner;
+ tllp->t_owner->w_next = NULL;
+ assert(tllp->t_owner != self);
+ signal_owner = 1;
+ }
+ }
+
+ /* If no one is using this lock, mark it as not in use */
+ if (tllp->t_owner == NULL && tllp->t_readonly == 0)
+ tllp->t_current = TLL_NONE;
+
+ if (tllp->t_current == TLL_NONE || tllp->t_current == TLL_READ) {
+ if (!signal_owner) {
+ tllp->t_owner = NULL;
+ }
+ }
+
+ /* If we have a new owner or the current owner managed to upgrade its lock,
+ * tell it to start/continue running */
+ if (signal_owner) {
+ assert(!(tllp->t_status & TLL_PEND));
+ tllp->t_status |= TLL_PEND;
+ worker_signal(tllp->t_owner);
+ }
+
+ return(OK);
+}
+
+PUBLIC void tll_upgrade(tll_t *tllp)
+{
+/* Upgrade three-level-lock tll from read-serialized to write-only */
+
+ assert(self != NULL);
+ assert(tllp != NULL);
+ assert(tllp->t_owner == self);
+ assert(tllp->t_current != TLL_READ); /* i.e., read-serialized or write-only*/
+ if (tllp->t_current == TLL_WRITE) return; /* Nothing to do */
+ if (tllp->t_readonly != 0) { /* Wait for readers to leave */
+ assert(!(tllp->t_status & TLL_UPGR));
+ tllp->t_status |= TLL_UPGR;
+ worker_wait();
+ tllp->t_status &= ~TLL_UPGR;
+ tllp->t_status &= ~TLL_PEND;
+ assert(tllp->t_readonly == 0);
+ }
+ tllp->t_current = TLL_WRITE;
+}
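+
+/* Illustrative sketch (not part of this patch) of a full life cycle through
+ * the routines above:
+ *
+ *	tll_t lock;
+ *	tll_init(&lock);		-- state: TLL_NONE
+ *	tll_lock(&lock, TLL_READSER);	-- granted; we become t_owner
+ *	tll_upgrade(&lock);		-- wait out readers; now TLL_WRITE
+ *	tll_downgrade(&lock);		-- back to TLL_READSER
+ *	tll_unlock(&lock);		-- wake the next queued requestor
+ */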
--- /dev/null
+#ifndef __VFS_TLL_H__
+#define __VFS_TLL_H__
+
+/* Three-level-lock. Allows read-only, read-serialized, and write-only locks */
+
+typedef enum { TLL_NONE, TLL_READ, TLL_READSER, TLL_WRITE } tll_access_t;
+typedef enum { TLL_DFLT = 0x0, TLL_UPGR = 0x1, TLL_PEND = 0x2 } tll_status_t;
+
+typedef struct {
+ tll_access_t t_current; /* Current type of access to lock */
+ struct worker_thread *t_owner;/* Owner of non-read-only lock */
+ signed int t_readonly; /* No. of current read-only access */
+ tll_status_t t_status; /* Lock status; nothing, pending upgrade, or
+ * pending upgrade of read-serialized to
+ * write-only */
+ struct worker_thread *t_write;/* Write/read-only access requestors queue */
+ struct worker_thread *t_serial;/* Read-serialized access requestors queue */
+} tll_t;
+
+#endif
--- /dev/null
+/* This file contains a few general purpose utility routines.
+ *
+ * The entry points into this file are
+ *   clock_time:  ask the clock task for the real time
+ *   fetch_name:  go get a path name from user space
+ *   no_sys:      reject a system call that FS does not handle
+ *   isokendpt_f: check that an endpoint refers to a sane fproc slot
+ *   in_group:    determine if group 'grp' is in rfp->fp_sgroups[]
+ */
+
+#include "fs.h"
+#include <minix/com.h>
+#include <minix/endpoint.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "file.h"
+#include "fproc.h"
+#include "param.h"
+#include "vmnt.h"
+
+/*===========================================================================*
+ * fetch_name *
+ *===========================================================================*/
+PUBLIC int fetch_name(path, len, flag, dest)
+char *path; /* pointer to the path in user space */
+int len; /* path length, including 0 byte */
+int flag; /* M3 means path may be in message */
+char *dest; /* pointer to where path is to be stored */
+{
+/* Go get path and put it in 'dest'.
+ * If 'flag' = M3 and 'len' <= M3_STRING, the path is present in 'message'.
+ * If it is not, go copy it from user space.
+ */
+ register char *rpu, *rpm;
+ int r, count;
+
+ if (len > PATH_MAX) {
+ err_code = ENAMETOOLONG;
+ return(EGENERIC);
+ }
+
+ /* Check name length for validity. */
+ if (len <= 0) {
+ err_code = EINVAL;
+ return(EGENERIC);
+ }
+
+ if (flag == M3 && len <= M3_STRING) {
+ /* Just copy the path from the message to 'user_fullpath'. */
+ rpu = &dest[0];
+ rpm = m_in.pathname; /* contained in input message */
+ count = len;
+ do { *rpu++ = *rpm++; } while (--count);
+ r = OK;
+ } else {
+ /* String is not contained in the message. Get it from user space. */
+ r = sys_datacopy(who_e, (vir_bytes) path,
+ VFS_PROC_NR, (vir_bytes) dest, (phys_bytes) len);
+ }
+
+ if (dest[len - 1] != '\0') {
+ err_code = ENAMETOOLONG;
+ return(EGENERIC);
+ }
+
+ return(r);
+}
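+/* Illustrative example: for a short path such as "/etc/motd", strlen+1 fits
+ * within M3_STRING, so the string travels inside the request message itself
+ * and is copied out of m_in.pathname above. For a longer path, the name
+ * stays in the caller's address space and is pulled in with sys_datacopy().
+ */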
+
+
+/*===========================================================================*
+ * no_sys *
+ *===========================================================================*/
+PUBLIC int no_sys()
+{
+/* Somebody has used an illegal system call number */
+ return(ENOSYS);
+}
+
+
+/*===========================================================================*
+ * isokendpt_f *
+ *===========================================================================*/
+PUBLIC int isokendpt_f(char *file, int line, endpoint_t endpoint, int *proc, int fatal)
+{
+ int failed = 0;
+ endpoint_t ke;
+ *proc = _ENDPOINT_P(endpoint);
+ if (endpoint == NONE) {
+ printf("VFS %s:%d: endpoint is NONE\n", file, line);
+ failed = 1;
+ } else if (*proc < 0 || *proc >= NR_PROCS) {
+ printf("VFS %s:%d: proc (%d) from endpoint (%d) out of range\n",
+ file, line, *proc, endpoint);
+ failed = 1;
+ } else if ((ke = fproc[*proc].fp_endpoint) != endpoint) {
+ if(ke == NONE) {
+ printf("VFS %s:%d: endpoint (%d) points to NONE slot (%d)\n",
+ file, line, endpoint, *proc);
+ assert(fproc[*proc].fp_pid == PID_FREE);
+ } else {
+ printf("VFS %s:%d: proc (%d) from endpoint (%d) doesn't match "
+ "known endpoint (%d)\n", file, line, *proc, endpoint,
+ fproc[*proc].fp_endpoint);
+ assert(fproc[*proc].fp_pid != PID_FREE);
+ }
+ failed = 1;
+ }
+
+ if(failed && fatal)
+ panic("isokendpt_f failed");
+
+ return(failed ? EDEADEPT : OK);
+}
+
+
+/*===========================================================================*
+ * clock_time *
+ *===========================================================================*/
+PUBLIC time_t clock_time()
+{
+/* This routine returns the time in seconds since 1.1.1970. MINIX is an
+ * astrophysically naive system that assumes the earth rotates at a constant
+ * rate and that such things as leap seconds do not exist.
+ */
+
+ register int r;
+ clock_t uptime;
+ time_t boottime;
+
+ r = getuptime2(&uptime, &boottime);
+ if (r != OK)
+ panic("clock_time err: %d", r);
+
+ return( (time_t) (boottime + (uptime/system_hz)));
+}
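+/* Worked example (illustrative): assuming system_hz = 60, an uptime of 6000
+ * clock ticks amounts to 100 seconds, so clock_time() would return
+ * boottime + 100. */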
+
+/*===========================================================================*
+ * in_group *
+ *===========================================================================*/
+PUBLIC int in_group(struct fproc *rfp, gid_t grp)
+{
+ int i;
+
+ for (i = 0; i < rfp->fp_ngroups; i++)
+ if (rfp->fp_sgroups[i] == grp)
+ return(OK);
+
+ return(EINVAL);
+}
--- /dev/null
+/* Virtual mount table related routines.
+ *
+ */
+
+#include "fs.h"
+#include "threads.h"
+#include "vmnt.h"
+#include <assert.h>
+#include "fproc.h"
+
+FORWARD _PROTOTYPE( int is_vmnt_locked, (struct vmnt *vmp) );
+
+/* Is vmp pointer reasonable? */
+#define SANEVMP(v) ((((v) >= &vmnt[0] && (v) < &vmnt[NR_MNTS])))
+#define BADVMP(v, f, l) printf("%s:%d: bad vmp %p\n", f, l, v)
+/* vmp check that panics */
+#define ASSERTVMP(v) if(!SANEVMP(v)) { \
+ BADVMP(v, __FILE__, __LINE__); panic("bad vmp"); }
+
+#if LOCK_DEBUG
+/*===========================================================================*
+ * check_vmnt_locks_by_me *
+ *===========================================================================*/
+PUBLIC void check_vmnt_locks_by_me(struct fproc *rfp)
+{
+/* Check whether this thread still has locks held on vmnts */
+ struct vmnt *vmp;
+
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) {
+ if (tll_locked_by_me(&vmp->m_lock))
+ panic("Thread %d still holds vmnt lock on vmp %p call_nr=%d\n",
+ mthread_self(), vmp, call_nr);
+ }
+
+ if (rfp->fp_vmnt_rdlocks != 0)
+ panic("Thread %d still holds read locks on a vmnt (%d) call_nr=%d\n",
+ mthread_self(), rfp->fp_vmnt_rdlocks, call_nr);
+}
+#endif
+
+/*===========================================================================*
+ * check_vmnt_locks *
+ *===========================================================================*/
+PUBLIC void check_vmnt_locks()
+{
+ struct vmnt *vmp;
+ int count = 0;
+
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++)
+ if (is_vmnt_locked(vmp)) {
+ count++;
+ printf("vmnt %p is %s, fs_e=%d dev=%d\n", vmp, (tll_islocked(&vmp->m_lock) ? "locked":"pending locked"), vmp->m_fs_e, vmp->m_dev);
+ }
+
+ if (count) panic("%d locked vmnts\n", count);
+#if 0
+ printf("check_vmnt_locks OK\n");
+#endif
+}
+
+/*===========================================================================*
+ * get_free_vmnt *
+ *===========================================================================*/
+PUBLIC struct vmnt *get_free_vmnt(void)
+{
+ struct vmnt *vp;
+
+ for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; ++vp)
+ if (vp->m_dev == NO_DEV) return(vp);
+
+ return(NULL);
+}
+
+/*===========================================================================*
+ * find_vmnt *
+ *===========================================================================*/
+PUBLIC struct vmnt *find_vmnt(endpoint_t fs_e)
+{
+/* Find the vmnt belonging to an FS with endpoint 'fs_e' iff it's in use */
+ struct vmnt *vp;
+
+ for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; ++vp)
+ if (vp->m_fs_e == fs_e && vp->m_dev != NO_DEV)
+ return(vp);
+
+ return(NULL);
+}
+
+/*===========================================================================*
+ * init_vmnts *
+ *===========================================================================*/
+PUBLIC void init_vmnts(void)
+{
+/* Initialize vmnt table */
+ struct vmnt *vp;
+
+ for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; vp++) {
+ vp->m_fs_e = NONE;
+ vp->m_dev = NO_DEV;
+ vp->m_flags = 0;
+ vp->m_mounted_on = NULL;
+ vp->m_root_node = NULL;
+ vp->m_label[0] = '\0';
+ vp->m_comm.c_max_reqs = 1;
+ vp->m_comm.c_cur_reqs = 0;
+ vp->m_comm.c_req_queue = NULL;
+ tll_init(&vp->m_lock);
+ }
+}
+
+/*===========================================================================*
+ * is_vmnt_locked *
+ *===========================================================================*/
+PRIVATE int is_vmnt_locked(struct vmnt *vmp)
+{
+ ASSERTVMP(vmp);
+ return(tll_islocked(&vmp->m_lock) || tll_haspendinglock(&vmp->m_lock));
+}
+
+/*===========================================================================*
+ * lock_vmnt *
+ *===========================================================================*/
+PUBLIC int lock_vmnt(struct vmnt *vmp, tll_access_t locktype)
+{
+ int r;
+ tll_access_t initial_locktype;
+
+ ASSERTVMP(vmp);
+
+ initial_locktype = (locktype == VMNT_EXCL) ? VMNT_WRITE : locktype;
+
+ r = tll_lock(&vmp->m_lock, initial_locktype);
+
+ if (r == EBUSY) return(r);
+
+ if (initial_locktype != locktype) {
+ tll_upgrade(&vmp->m_lock);
+ }
+
+#if LOCK_DEBUG
+ if (locktype == VMNT_READ)
+ fp->fp_vmnt_rdlocks++;
+#endif
+
+ return(OK);
+}
+
+/*===========================================================================*
+ * unlock_vmnt *
+ *===========================================================================*/
+PUBLIC void unlock_vmnt(struct vmnt *vmp)
+{
+ ASSERTVMP(vmp);
+
+#if LOCK_DEBUG
+ /* Decrease read-only lock counter when not locked as VMNT_WRITE or
+ * VMNT_EXCL */
+ if (!tll_locked_by_me(&vmp->m_lock))
+ fp->fp_vmnt_rdlocks--;
+#endif
+
+ tll_unlock(&vmp->m_lock);
+
+#if LOCK_DEBUG
+ assert(!tll_locked_by_me(&vmp->m_lock));
+#endif
+
+}
--- /dev/null
+#ifndef __VFS_VMNT_H__
+#define __VFS_VMNT_H__
+
+EXTERN struct vmnt {
+ int m_fs_e; /* FS process' kernel endpoint */
+ tll_t m_lock;
+ comm_t m_comm;
+ dev_t m_dev; /* device number */
+ unsigned int m_flags; /* mount flags */
+ struct vnode *m_mounted_on; /* vnode on which the partition is mounted */
+ struct vnode *m_root_node; /* root vnode */
+ char m_label[LABEL_MAX]; /* label of the file system process */
+} vmnt[NR_MNTS];
+
+/* vmnt flags */
+#define VMNT_READONLY 01 /* Device mounted readonly */
+#define VMNT_BACKCALL 02 /* FS did back call */
+
+/* vmnt lock types mapping */
+#define VMNT_READ TLL_READ
+#define VMNT_WRITE TLL_READSER
+#define VMNT_EXCL TLL_WRITE
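+
+/* Note (illustrative): VMNT_WRITE maps to TLL_READSER, so a "write" lock on
+ * a mount still admits concurrent read-only users; only VMNT_EXCL excludes
+ * everyone. lock_vmnt() obtains VMNT_EXCL by first taking VMNT_WRITE and
+ * then calling tll_upgrade(). */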
+
+#endif
--- /dev/null
+/* This file contains the routines related to vnodes.
+ * The entry points are:
+ *
+ * get_vnode - increase counter and get details of an inode
+ * get_free_vnode - get a pointer to a free vnode obj
+ * find_vnode - find a vnode according to the FS endpoint and the inode num.
+ * dup_vnode - duplicate vnode (i.e. increase counter)
+ * put_vnode - drop vnode (i.e. decrease counter)
+ */
+
+#include "fs.h"
+#include "threads.h"
+#include "vnode.h"
+#include "vmnt.h"
+#include "fproc.h"
+#include "file.h"
+#include <minix/vfsif.h>
+#include <assert.h>
+
+/* Is vnode pointer reasonable? */
+#ifdef NDEBUG
+#define SANEVP(v)
+#define CHECKVN(v)
+#define ASSERTVP(v)
+#else
+#define SANEVP(v) ((((v) >= &vnode[0] && (v) < &vnode[NR_VNODES])))
+
+#define BADVP(v, f, l) printf("%s:%d: bad vp %p\n", f, l, v)
+
+/* vp check that returns 0 for use in check_vrefs() */
+#define CHECKVN(v) if(!SANEVP(v)) { \
+ BADVP(v, __FILE__, __LINE__); \
+ return 0; \
+}
+
+/* vp check that panics */
+#define ASSERTVP(v) if(!SANEVP(v)) { \
+ BADVP(v, __FILE__, __LINE__); panic("bad vp"); }
+#endif
+
+#if LOCK_DEBUG
+/*===========================================================================*
+ * check_vnode_locks_by_me *
+ *===========================================================================*/
+PUBLIC void check_vnode_locks_by_me(struct fproc *rfp)
+{
+/* Check whether this thread still has locks held on vnodes */
+ struct vnode *vp;
+
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) {
+ if (tll_locked_by_me(&vp->v_lock)) {
+ panic("Thread %d still holds vnode lock on vp %x call_nr=%d\n",
+ mthread_self(), vp, call_nr);
+ }
+ }
+
+ if (rfp->fp_vp_rdlocks != 0)
+ panic("Thread %d still holds read locks on a vnode (%d) call_nr=%d\n",
+ mthread_self(), rfp->fp_vp_rdlocks, call_nr);
+}
+#endif
+
+/*===========================================================================*
+ * check_vnode_locks *
+ *===========================================================================*/
+PUBLIC void check_vnode_locks()
+{
+ struct vnode *vp;
+ int count = 0;
+
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++)
+ if (is_vnode_locked(vp)) {
+ count++;
+ }
+
+ if (count) panic("%d locked vnodes\n", count);
+#if 0
+ printf("check_vnode_locks OK\n");
+#endif
+}
+
+/*===========================================================================*
+ * get_free_vnode *
+ *===========================================================================*/
+PUBLIC struct vnode *get_free_vnode()
+{
+/* Find a free vnode slot in the vnode table (it's not actually allocated) */
+ struct vnode *vp;
+
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) {
+ if (vp->v_ref_count == 0 && !is_vnode_locked(vp)) {
+ vp->v_pipe = NO_PIPE;
+ vp->v_uid = -1;
+ vp->v_gid = -1;
+ vp->v_sdev = NO_DEV;
+ vp->v_mapfs_e = NONE;
+ vp->v_mapfs_count = 0;
+ vp->v_mapinode_nr = 0;
+ return(vp);
+ }
+ }
+
+ err_code = ENFILE;
+ return(NULL);
+}
+
+
+/*===========================================================================*
+ * find_vnode *
+ *===========================================================================*/
+PUBLIC struct vnode *find_vnode(int fs_e, int ino)
+{
+/* Find a specified (FS endpoint and inode number) vnode in the
+ * vnode table */
+ struct vnode *vp;
+
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
+ if (vp->v_ref_count > 0 && vp->v_inode_nr == ino && vp->v_fs_e == fs_e)
+ return(vp);
+
+ return(NULL);
+}
+
+/*===========================================================================*
+ * is_vnode_locked *
+ *===========================================================================*/
+PUBLIC int is_vnode_locked(struct vnode *vp)
+{
+/* Find out whether a thread holds a lock on this vnode or is trying to obtain
+ * a lock. */
+ ASSERTVP(vp);
+
+ return(tll_islocked(&vp->v_lock) || tll_haspendinglock(&vp->v_lock));
+}
+
+/*===========================================================================*
+ * init_vnodes *
+ *===========================================================================*/
+PUBLIC void init_vnodes(void)
+{
+ struct vnode *vp;
+
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) {
+ vp->v_fs_e = NONE;
+ vp->v_mapfs_e = NONE;
+ vp->v_inode_nr = 0;
+ vp->v_ref_count = 0;
+ vp->v_fs_count = 0;
+ vp->v_mapfs_count = 0;
+ tll_init(&vp->v_lock);
+ }
+}
+
+/*===========================================================================*
+ * lock_vnode *
+ *===========================================================================*/
+PUBLIC int lock_vnode(struct vnode *vp, tll_access_t locktype)
+{
+ int r;
+
+ ASSERTVP(vp);
+
+ r = tll_lock(&vp->v_lock, locktype);
+
+#if LOCK_DEBUG
+ if (locktype == VNODE_READ) {
+ fp->fp_vp_rdlocks++;
+ }
+#endif
+
+ if (r == EBUSY) return(r);
+ return(OK);
+}
+
+/*===========================================================================*
+ * unlock_vnode *
+ *===========================================================================*/
+PUBLIC void unlock_vnode(struct vnode *vp)
+{
+ int i;
+ register struct vnode *rvp;
+ struct worker_thread *w;
+ ASSERTVP(vp);
+
+#if LOCK_DEBUG
+ /* Decrease read-only lock counter when not locked as VNODE_OPCL or
+ * VNODE_WRITE */
+ if (!tll_locked_by_me(&vp->v_lock)) {
+ fp->fp_vp_rdlocks--;
+ }
+
+ for (i = 0; i < NR_VNODES; i++) {
+ rvp = &vnode[i];
+
+ w = rvp->v_lock.t_write;
+ assert(w != self);
+ while (w && w->w_next != NULL) {
+ w = w->w_next;
+ assert(w != self);
+ }
+
+ w = rvp->v_lock.t_serial;
+ assert(w != self);
+ while (w && w->w_next != NULL) {
+ w = w->w_next;
+ assert(w != self);
+ }
+ }
+#endif
+
+ tll_unlock(&vp->v_lock);
+}
+
+/*===========================================================================*
+ * dup_vnode *
+ *===========================================================================*/
+PUBLIC void dup_vnode(struct vnode *vp)
+{
+/* dup_vnode() is called to increment the vnode and therefore the
+ * referred inode's counter.
+ */
+ ASSERTVP(vp);
+ vp->v_ref_count++;
+}
+
+
+/*===========================================================================*
+ * put_vnode *
+ *===========================================================================*/
+PUBLIC void put_vnode(struct vnode *vp)
+{
+/* Decrease vnode's usage counter and decrease inode's usage counter in the
+ * corresponding FS process. Decreasing the fs_count each time we decrease the
+ * ref count would lead to poor performance. Instead, only decrease fs_count
+ * when the ref count hits zero. However, this could cause fs_count to wrap.
+ * To prevent this, we drop the counter back to 1 once it exceeds 256.
+ * We maintain fs_count as a sanity check to make sure VFS and the FS are in
+ * sync.
+ */
+ int r, lock_vp;
+
+ ASSERTVP(vp);
+
+ /* Lock vnode. It's quite possible this thread already has a lock on this
+ * vnode. That's no problem, because the reference counter will not decrease
+ * to zero in that case. However, if the counter does decrease to zero *and*
+ * is already locked, we have a consistency problem somewhere. */
+ lock_vp = lock_vnode(vp, VNODE_OPCL);
+
+ if (vp->v_ref_count > 1) {
+ /* Decrease counter */
+ vp->v_ref_count--;
+ if (vp->v_fs_count > 256)
+ vnode_clean_refs(vp);
+ if (lock_vp != EBUSY) unlock_vnode(vp);
+ return;
+ }
+
+ /* If we already had a lock, there is a consistency problem */
+ assert(lock_vp != EBUSY);
+ tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */
+
+ /* A vnode that's not in use can't be put back. */
+ if (vp->v_ref_count <= 0)
+ panic("put_vnode failed: bad v_ref_count %d\n", vp->v_ref_count);
+
+ /* fs_count should indicate that the file is in use. */
+ if (vp->v_fs_count <= 0)
+ panic("put_vnode failed: bad v_fs_count %d\n", vp->v_fs_count);
+
+ /* Tell FS we don't need this inode to be open anymore. */
+ r = req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count);
+
+ if (r != OK) {
+ printf("VFS: putnode failed: %d\n", r);
+ util_stacktrace();
+ }
+
+ /* This inode could've been mapped. If so, tell mapped FS to close it as
+ * well. If mapped onto same FS, this putnode is not needed. */
+ if (vp->v_mapfs_e != NONE && vp->v_mapfs_e != vp->v_fs_e)
+ req_putnode(vp->v_mapfs_e, vp->v_mapinode_nr, vp->v_mapfs_count);
+
+ vp->v_fs_count = 0;
+ vp->v_ref_count = 0;
+ vp->v_mapfs_count = 0;
+
+ unlock_vnode(vp);
+}
+
+
+/*===========================================================================*
+ * vnode_clean_refs *
+ *===========================================================================*/
+PUBLIC void vnode_clean_refs(struct vnode *vp)
+{
+/* Tell the underlying FS to drop all references but one. */
+
+ if (vp == NULL) return;
+ if (vp->v_fs_count <= 1) return; /* Nothing to do */
+
+ /* Drop all references except one */
+ req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count - 1);
+ vp->v_fs_count = 1;
+}
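+/* Worked example (illustrative): a vnode that is opened over and over again
+ * accumulates v_fs_count. Once it exceeds 256, put_vnode() calls
+ * vnode_clean_refs(), which issues a single req_putnode() for
+ * v_fs_count - 1 references: at 257 it drops 256 and keeps 1, so the
+ * counter cannot wrap. */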
+
+
+#define REFVP(v) { vp = (v); CHECKVN(v); vp->v_ref_check++; }
+
+#if DO_SANITYCHECKS
+/*===========================================================================*
+ * check_vrefs *
+ *===========================================================================*/
+PUBLIC int check_vrefs()
+{
+ int i, bad;
+ int ispipe_flag, ispipe_mode;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ struct fproc *rfp;
+ struct filp *f;
+
+ /* Clear v_ref_check */
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
+ vp->v_ref_check= 0;
+
+ /* Count reference for processes */
+ for (rfp=&fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if (rfp->fp_pid == PID_FREE)
+ continue;
+ if(rfp->fp_rd) REFVP(rfp->fp_rd);
+ if(rfp->fp_wd) REFVP(rfp->fp_wd);
+ }
+
+ /* Count references from filedescriptors */
+ for (f = &filp[0]; f < &filp[NR_FILPS]; f++)
+ {
+ if (f->filp_count == 0)
+ continue;
+ REFVP(f->filp_vno);
+ }
+
+ /* Count references to mount points */
+ for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp)
+ {
+ if (vmp->m_dev == NO_DEV)
+ continue;
+ REFVP(vmp->m_root_node);
+ if(vmp->m_mounted_on)
+ REFVP(vmp->m_mounted_on);
+ }
+
+ /* Check references */
+ bad= 0;
+ for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
+ {
+ if (vp->v_ref_count != vp->v_ref_check)
+ {
+ printf(
+"Bad reference count for inode %d on device 0x%x: found %d, listed %d\n",
+ vp->v_inode_nr, vp->v_dev, vp->v_ref_check,
+ vp->v_ref_count);
+ printf("last marked at %s, %d\n",
+ vp->v_file, vp->v_line);
+ bad= 1;
+ }
+
+ /* Also check v_pipe */
+ if (vp->v_ref_count != 0)
+ {
+ ispipe_flag= (vp->v_pipe == I_PIPE);
+ ispipe_mode= ((vp->v_mode & I_TYPE) == I_NAMED_PIPE);
+ if (ispipe_flag != ispipe_mode)
+ {
+ printf(
+"Bad v_pipe for inode %d on device 0x%x: found %d, mode 0%o\n",
+ vp->v_inode_nr, vp->v_dev, vp->v_pipe,
+ vp->v_mode);
+ printf("last marked at %s, %d\n",
+ vp->v_file, vp->v_line);
+ bad= 1;
+ }
+ }
+ }
+ return !bad;
+}
+#endif
--- /dev/null
+#ifndef __VFS_VNODE_H__
+#define __VFS_VNODE_H__
+
+EXTERN struct vnode {
+ endpoint_t v_fs_e; /* FS process' endpoint number */
+ endpoint_t v_mapfs_e; /* mapped FS process' endpoint number */
+ ino_t v_inode_nr; /* inode number on its (minor) device */
+ ino_t v_mapinode_nr; /* mapped inode number of mapped FS. */
+ mode_t v_mode; /* file type, protection, etc. */
+ uid_t v_uid; /* uid of inode. */
+ gid_t v_gid; /* gid of inode. */
+ off_t v_size; /* current file size in bytes */
+ int v_ref_count; /* # times vnode used; 0 means slot is free */
+ int v_fs_count; /* # reference at the underlying FS */
+ int v_mapfs_count; /* # reference at the underlying mapped FS */
+#if 0
+ int v_ref_check; /* for consistency checks */
+#endif
+ char v_pipe; /* set to I_PIPE if pipe */
+ off_t v_pipe_rd_pos;
+ off_t v_pipe_wr_pos;
+ endpoint_t v_bfs_e; /* endpoint number for the FS process in case
+ of a block special file */
+ dev_t v_dev; /* device number on which the corresponding
+ inode resides */
+ dev_t v_sdev; /* device number for special files */
+ struct vmnt *v_vmnt; /* vmnt object of the partition */
+ tll_t v_lock; /* three-level-lock */
+} vnode[NR_VNODES];
+
+
+/* Field values. */
+#define NO_PIPE 0 /* i_pipe is NO_PIPE if inode is not a pipe */
+#define I_PIPE 1 /* i_pipe is I_PIPE if inode is a pipe */
+
+/* vnode lock types mapping */
+#define VNODE_READ TLL_READ
+#define VNODE_OPCL TLL_READSER
+#define VNODE_WRITE TLL_WRITE
+#endif
--- /dev/null
+#include "fs.h"
+#include "glo.h"
+#include "fproc.h"
+#include "threads.h"
+#include "job.h"
+#include <assert.h>
+
+FORWARD _PROTOTYPE( void append_job, (struct job *job,
+ void *(*func)(void *arg)) );
+FORWARD _PROTOTYPE( void get_work, (struct worker_thread *worker) );
+FORWARD _PROTOTYPE( void *worker_main, (void *arg) );
+FORWARD _PROTOTYPE( void worker_sleep, (struct worker_thread *worker) );
+FORWARD _PROTOTYPE( void worker_wake, (struct worker_thread *worker) );
+PRIVATE int init = 0;
+PRIVATE mthread_attr_t tattr;
+
+#ifdef MKCOVERAGE
+# define TH_STACKSIZE (10 * 1024)
+#else
+# define TH_STACKSIZE (6 * 1024)
+#endif
+
+#define ASSERTW(w) assert((w) == &sys_worker || (w) == &dl_worker || \
+ ((w) >= &workers[0] && (w) < &workers[NR_WTHREADS]));
+
+/*===========================================================================*
+ * worker_init *
+ *===========================================================================*/
+PUBLIC void worker_init(struct worker_thread *worker)
+{
+/* Initialize worker thread */
+ if (!init) {
+ threads_init();
+ assert(mthread_attr_init(&tattr) == 0);
+ if (mthread_attr_setstacksize(&tattr, TH_STACKSIZE) != 0)
+ panic("couldn't set default thread stack size");
+ if (mthread_attr_setdetachstate(&tattr, MTHREAD_CREATE_DETACHED) != 0)
+ panic("couldn't set default thread detach state");
+ pending = 0;
+ init = 1;
+ }
+
+ ASSERTW(worker);
+
+ worker->w_job.j_func = NULL; /* Mark not in use */
+ worker->w_next = NULL;
+ assert(mutex_init(&worker->w_event_mutex, NULL) == 0);
+ assert(cond_init(&worker->w_event, NULL) == 0);
+ assert(mthread_create(&worker->w_tid, &tattr, worker_main, (void *) worker) == 0);
+ yield();
+}
+
+/*===========================================================================*
+ * get_work *
+ *===========================================================================*/
+PRIVATE void get_work(struct worker_thread *worker)
+{
+/* Find new work to do. Work can be 'queued', 'pending', or absent. In the
+ * latter case wait for new work to come in. */
+
+ struct job *new_job;
+ struct fproc *rfp;
+
+ ASSERTW(worker);
+ self = worker;
+
+ /* Do we have queued work to do? */
+ if ((new_job = worker->w_job.j_next) != NULL) {
+ worker->w_job = *new_job;
+ free(new_job);
+ return;
+ } else if (worker != &sys_worker && worker != &dl_worker && pending > 0) {
+ /* Find pending work */
+ for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+ if (rfp->fp_flags & FP_PENDING) {
+ worker->w_job = rfp->fp_job;
+ rfp->fp_job.j_func = NULL;
+ rfp->fp_flags &= ~FP_PENDING; /* No longer pending */
+ pending--;
+ assert(pending >= 0);
+ return;
+ }
+ }
+ panic("Pending work inconsistency");
+ }
+
+ /* Wait for work to come to us */
+ worker_sleep(worker);
+}
+
+/*===========================================================================*
+ * worker_available *
+ *===========================================================================*/
+PUBLIC int worker_available(void)
+{
+ int busy, i;
+
+ busy = 0;
+ for (i = 0; i < NR_WTHREADS; i++) {
+ if (workers[i].w_job.j_func != NULL)
+ busy++;
+ }
+
+ return(NR_WTHREADS - busy);
+}
+
+/*===========================================================================*
+ * worker_main *
+ *===========================================================================*/
+PRIVATE void *worker_main(void *arg)
+{
+/* Worker thread main loop */
+ struct worker_thread *me;
+
+ me = (struct worker_thread *) arg;
+ ASSERTW(me);
+
+ while(TRUE) {
+ get_work(me);
+
+ /* Register ourselves in fproc table if possible */
+ if (me->w_job.j_fp != NULL) {
+ me->w_job.j_fp->fp_wtid = me->w_tid;
+ }
+
+ /* Carry out work */
+ me->w_job.j_func(&me->w_job);
+
+ /* Mark ourselves as done */
+ me->w_job.j_func = NULL;
+ }
+
+ return(NULL); /* Unreachable */
+}
+
+/*===========================================================================*
+ * dl_worker_start *
+ *===========================================================================*/
+PUBLIC void dl_worker_start(void *(*func)(void *arg))
+{
+/* Start the deadlock resolving worker. This worker is reserved to run in case
+ * all other workers are busy and an additional worker is needed to come to
+ * the rescue. */
+ assert(dl_worker.w_job.j_func == NULL);
+
+ if (dl_worker.w_job.j_func == NULL) {
+ dl_worker.w_job.j_fp = fp;
+ dl_worker.w_job.j_m_in = m_in;
+ dl_worker.w_job.j_func = func;
+ worker_wake(&dl_worker);
+ }
+}
+
+/*===========================================================================*
+ * sys_worker_start *
+ *===========================================================================*/
+PUBLIC void sys_worker_start(void *(*func)(void *arg))
+{
+/* Carry out work for the system (i.e., kernel or PM). If this thread is idle
+ * do it right away, else create new job and append it to the queue. */
+
+ if (sys_worker.w_job.j_func == NULL) {
+ sys_worker.w_job.j_fp = fp;
+ sys_worker.w_job.j_m_in = m_in;
+ sys_worker.w_job.j_func = func;
+ worker_wake(&sys_worker);
+ } else {
+ append_job(&sys_worker.w_job, func);
+ }
+}
+
+/*===========================================================================*
+ * append_job *
+ *===========================================================================*/
+PRIVATE void append_job(struct job *job, void *(*func)(void *arg))
+{
+/* Append a job */
+
+ struct job *new_job, *tail;
+
+ /* Create new job */
+ new_job = calloc(1, sizeof(struct job));
+ assert(new_job != NULL);
+ new_job->j_fp = fp;
+ new_job->j_m_in = m_in;
+ new_job->j_func = func;
+ new_job->j_next = NULL;
+
+ /* Append to queue */
+ tail = job;
+ while (tail->j_next != NULL) tail = tail->j_next;
+ tail->j_next = new_job;
+}
+
+/*===========================================================================*
+ * worker_start *
+ *===========================================================================*/
+PUBLIC void worker_start(void *(*func)(void *arg))
+{
+/* Find an available worker or wait for one */
+ int i;
+ struct worker_thread *worker;
+
+ worker = NULL;
+ for (i = 0; i < NR_WTHREADS; i++) {
+ if (workers[i].w_job.j_func == NULL) {
+ worker = &workers[i];
+ break;
+ }
+ }
+
+ if (worker != NULL) {
+ worker->w_job.j_fp = fp;
+ worker->w_job.j_m_in = m_in;
+ worker->w_job.j_func = func;
+ worker->w_job.j_next = NULL;
+ worker_wake(worker);
+ return;
+ }
+
+ /* No worker threads available, let's wait for one to finish. */
+ /* If this process already has a job scheduled, forget about this new
+ * job; either
+ * - the new job is do_dummy and we have already scheduled an actual job, or
+ * - the new job is an actual job and we have already scheduled do_dummy in
+ * order to exit this proc, so doing the new job is pointless. */
+ if (fp->fp_job.j_func == NULL) {
+ assert(!(fp->fp_flags & FP_PENDING));
+ fp->fp_job.j_fp = fp;
+ fp->fp_job.j_m_in = m_in;
+ fp->fp_job.j_func = func;
+ fp->fp_job.j_next = NULL;
+ fp->fp_flags |= FP_PENDING;
+ pending++;
+ }
+}
+
+/*===========================================================================*
+ * worker_sleep *
+ *===========================================================================*/
+PRIVATE void worker_sleep(struct worker_thread *worker)
+{
+ ASSERTW(worker);
+ assert(self == worker);
+ assert(mutex_lock(&worker->w_event_mutex) == 0);
+ assert(cond_wait(&worker->w_event, &worker->w_event_mutex) == 0);
+ assert(mutex_unlock(&worker->w_event_mutex) == 0);
+ self = worker;
+}
+
+/*===========================================================================*
+ * worker_wake *
+ *===========================================================================*/
+PRIVATE void worker_wake(struct worker_thread *worker)
+{
+/* Signal a worker to wake up */
+ ASSERTW(worker);
+ assert(mutex_lock(&worker->w_event_mutex) == 0);
+ assert(cond_signal(&worker->w_event) == 0);
+ assert(mutex_unlock(&worker->w_event_mutex) == 0);
+}
+
+/*===========================================================================*
+ * worker_wait *
+ *===========================================================================*/
+PUBLIC void worker_wait(void)
+{
+ struct worker_thread *worker;
+
+ worker = worker_self();
+ worker->w_job.j_m_in = m_in; /* Store important global data */
+ assert(fp == worker->w_job.j_fp);
+ worker_sleep(worker);
+ /* We continue here after waking up */
+ fp = worker->w_job.j_fp; /* Restore global data */
+ m_in = worker->w_job.j_m_in;
+ assert(worker->w_next == NULL);
+}
+
+/*===========================================================================*
+ * worker_signal *
+ *===========================================================================*/
+PUBLIC void worker_signal(struct worker_thread *worker)
+{
+ ASSERTW(worker); /* Make sure we have a valid thread */
+ worker_wake(worker);
+}
+
+/*===========================================================================*
+ * worker_self *
+ *===========================================================================*/
+PUBLIC struct worker_thread *worker_self(void)
+{
+ struct worker_thread *worker;
+ worker = worker_get(mthread_self());
+ assert(worker != NULL);
+ return(worker);
+}
+
+/*===========================================================================*
+ * worker_get *
+ *===========================================================================*/
+PUBLIC struct worker_thread *worker_get(thread_t worker_tid)
+{
+ int i;
+ struct worker_thread *worker;
+
+ worker = NULL;
+ if (worker_tid == sys_worker.w_tid)
+ worker = &sys_worker;
+ else if (worker_tid == dl_worker.w_tid)
+ worker = &dl_worker;
+ else {
+ for (i = 0; i < NR_WTHREADS; i++) {
+ if (workers[i].w_tid == worker_tid) {
+ worker = &workers[i];
+ break;
+ }
+ }
+ }
+
+ return(worker);
+}
+
+/*===========================================================================*
+ * worker_getjob *
+ *===========================================================================*/
+PUBLIC struct job *worker_getjob(thread_t worker_tid)
+{
+ struct worker_thread *worker;
+
+ if ((worker = worker_get(worker_tid)) != NULL)
+ return(&worker->w_job);
+
+ return(NULL);
+}
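+
+/* Illustrative sketch (not part of this patch) of a job's life cycle through
+ * the routines above; do_something is a hypothetical job function:
+ *
+ *	void *do_something(void *arg) { ... return(NULL); }
+ *
+ *	worker_start(do_something);	-- run on a free worker thread, or
+ *					-- mark the process FP_PENDING
+ *
+ * A running job that must block on an event calls worker_wait(); whoever
+ * later delivers that event calls worker_signal() on the same worker to
+ * let it continue. */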
--- /dev/null
+/* This file is the counterpart of "read.c". It contains the code for writing
+ * insofar as this is not contained in read_write().
+ *
+ * The entry points into this file are
+ * do_write: call read_write to perform the WRITE system call
+ */
+
+#include "fs.h"
+#include "file.h"
+
+
+/*===========================================================================*
+ * do_write *
+ *===========================================================================*/
+PUBLIC int do_write()
+{
+/* Perform the write(fd, buffer, nbytes) system call. */
+ return(read_write(WRITING));
+}
# Makefile for Information Server (IS)
+#
+
+.include <bsd.own.mk>
+
PROG= is
SRCS= main.c dmp.c dmp_kernel.c dmp_pm.c dmp_fs.c dmp_rs.c dmp_ds.c dmp_vm.c
CPPFLAGS.dmp_rs.c+= -I${MINIXSRCDIR}
CPPFLAGS.dmp_vm.c+= -I${MINIXSRCDIR}
+.if ${BUILDAVFS} == "yes"
+CFLAGS+= -D_USEAVFS
+.endif
+
.include <minix.service.mk>
#include "inc.h"
#include "../mfs/const.h"
-#include "../vfs/const.h"
-#include "../vfs/fproc.h"
-#include "../vfs/dmap.h"
+#if defined(_USEAVFS)
+# include "../avfs/const.h"
+# include "../avfs/fproc.h"
+# include "../avfs/dmap.h"
+#else
+# include "../vfs/const.h"
+# include "../vfs/fproc.h"
+# include "../vfs/dmap.h"
+#endif
#include <minix/dmap.h>
PUBLIC struct fproc fproc[NR_PROCS];
fp = &fproc[i];
if (fp->fp_pid <= 0) continue;
if (++n > 22) break;
+#if !defined(_USEAVFS)
printf("%3d %4d %2d/%d 0x%05x %2d (%2d) %2d (%2d) %3d %3d %3d ",
i, fp->fp_pid,
((fp->fp_tty>>MAJOR)&BYTE), ((fp->fp_tty>>MINOR)&BYTE),
fp->fp_umask,
fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
fp->fp_sesldr,
fp->fp_blocked_on, !!fp->fp_revived
);
+#else
+ printf("%3d %4d %2d/%d 0x%05x %2d (%2d) %2d (%2d) %3d %3d %3d ",
+ i, fp->fp_pid,
+ major(fp->fp_tty), minor(fp->fp_tty),
+ fp->fp_umask,
+ fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
+ !!(fp->fp_flags & FP_SESLDR),
+ fp->fp_blocked_on, !!(fp->fp_flags & FP_REVIVED)
+ );
+#endif
if (fp->fp_blocked_on == FP_BLOCKED_ON_OTHER)
printf("%4d\n", fp->fp_task);
else
# Makefile for ProcFS server
+#
+
+.include <bsd.own.mk>
+
PROG= procfs
SRCS= buf.c main.c pid.c root.c tree.c util.c cpuinfo.c
CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers
+.if ${BUILDAVFS} == "yes"
+CFLAGS+= -D_USEAVFS
+.endif
+
DPADD+= ${LIBVTREEFS} ${LIBSYS}
LDADD+= -lvtreefs -lsys
#include "kernel/type.h"
#include "kernel/proc.h"
#include "pm/mproc.h"
-#include "vfs/const.h"
-#include "vfs/fproc.h"
+#if defined(_USEAVFS)
+# include "avfs/const.h"
+# include "avfs/fproc.h"
+#else
+# include "vfs/const.h"
+# include "vfs/fproc.h"
+#endif
#include <minix/vtreefs.h>
#include <minix/procfs.h>
CPPFLAGS+= ${SMP_FLAGS}
+BUILDAVFS?= "no"
+
MAKECONF?= /etc/make.conf
.-include "${MAKECONF}"
GEN_FILES= *.bak image kernel *.iso *.iso.gz cdfdimage rootimage src
# Specify the programs that are part of the system image.
+.if ${BUILDAVFS} == "yes"
+VFS= "../servers/avfs/vfs"
+PFS= "../servers/apfs/pfs"
+.else
+VFS= "../servers/vfs/vfs"
+PFS= "../servers/pfs/pfs"
+.endif
KERNEL= kernel
PROGRAMS= \
../servers/ds/ds \
../servers/rs/rs \
../servers/pm/pm \
../servers/sched/sched \
- ../servers/vfs/vfs \
+ ${VFS} \
../drivers/memory/memory \
../drivers/log/log \
../drivers/tty/tty \
../servers/mfs/mfs \
../servers/vm/vm \
- ../servers/pfs/pfs \
+ ${PFS} \
../servers/init/init
usage: