From: Thomas Veerman Date: Wed, 17 Aug 2011 13:23:45 +0000 (+0000) Subject: Merge AVFS and APFS X-Git-Tag: v3.2.0~354 X-Git-Url: http://zhaoyanbai.com/repos/?a=commitdiff_plain;h=a6bd3f4a2260809e96dc423b3173db3fb79a4c21;p=minix.git Merge AVFS and APFS --- diff --git a/servers/Makefile b/servers/Makefile index f5e7140e7..05a2bc6ce 100644 --- a/servers/Makefile +++ b/servers/Makefile @@ -3,14 +3,22 @@ .include +.if ${BUILDAVFS} == "yes" +VFS= "avfs" +PFS= "apfs" +.else +VFS= "vfs" +PFS= "pfs" +.endif + .if ${MKIMAGEONLY} == "yes" -SUBDIR= ds init mfs pfs pm rs sched vfs vm +SUBDIR= ds init mfs ${PFS} pm rs sched ${VFS} vm .else SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs \ - mfs pfs pm procfs rs sched vfs vm devman + mfs ${PFS} pm procfs rs sched ${VFS} vm devman .endif diff --git a/servers/apfs/Makefile b/servers/apfs/Makefile new file mode 100644 index 000000000..b713d2dee --- /dev/null +++ b/servers/apfs/Makefile @@ -0,0 +1,14 @@ +# Makefile for Pipe File System (PFS) +PROG= pfs +SRCS= open.c table.c inode.c main.c super.c link.c \ + buffer.c read.c misc.c mount.c utility.c stadir.c \ + uds.c dev_uds.c + +DPADD+= ${LIBDRIVER} ${LIBSYS} +LDADD+= -ldriver -lsys + +MAN= + +BINDIR?= /usr/sbin + +.include diff --git a/servers/apfs/buf.h b/servers/apfs/buf.h new file mode 100644 index 000000000..cd31f7e84 --- /dev/null +++ b/servers/apfs/buf.h @@ -0,0 +1,26 @@ +#ifndef __PFS_BUF_H__ +#define __PFS_BUF_H__ + +/* Buffer (block) cache. + */ + +struct buf { + /* Data portion of the buffer. */ + char b_data[PIPE_BUF]; /* ordinary user data */ + + /* Header portion of the buffer. */ + struct buf *b_next; /* used to link all free bufs in a chain */ + struct buf *b_prev; /* used to link all free bufs the other way */ + ino_t b_num; /* inode number on minor device */ + dev_t b_dev; /* major | minor device where block resides */ + int b_bytes; /* Number of bytes allocated in bp */ + int b_count; /* Number of users of this buffer */ +}; + +/* A block is free if b_dev == NO_DEV. */ + + +EXTERN struct buf *front; /* points to least recently used free block */ +EXTERN struct buf *rear; /* points to most recently used free block */ + +#endif diff --git a/servers/apfs/buffer.c b/servers/apfs/buffer.c new file mode 100644 index 000000000..a5c77b4a6 --- /dev/null +++ b/servers/apfs/buffer.c @@ -0,0 +1,103 @@ +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include +#include +#include + +FORWARD _PROTOTYPE( struct buf *new_block, (dev_t dev, ino_t inum) ); + +/*===========================================================================* + * buf_pool * + *===========================================================================*/ +PUBLIC void buf_pool(void) +{ +/* Initialize the buffer pool. */ + + front = NULL; + rear = NULL; +} + + + +/*===========================================================================* + * get_block * + *===========================================================================*/ +PUBLIC struct buf *get_block(dev_t dev, ino_t inum) +{ + struct buf *bp = front; + + while(bp != NULL) { + if (bp->b_dev == dev && bp->b_num == inum) { + bp->b_count++; + return(bp); + } + bp = bp->b_next; + } + + /* Buffer was not found. 
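
A minimal sketch of the get/put pairing a caller of this cache is expected
to maintain (not part of this patch; example_dev and example_ino are
hypothetical):

	struct buf *bp;

	bp = get_block(example_dev, example_ino);	/* find or allocate; bumps b_count */
	if (bp != NULL) {
		memset(bp->b_data, 0, PIPE_BUF);	/* use the PIPE_BUF-sized data area */
		put_block(example_dev, example_ino);	/* drop the reference taken above */
	}
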
Try to allocate a new one */ + return new_block(dev, inum); +} + + +/*===========================================================================* + * new_block * + *===========================================================================*/ +PRIVATE struct buf *new_block(dev_t dev, ino_t inum) +{ +/* Allocate a new buffer and add it to the double linked buffer list */ + struct buf *bp; + + bp = malloc(sizeof(struct buf)); + if (bp == NULL) { + err_code = ENOSPC; + return(NULL); + } + bp->b_num = inum; + bp->b_dev = dev; + bp->b_bytes = 0; + bp->b_count = 1; + memset(bp->b_data, 0 , PIPE_BUF); + + /* Add at the end of the buffer */ + if (front == NULL) { /* Empty list? */ + front = bp; + bp->b_prev = NULL; + } else { + rear->b_next = bp; + bp->b_prev = rear; + } + bp->b_next = NULL; + rear = bp; + + return(bp); +} + + +/*===========================================================================* + * put_block * + *===========================================================================*/ +PUBLIC void put_block(dev_t dev, ino_t inum) +{ + struct buf *bp; + + bp = get_block(dev, inum); + if (bp == NULL) return; /* We didn't find the block. Nothing to put. */ + + bp->b_count--; /* Compensate for above 'get_block'. */ + if (--bp->b_count > 0) return; + + /* Cut bp out of the loop */ + if (bp->b_prev == NULL) + front = bp->b_next; + else + bp->b_prev->b_next = bp->b_next; + + if (bp->b_next == NULL) + rear = bp->b_prev; + else + bp->b_next->b_prev = bp->b_prev; + + /* Buffer administration is done. Now it's safe to free up bp. */ + free(bp); +} diff --git a/servers/apfs/const.h b/servers/apfs/const.h new file mode 100644 index 000000000..f89a239be --- /dev/null +++ b/servers/apfs/const.h @@ -0,0 +1,42 @@ +#ifndef __PFS_CONST_H__ +#define __PFS_CONST_H__ + +#define NR_INODES 256 /* # slots in "in core" inode table */ + +/* Size of descriptor table for unix domain sockets. This should be + * equal to the maximum number of minor devices (currently 256). + */ +#define NR_FDS 256 + +#define INODE_HASH_LOG2 7 /* 2 based logarithm of the inode hash size */ +#define INODE_HASH_SIZE ((unsigned long)1<USER_ENDPT); +#endif + + /* + * Find a slot in the descriptor table for the new descriptor. + * The index of the descriptor in the table will be returned. + * Subsequent calls to read/write/close/ioctl/etc will use this + * minor number. The minor number must be different from the + * the /dev/uds device's minor number (currently 0). 
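
An aside on put_block() above: it re-enters get_block(), which increments
b_count, so it must decrement twice -- once to cancel its own lookup and
once for the reference actually being released. A sketch with a single
user (hypothetical dev/ino values):

	struct buf *bp = get_block(dev, ino);	/* fresh buffer: b_count == 1 */
	put_block(dev, ino);	/* internal get_block -> 2, minus 2 -> 0, so free(bp) */
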
+ */ + + minor = -1; /* to trap error */ + + for (i = 1; i < NR_FDS; i++) { + if (uds_fd_table[i].state == UDS_FREE) { + minor = i; + break; + } + } + + if (minor == -1) { + + /* descriptor table full */ + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, ENFILE); + return ENFILE; + } + + /* + * We found a slot in uds_fd_table, now initialize the descriptor + */ + + /* mark this one as 'in use' so that it doesn't get assigned to + * another socket + */ + uds_fd_table[minor].state = UDS_INUSE; + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* set the socket owner */ + uds_fd_table[minor].owner = dev_m_in->USER_ENDPT; + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + uds_fd_table[minor].select_proc = 0; + uds_fd_table[minor].sel_ops_in = 0; + uds_fd_table[minor].sel_ops_out = 0; + uds_fd_table[minor].status_updated = 0; + + /* initialize the data pointer (pos) to the start of the PIPE */ + uds_fd_table[minor].pos = 0; + + /* the PIPE is initially empty */ + uds_fd_table[minor].size = 0; + + /* the default for a new socket is to allow reading and writing. + * shutdown(2) will remove one or both flags. + */ + uds_fd_table[minor].mode = S_IRUSR|S_IWUSR; + + /* In libc socket(2) sets this to the actual value later with the + * NWIOSUDSTYPE ioctl(). + */ + uds_fd_table[minor].type = -1; + + /* Clear the backlog by setting each entry to -1 */ + for (i = 0; i < UDS_SOMAXCONN; i++) { + /* initially no connections are pending */ + uds_fd_table[minor].backlog[i] = -1; + } + + memset(&uds_fd_table[minor].ancillary_data, '\0', sizeof(struct + ancillary)); + for (i = 0; i < OPEN_MAX; i++) { + uds_fd_table[minor].ancillary_data.fds[i] = -1; + } + + /* default the size to UDS_SOMAXCONN */ + uds_fd_table[minor].backlog_size = UDS_SOMAXCONN; + + /* the socket isn't listening for incoming connections until + * listen(2) is called + */ + uds_fd_table[minor].listening = 0; + + /* initially the socket is not connected to a peer */ + uds_fd_table[minor].peer = -1; + + /* there isn't a child waiting to be accept(2)'d */ + uds_fd_table[minor].child = -1; + + /* initially the socket is not bound or listening on an address */ + memset(&(uds_fd_table[minor].addr), '\0', sizeof(struct sockaddr_un)); + memset(&(uds_fd_table[minor].source), '\0', sizeof(struct sockaddr_un)); + memset(&(uds_fd_table[minor].target), '\0', sizeof(struct sockaddr_un)); + + /* Initially the socket isn't suspended. */ + uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED; + + /* and the socket doesn't have an I/O grant initially */ + uds_fd_table[minor].io_gr = (cp_grant_id_t) 0; + + /* since there is no I/O grant it effectively has no size either */ + uds_fd_table[minor].io_gr_size = 0; + + /* The process isn't suspended so we don't flag it as revivable */ + uds_fd_table[minor].ready_to_revive = 0; + + /* get the effective user id and effective group id from the endpoint */ + /* this is needed in the REQ_NEWNODE request to PFS. 
*/ + rc = getnucred(uds_fd_table[minor].endpoint, &ucred); + if (rc == -1) { + /* roll back the changes we made to the descriptor */ + memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); + + /* likely error: invalid endpoint / proc doesn't exist */ + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, errno); + return errno; + } + + /* Prepare Request to the FS side of PFS */ + + fs_m_in.m_type = REQ_NEWNODE; + fs_m_in.REQ_MODE = I_NAMED_PIPE; + fs_m_in.REQ_DEV = NO_DEV; + fs_m_in.REQ_UID = ucred.uid; + fs_m_in.REQ_GID = ucred.gid; + + /* Request a new inode on the pipe file system */ + + rc = fs_newnode(&fs_m_in, &fs_m_out); + if (rc != OK) { + /* roll back the changes we made to the descriptor */ + memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); + + /* likely error: get_block() failed */ + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, rc); + return rc; + } + + /* Process the response */ + + uds_fd_table[minor].inode_nr = fs_m_out.RES_INODE_NR; + + /* prepare the reply */ + + uds_fd_table[minor].syscall_done = 1; + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, minor); + return minor; +} + +PUBLIC int uds_close(message *dev_m_in, message *dev_m_out) +{ + int minor; + message fs_m_in, fs_m_out; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_close() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_CLOSE_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + return EINVAL; + } + + /* no need to track the syscall in case of cancellation. close() is + * atomic and can't be cancelled. 
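
An aside on getnucred(2), which is called just below to turn the caller's
endpoint into credentials; a minimal sketch (some_endpoint is
hypothetical, and the declaring header is quoted from memory):

	#include <unistd.h>	/* getnucred() on MINIX */

	struct ucred cr;

	if (getnucred(some_endpoint, &cr) == -1)
		perror("getnucred");	/* e.g. the endpoint is gone */
	else
		printf("pid=%d uid=%d gid=%d\n", cr.pid, cr.uid, cr.gid);
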
no need to update the endpoint here, + * we won't be needing it to kill the socket + */ + + /* if the socket is connected, disconnect it */ + if (uds_fd_table[minor].peer != -1) { + + /* set peer of this peer to -1 */ + uds_fd_table[uds_fd_table[minor].peer].peer = -1; + + /* error to pass to peer */ + uds_fd_table[uds_fd_table[minor].peer].err = ECONNRESET; + + /* if peer was blocked on I/O revive peer */ + if (uds_fd_table[uds_fd_table[minor].peer].suspended) { + int peer = uds_fd_table[minor].peer; + + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(dev_m_in->m_source, peer); + } + } + + if (uds_fd_table[minor].ancillary_data.nfiledes > 0) { + clear_fds(minor, &(uds_fd_table[minor].ancillary_data)); + } + + /* Prepare Request to the FS side of PFS */ + + fs_m_in.m_type = REQ_PUTNODE; + fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr; + fs_m_in.REQ_COUNT = 1; + + /* set the socket back to its original UDS_FREE state */ + memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); + + /* Request the removal of the inode from the pipe file system */ + + rc = fs_putnode(&fs_m_in, &fs_m_out); + if (rc != OK) { + perror("fs_putnode"); + /* likely error: get_block() failed */ + return rc; + } + + uds_set_reply(dev_m_out, DEV_CLOSE_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, OK); + return OK; +} + +PUBLIC int uds_select(message *dev_m_in, message *dev_m_out) +{ + int i, bytes; + int minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_select() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + + uds_sel_reply(dev_m_out, DEV_SEL_REPL1, minor, EINVAL); + return EINVAL; + } + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 1; + uds_fd_table[minor].select_proc = dev_m_in->m_source; + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* Can't update the process endpoint here, no info. 
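
For reference, the userland counterpart of the select(2) bookkeeping set
up below; sock_fd is a hypothetical connected AF_UNIX socket:

	fd_set rfds;
	struct timeval tv = { 5, 0 };

	FD_ZERO(&rfds);
	FD_SET(sock_fd, &rfds);
	if (select(sock_fd + 1, &rfds, NULL, NULL, &tv) > 0 &&
	    FD_ISSET(sock_fd, &rfds))
		;	/* uds_select() reported SEL_RD: data or a pending connection */
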
*/ + + uds_fd_table[minor].sel_ops_in = dev_m_in->USER_ENDPT; + uds_fd_table[minor].sel_ops_out = 0; + + /* check if there is data available to read */ + bytes = uds_perform_read(minor, dev_m_in->m_source, 1, 1); + if (bytes > 0) { + + /* there is data in the pipe for us to read */ + uds_fd_table[minor].sel_ops_out |= SEL_RD; + + } else if (uds_fd_table[minor].listening == 1) { + + /* check for pending connections */ + for (i = 0; i < uds_fd_table[minor].backlog_size; i++) { + if (uds_fd_table[minor].backlog[i] != -1) { + uds_fd_table[minor].sel_ops_out |= SEL_RD; + break; + } + } + } + + /* check if we can write without blocking */ + bytes = uds_perform_write(minor, dev_m_in->m_source, PIPE_BUF, 1); + if (bytes > 0) { + uds_fd_table[minor].sel_ops_out |= SEL_WR; + } + + uds_fd_table[minor].syscall_done = 1; + uds_sel_reply(dev_m_out, DEV_SEL_REPL1, minor, + uds_fd_table[minor].sel_ops_out); + + return uds_fd_table[minor].sel_ops_out; +} + +PRIVATE int uds_perform_read(int minor, endpoint_t m_source, + size_t size, int pretend) +{ + int rc; + message fs_m_in; + message fs_m_out; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_perform_read() call_count=%d\n", minor, + ++call_count); +#endif + + /* skip reads and writes of 0 (or less!) bytes */ + if (size <= 0) { + return 0; + } + + /* check if we are allowed to read */ + if (!(uds_fd_table[minor].mode & S_IRUSR)) { + + /* socket is shutdown for reading */ + return EPIPE; + } + + if (uds_fd_table[minor].size == 0) { + + if (pretend) { + return SUSPEND; + } + + /* maybe a process is blocked waiting to write? if + * needed revive the writer + */ + if (uds_fd_table[minor].peer != -1 && + uds_fd_table[uds_fd_table[minor].peer].suspended) { + int peer = uds_fd_table[minor].peer; + + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + +#if DEBUG == 1 + printf("(uds) [%d] suspending read request\n", minor); +#endif + + /* Process is reading from an empty pipe, + * suspend it so some bytes can be written + */ + uds_fd_table[minor].suspended = UDS_SUSPENDED_READ; + return SUSPEND; + } + + if (pretend) { + + return (size > uds_fd_table[minor].size) ? + uds_fd_table[minor].size : size; + } + + + /* Prepare Request to the FS side of PFS */ + fs_m_in.m_type = REQ_READ; + fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr; + fs_m_in.REQ_GRANT = uds_fd_table[minor].io_gr; + fs_m_in.REQ_SEEK_POS_HI = 0; + fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[minor].pos; + fs_m_in.REQ_NBYTES = (size > uds_fd_table[minor].size) ? 
+ uds_fd_table[minor].size : size; + + /* perform the read */ + rc = fs_readwrite(&fs_m_in, &fs_m_out); + if (rc != OK) { + perror("fs_readwrite"); + return rc; + } + + /* Process the response */ +#if DEBUG == 1 + printf("(uds) [%d] read complete\n", minor); +#endif + + /* move the position of the data pointer up to data we haven't + * read yet + */ + uds_fd_table[minor].pos += fs_m_out.RES_NBYTES; + + /* decrease the number of unread bytes */ + uds_fd_table[minor].size -= fs_m_out.RES_NBYTES; + + /* if we have 0 unread bytes, move the data pointer back to the + * start of the buffer + */ + if (uds_fd_table[minor].size == 0) { + uds_fd_table[minor].pos = 0; + } + + /* maybe a big write was waiting for us to read some data, if + * needed revive the writer + */ + if (uds_fd_table[minor].peer != -1 && + uds_fd_table[uds_fd_table[minor].peer].suspended) { + int peer = uds_fd_table[minor].peer; + + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + + /* see if peer is blocked on select() and a write is possible + * (from peer to minor) + */ + if (uds_fd_table[minor].peer != -1 && + uds_fd_table[uds_fd_table[minor].peer].selecting == 1 && + (uds_fd_table[minor].size + uds_fd_table[minor].pos + 1 + < PIPE_BUF)) { + + int peer = uds_fd_table[minor].peer; + + /* if the peer wants to know about write being possible + * and it doesn't know about it already, then let the peer know. + */ + if ((uds_fd_table[peer].sel_ops_in & SEL_WR) && + !(uds_fd_table[peer].sel_ops_out & SEL_WR)) { + + /* a write on peer is possible now */ + uds_fd_table[peer].sel_ops_out |= SEL_WR; + uds_fd_table[peer].status_updated = 1; + uds_unsuspend(m_source, peer); + } + } + + return fs_m_out.RES_NBYTES; /* return number of bytes read */ +} + +PRIVATE int uds_perform_write(int minor, endpoint_t m_source, + size_t size, int pretend) +{ + int rc, peer, i; + message fs_m_in; + message fs_m_out; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_perform_write() call_count=%d\n", minor, + ++call_count); +#endif + + /* skip reads and writes of 0 (or less!) bytes */ + if (size <= 0) { + return 0; + } + + /* check if we are allowed to write */ + if (!(uds_fd_table[minor].mode & S_IWUSR)) { + + /* socket is shutdown for writing */ + return EPIPE; + } + + if (size > PIPE_BUF) { + + /* message is too big to ever write to the PIPE */ + return EMSGSIZE; + } + + if (uds_fd_table[minor].type == SOCK_STREAM || + uds_fd_table[minor].type == SOCK_SEQPACKET) { + + /* if we're writing with a connection oriented socket, + * then it needs a peer to write to + */ + if (uds_fd_table[minor].peer == -1) { + if (uds_fd_table[minor].err == ECONNRESET) { + + uds_fd_table[minor].err = 0; + return ECONNRESET; + } else { + return ENOTCONN; + } + } else { + + peer = uds_fd_table[minor].peer; + } + + } else /* uds_fd_table[minor].type == SOCK_DGRAM */ { + + peer = -1; + + /* locate the "peer" we want to write to */ + for (i = 0; i < NR_FDS; i++) { + + /* look for a SOCK_DGRAM socket that is bound on + * the target address + */ + if (uds_fd_table[i].type == SOCK_DGRAM && + uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(uds_fd_table[minor].target.sun_path, + uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) { + + peer = i; + break; + } + } + + if (peer == -1) { + return ENOENT; + } + } + + /* check if write would overrun buffer. check if message + * boundry preserving types (SEQPACKET and DGRAM) wouldn't write + * to an empty buffer. check if connectionless sockets have a + * target to write to. 
+ */ + if ((uds_fd_table[peer].pos+uds_fd_table[peer].size+size > PIPE_BUF) || + ((uds_fd_table[minor].type == SOCK_SEQPACKET || + uds_fd_table[minor].type == SOCK_DGRAM) && + uds_fd_table[peer].size > 0) || (peer == -1)) { + + if (pretend) { + return SUSPEND; + } + + /* if needed revive the reader */ + if (uds_fd_table[peer].suspended) { + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + +#if DEBUG == 1 + printf("(uds) [%d] suspending write request\n", minor); +#endif + + /* Process is reading from an empty pipe, + * suspend it so some bytes can be written + */ + uds_fd_table[minor].suspended = UDS_SUSPENDED_WRITE; + return SUSPEND; + } + + if (pretend) { + return size; + } + + /* Prepare Request to the FS side of PFS */ + fs_m_in.m_type = REQ_WRITE; + fs_m_in.REQ_INODE_NR = uds_fd_table[peer].inode_nr; + fs_m_in.REQ_GRANT = uds_fd_table[minor].io_gr; + fs_m_in.REQ_SEEK_POS_HI = 0; + fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[peer].pos + + uds_fd_table[peer].size; + fs_m_in.REQ_NBYTES = size; + + /* Request the write */ + rc = fs_readwrite(&fs_m_in, &fs_m_out); + if (rc != OK) { + perror("fs_readwrite"); + return rc; + } + + /* Process the response */ +#if DEBUG == 1 + printf("(uds) [%d] write complete\n", minor); +#endif + /* increase the count of unread bytes */ + uds_fd_table[peer].size += fs_m_out.RES_NBYTES; + + + /* fill in the source address to be returned by recvfrom & recvmsg */ + if (uds_fd_table[minor].type == SOCK_DGRAM) { + memcpy(&uds_fd_table[peer].source, &uds_fd_table[minor].addr, + sizeof(struct sockaddr_un)); + } + + /* revive peer that was waiting for us to write */ + if (uds_fd_table[peer].suspended) { + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + + /* see if peer is blocked on select()*/ + if (uds_fd_table[peer].selecting == 1 && fs_m_out.RES_NBYTES > 0) { + + /* if the peer wants to know about data ready to read + * and it doesn't know about it already, then let the peer + * know we have data for it. + */ + if ((uds_fd_table[peer].sel_ops_in & SEL_RD) && + !(uds_fd_table[peer].sel_ops_out & SEL_RD)) { + + /* a read on peer is possible now */ + uds_fd_table[peer].sel_ops_out |= SEL_RD; + uds_fd_table[peer].status_updated = 1; + uds_unsuspend(m_source, peer); + } + } + + return fs_m_out.RES_NBYTES; /* return number of bytes written */ +} + +PUBLIC int uds_read(message *dev_m_in, message *dev_m_out) +{ + int bytes; + int minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_read() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT, + dev_m_in->POSITION); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* Update the process endpoint. 
*/ + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* save I/O Grant info */ + uds_fd_table[minor].io_gr = (cp_grant_id_t) dev_m_in->IO_GRANT; + uds_fd_table[minor].io_gr_size = dev_m_in->COUNT; + + bytes = uds_perform_read(minor, dev_m_in->m_source, + uds_fd_table[minor].io_gr_size, 0); + + uds_set_reply(dev_m_out, DEV_REVIVE, uds_fd_table[minor].endpoint, + uds_fd_table[minor].io_gr, bytes); + + return bytes; +} + +PUBLIC int uds_write(message *dev_m_in, message *dev_m_out) +{ + int bytes; + int minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_write() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT, + dev_m_in->POSITION); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* Update the process endpoint. */ + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* save I/O Grant info */ + uds_fd_table[minor].io_gr = (cp_grant_id_t) dev_m_in->IO_GRANT; + uds_fd_table[minor].io_gr_size = dev_m_in->COUNT; + + bytes = uds_perform_write(minor, dev_m_in->m_source, + uds_fd_table[minor].io_gr_size, 0); + + uds_set_reply(dev_m_out, DEV_REVIVE, uds_fd_table[minor].endpoint, + uds_fd_table[minor].io_gr, bytes); + + return bytes; +} + +PUBLIC int uds_ioctl(message *dev_m_in, message *dev_m_out) +{ + int rc, minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_ioctl() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT, + dev_m_in->POSITION); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = dev_m_in->COUNT; + uds_fd_table[minor].syscall_done = 0; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* update the owner endpoint - yes it's really stored in POSITION */ + uds_fd_table[minor].owner = dev_m_in->POSITION; + + switch (dev_m_in->COUNT) { /* Handle the ioctl(2) command */ + + case NWIOSUDSCONN: + + /* connect to a listening socket -- connect() */ + rc = do_connect(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSACCEPT: + + /* accept an incoming connection -- accept() */ + rc = do_accept(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSBLOG: + + /* set the backlog_size and put the socket into the + * listening state -- listen() + */ + rc = do_listen(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSTYPE: + + /* set the type for this socket (i.e. 
+ * SOCK_STREAM, SOCK_DGRAM, etc) -- socket() + */ + rc = do_socket(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSADDR: + + /* set the address for this socket -- bind() */ + rc = do_bind(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSADDR: + + /* get the address for this socket -- getsockname() */ + rc = do_getsockname(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSPADDR: + + /* get the address for the peer -- getpeername() */ + rc = do_getpeername(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSSHUT: + + /* shutdown a socket for reading, writing, or + * both -- shutdown() + */ + rc = do_shutdown(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSPAIR: + + /* connect two sockets -- socketpair() */ + rc = do_socketpair(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSSOTYPE: + + /* get socket type -- getsockopt(SO_TYPE) */ + rc = do_getsockopt_sotype(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSPEERCRED: + + /* get peer endpoint -- getsockopt(SO_PEERCRED) */ + rc = do_getsockopt_peercred(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSTADDR: + + /* set target address -- sendto() */ + rc = do_sendto(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSFADDR: + + /* get from address -- recvfrom() */ + rc = do_recvfrom(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSSNDBUF: + + /* get the send buffer size -- getsockopt(SO_SNDBUF) */ + rc = do_getsockopt_sndbuf(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSSNDBUF: + + /* set the send buffer size -- setsockopt(SO_SNDBUF) */ + rc = do_setsockopt_sndbuf(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSRCVBUF: + + /* get the send buffer size -- getsockopt(SO_SNDBUF) */ + rc = do_getsockopt_rcvbuf(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSRCVBUF: + + /* set the send buffer size -- setsockopt(SO_SNDBUF) */ + rc = do_setsockopt_rcvbuf(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSCTRL: + + /* set the control data -- sendmsg() */ + rc = do_sendmsg(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSCTRL: + + /* set the control data -- recvmsg() */ + rc = do_recvmsg(dev_m_in, dev_m_out); + + break; + + default: + + /* the IOCTL command is not valid for /dev/uds -- + * this happens a lot and is normal. a lot of + * libc functions determine the socket type with + * IOCTLs. Any not for us simply get a EBADIOCTL + * response. 
+ */ + + rc = EBADIOCTL; + } + + if (rc != SUSPEND) + uds_fd_table[minor].syscall_done = 1; + + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, rc); + + return rc; +} + +PUBLIC int uds_unsuspend(endpoint_t m_source, int minor) +{ + int r, bytes; + message m_out; + uds_fd_t *fdp; + + fdp = &uds_fd_table[minor]; + + if (fdp->status_updated == 1) { + + /* clear the status_updated flag */ + fdp->status_updated = 0; + fdp->selecting = 0; + + /* prepare the response */ + uds_sel_reply(&m_out, DEV_SEL_REPL2, minor, fdp->sel_ops_out); + r = OK; + } else if (fdp->ready_to_revive == 1) { + + /* clear the ready to revive flag */ + fdp->ready_to_revive = 0; + + switch (fdp->suspended) { + + case UDS_SUSPENDED_READ: + + bytes = uds_perform_read(minor, m_source, + fdp->io_gr_size, 0); + + if (bytes == SUSPEND) { + r = SUSPEND; + break; + } + + fdp->suspended = UDS_NOT_SUSPENDED; + + uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint, + fdp->io_gr, bytes); + + r = OK; + break; + + case UDS_SUSPENDED_WRITE: + + bytes = uds_perform_write(minor, m_source, + fdp->io_gr_size, 0); + + if (bytes == SUSPEND) { + r = SUSPEND; + break; + } + + fdp->suspended = UDS_NOT_SUSPENDED; + + uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint, + fdp->io_gr, bytes); + + r = OK; + break; + + case UDS_SUSPENDED_CONNECT: + case UDS_SUSPENDED_ACCEPT: + + /* In both cases, the process + * that send the notify() + * already performed the connection. + * The only thing to do here is + * unblock. + */ + + fdp->suspended = UDS_NOT_SUSPENDED; + + uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint, + fdp->io_gr, OK); + + r = OK; + break; + + default: + return(OK); + } + + } + + if (r == OK) reply(m_source, &m_out); + return(r); +} + +PUBLIC int uds_cancel(message *dev_m_in, message *dev_m_out) +{ + int i, j; + int minor; + /* XXX: should become a noop? */ +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_cancel() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_NO_STATUS, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* Update the process endpoint. */ + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* the system call was cancelled, so if the socket was suspended + * (which is likely the case), then it is not suspended anymore. + */ + uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED; + + /* If there is a system call and it isn't complete, roll back */ + if (uds_fd_table[minor].call_nr && !uds_fd_table[minor].syscall_done) { + + + if (uds_fd_table[minor].call_nr == DEV_IOCTL_S) { + + switch (uds_fd_table[minor].ioctl) { + + case NWIOSUDSACCEPT: /* accept() */ + + /* partial accept() only changes + * uds_fd_table[minorparent].child + */ + + for (i = 0; i < NR_FDS; i++) { + if (uds_fd_table[i].child == + minor) { + + uds_fd_table[i].child = -1; + + } + } + + break; + + case NWIOSUDSCONN: /* connect() */ + + /* partial connect() sets addr + * and adds minor to server backlog + */ + + for (i = 0; i < NR_FDS; i++) { + + /* find a socket that is in + * use. 
+ */ + if (uds_fd_table[i].state == + UDS_INUSE) { + + /* see if minor is in + * the backlog + */ + for (j = 0; j < uds_fd_table[i].backlog_size; j++) { + + if (uds_fd_table[i].backlog[j] == minor) { + + /* remove from backlog */ + uds_fd_table[i].backlog[j] = -1; + } + } + + } + } + + /* clear the address */ + memset(&(uds_fd_table[minor].addr), + '\0', + sizeof(struct sockaddr_un)); + + break; + + case NWIOSUDSTADDR: /* sendto() */ + case NWIOSUDSADDR: /* bind() */ + case NWIOGUDSADDR: /* getsockname() */ + case NWIOGUDSPADDR: /* getpeername() */ + case NWIOSUDSTYPE: /* socket() */ + case NWIOSUDSBLOG: /* listen() */ + case NWIOSUDSSHUT: /* shutdown() */ + case NWIOSUDSPAIR: /* socketpair() */ + case NWIOGUDSSOTYPE: /* SO_TYPE */ + case NWIOGUDSPEERCRED: /* SO_PEERCRED */ + default: + /* these are atomic, never suspend, + * and can't be cancelled once called + */ + break; + } + + } + + /* DEV_READ_S or DEV_WRITE_S don't need to do anything + * when cancelled. DEV_OPEN, DEV_REOPEN, DEV_SELECT, + * DEV_CLOSE are atomic, never suspend, and can't + * be cancelled once called. + */ + + uds_fd_table[minor].syscall_done = 1; + } + + + uds_set_reply(dev_m_out, DEV_NO_STATUS, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINTR); + + return EINTR; +} diff --git a/servers/apfs/fs.h b/servers/apfs/fs.h new file mode 100644 index 000000000..2bc006af3 --- /dev/null +++ b/servers/apfs/fs.h @@ -0,0 +1,31 @@ +#ifndef __PFS_FS_H__ +#define __PFS_FS_H__ + +/* This is the master header for pfs. It includes some other files + * and defines the principal constants. + */ +#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ +#define _SYSTEM 1 /* tell headers that this is the kernel */ + +/* The following are so basic, all the *.c files get them automatically. */ +#include /* MUST be first */ +#include /* MUST be second */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "const.h" +#include "proto.h" +#include "glo.h" + +#endif diff --git a/servers/apfs/glo.h b/servers/apfs/glo.h new file mode 100644 index 000000000..175b4fc8e --- /dev/null +++ b/servers/apfs/glo.h @@ -0,0 +1,29 @@ +#ifndef __PFS_GLO_H__ +#define __PFS_GLO_H__ + +/* EXTERN should be extern except for the table file */ +#ifdef _TABLE +#undef EXTERN +#define EXTERN +#endif + +#include + +/* The following variables are used for returning results to the caller. */ +EXTERN int err_code; /* temporary storage for error number */ + +EXTERN _PROTOTYPE (int (*fs_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* fs call table */ +EXTERN _PROTOTYPE (int (*dev_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* dev call table */ + +EXTERN uid_t caller_uid; +EXTERN gid_t caller_gid; +EXTERN int req_nr; +EXTERN int SELF_E; +EXTERN int exitsignaled; +EXTERN int busy; +EXTERN int unmountdone; + +/* Inode map. 
*/ +EXTERN bitchunk_t inodemap[FS_BITMAP_CHUNKS(NR_INODES)]; + +#endif diff --git a/servers/apfs/inc.h b/servers/apfs/inc.h new file mode 100644 index 000000000..4484e8038 --- /dev/null +++ b/servers/apfs/inc.h @@ -0,0 +1,41 @@ + +#define _SYSTEM 1 /* get OK and negative error codes */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ + +#define VERBOSE 0 /* display diagnostics */ + +#ifdef __NBSD_LIBC +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "proto.h" diff --git a/servers/apfs/inode.c b/servers/apfs/inode.c new file mode 100644 index 000000000..e73d39ca6 --- /dev/null +++ b/servers/apfs/inode.c @@ -0,0 +1,334 @@ +/* This file manages the inode table. There are procedures to allocate and + * deallocate inodes, acquire, erase, and release them, and read and write + * them from the disk. + * + * The entry points into this file are + * get_inode: search inode table for a given inode; if not there, + * read it + * put_inode: indicate that an inode is no longer needed in memory + * alloc_inode: allocate a new, unused inode + * wipe_inode: erase some fields of a newly allocated inode + * free_inode: mark an inode as available for a new file + * update_times: update atime, ctime, and mtime + * find_inode: retrieve pointer to inode in inode cache + * + */ + +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include + +FORWARD _PROTOTYPE( void addhash_inode, (struct inode * const node) ); +FORWARD _PROTOTYPE( void unhash_inode, (struct inode * const node) ); + + +/*===========================================================================* + * fs_putnode * + *===========================================================================*/ +PUBLIC int fs_putnode(message *fs_m_in, message *fs_m_out) +{ +/* Find the inode specified by the request message and decrease its counter.*/ + + struct inode *rip; + int count; + dev_t dev; + ino_t inum; + + rip = find_inode( (ino_t) fs_m_in->REQ_INODE_NR); + + if(!rip) { + printf("%s:%d put_inode: inode #%ld dev: %d not found\n", __FILE__, + __LINE__, fs_m_in->REQ_INODE_NR, (dev_t) fs_m_in->REQ_DEV); + panic("fs_putnode failed"); + } + + count = fs_m_in->REQ_COUNT; + if (count <= 0) { + printf("%s:%d put_inode: bad value for count: %d\n", __FILE__, + __LINE__, count); + panic("fs_putnode failed"); + } else if(count > rip->i_count) { + printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__, + __LINE__, count, rip->i_count); + panic("fs_putnode failed"); + } + + /* Decrease reference counter, but keep one reference; it will be consumed by + * put_inode(). 
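
A worked example of the counting below, with hypothetical numbers: VFS
drops three references at once (REQ_COUNT == 3) on an inode currently at
i_count == 5:

	rip->i_count -= count - 1;	/* 5 - 2  ->  i_count == 3 */
	put_inode(rip);			/* 3 - 1  ->  i_count == 2, inode stays */
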
*/ + rip->i_count -= count - 1; + dev = rip->i_dev; + inum = rip->i_num; + put_inode(rip); + if (rip->i_count == 0) put_block(dev, inum); + return(OK); +} + + +/*===========================================================================* + * init_inode_cache * + *===========================================================================*/ +PUBLIC void init_inode_cache() +{ + struct inode *rip; + struct inodelist *rlp; + + /* init free/unused list */ + TAILQ_INIT(&unused_inodes); + + /* init hash lists */ + for (rlp = &hash_inodes[0]; rlp < &hash_inodes[INODE_HASH_SIZE]; ++rlp) + LIST_INIT(rlp); + + /* add free inodes to unused/free list */ + for (rip = &inode[0]; rip < &inode[NR_INODES]; ++rip) { + rip->i_num = NO_ENTRY; + TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused); + } + + /* Reserve the first inode (bit 0) to prevent it from being allocated later*/ + if (alloc_bit() != NO_BIT) printf("PFS could not reserve NO_BIT\n"); + busy = 0; /* This bit does not make the server 'in use/busy'. */ +} + + +/*===========================================================================* + * addhash_inode * + *===========================================================================*/ +PRIVATE void addhash_inode(struct inode * const node) +{ + int hashi = (int) (node->i_num & INODE_HASH_MASK); + + /* insert into hash table */ + LIST_INSERT_HEAD(&hash_inodes[hashi], node, i_hash); +} + + +/*===========================================================================* + * unhash_inode * + *===========================================================================*/ +PRIVATE void unhash_inode(struct inode * const node) +{ + /* remove from hash table */ + LIST_REMOVE(node, i_hash); +} + + +/*===========================================================================* + * get_inode * + *===========================================================================*/ +PUBLIC struct inode *get_inode( + dev_t dev, /* device on which inode resides */ + ino_t numb /* inode number */ +) +{ +/* Find the inode in the hash table. If it is not there, get a free inode + * load it from the disk if it's necessary and put on the hash list + */ + register struct inode *rip; + int hashi; + + hashi = (int) (numb & INODE_HASH_MASK); + + /* Search inode in the hash table */ + LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) { + if (rip->i_num == numb && rip->i_dev == dev) { + /* If unused, remove it from the unused/free list */ + if (rip->i_count == 0) { + TAILQ_REMOVE(&unused_inodes, rip, i_unused); + } + ++rip->i_count; + + return(rip); + } + } + + /* Inode is not on the hash, get a free one */ + if (TAILQ_EMPTY(&unused_inodes)) { + err_code = ENFILE; + return(NULL); + } + rip = TAILQ_FIRST(&unused_inodes); + + /* If not free unhash it */ + if (rip->i_num != NO_ENTRY) unhash_inode(rip); + + /* Inode is not unused any more */ + TAILQ_REMOVE(&unused_inodes, rip, i_unused); + + /* Load the inode. */ + rip->i_dev = dev; + rip->i_num = numb; + rip->i_count = 1; + rip->i_update = 0; /* all the times are initially up-to-date */ + + /* Add to hash */ + addhash_inode(rip); + + + return(rip); +} + + +/*===========================================================================* + * find_inode * + *===========================================================================*/ +PUBLIC struct inode *find_inode(numb) +ino_t numb; /* inode number */ +{ +/* Find the inode specified by the inode and device number. 
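
Note that, unlike get_inode() above, find_inode() leaves i_count
untouched, so it is only a lookup; callers that need the inode to survive
must still hold a reference from get_inode(). A hypothetical caller:

	struct inode *rip;

	if ((rip = find_inode(inum)) == NULL)	/* inum from a VFS request */
		return(EINVAL);			/* unknown inode number */
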
+ */ + struct inode *rip; + int hashi; + + hashi = (int) (numb & INODE_HASH_MASK); + + /* Search inode in the hash table */ + LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) { + if (rip->i_count > 0 && rip->i_num == numb) { + return(rip); + } + } + + return(NULL); +} + + +/*===========================================================================* + * put_inode * + *===========================================================================*/ +PUBLIC void put_inode(rip) +struct inode *rip; /* pointer to inode to be released */ +{ +/* The caller is no longer using this inode. If no one else is using it either + * write it back to the disk immediately. If it has no links, truncate it and + * return it to the pool of available inodes. + */ + + if (rip == NULL) return; /* checking here is easier than in caller */ + + if (rip->i_count < 1) + panic("put_inode: i_count already below 1: %d", rip->i_count); + + if (--rip->i_count == 0) { /* i_count == 0 means no one is using it now */ + if (rip->i_nlinks == NO_LINK) { /* Are there links to this file? */ + /* no links, free the inode. */ + truncate_inode(rip, 0); /* return all the disk blocks */ + rip->i_mode = I_NOT_ALLOC; /* clear I_TYPE field */ + free_inode(rip); + } else { + truncate_inode(rip, (off_t) 0); + } + + if (rip->i_nlinks == NO_LINK) { + /* free, put at the front of the LRU list */ + unhash_inode(rip); + rip->i_num = NO_ENTRY; + rip->i_dev = NO_DEV; + rip->i_rdev = NO_DEV; + TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused); + } else { + /* unused, put at the back of the LRU (cache it) */ + TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused); + } + } +} + + +/*===========================================================================* + * alloc_inode * + *===========================================================================*/ +PUBLIC struct inode *alloc_inode(dev_t dev, mode_t bits) +{ +/* Allocate a free inode on 'dev', and return a pointer to it. */ + + register struct inode *rip; + bit_t b; + ino_t i_num; + + b = alloc_bit(); + if (b == NO_BIT) { + err_code = ENOSPC; + printf("PipeFS is out of inodes\n"); + return(NULL); + } + i_num = (ino_t) b; + + + /* Try to acquire a slot in the inode table. */ + if ((rip = get_inode(dev, i_num)) == NULL) { + /* No inode table slots available. Free the inode if just allocated.*/ + if (dev == NO_DEV) free_bit(b); + } else { + /* An inode slot is available. */ + + rip->i_mode = bits; /* set up RWX bits */ + rip->i_nlinks = NO_LINK; /* initial no links */ + rip->i_uid = caller_uid; /* file's uid is owner's */ + rip->i_gid = caller_gid; /* ditto group id */ + + /* Fields not cleared already are cleared in wipe_inode(). They have + * been put there because truncate() needs to clear the same fields if + * the file happens to be open while being truncated. It saves space + * not to repeat the code twice. + */ + wipe_inode(rip); + } + + return(rip); +} + + +/*===========================================================================* + * wipe_inode * + *===========================================================================*/ +PUBLIC void wipe_inode(rip) +struct inode *rip; /* the inode to be erased */ +{ +/* Erase some fields in the inode. This function is called from alloc_inode() + * when a new inode is to be allocated, and from truncate(), when an existing + * inode is to be truncated. 
+ */ + + rip->i_size = 0; + rip->i_update = ATIME | CTIME | MTIME; /* update all times later */ +} + + +/*===========================================================================* + * free_inode * + *===========================================================================*/ +PUBLIC void free_inode(rip) +struct inode *rip; +{ +/* Return an inode to the pool of unallocated inodes. */ + + bit_t b; + + if (rip->i_num <= (ino_t) 0 || rip->i_num >= (ino_t) NR_INODES) return; + b = (bit_t) rip->i_num; + free_bit(b); +} + + +/*===========================================================================* + * update_times * + *===========================================================================*/ +PUBLIC void update_times(rip) +struct inode *rip; /* pointer to inode to be read/written */ +{ +/* Various system calls are required by the standard to update atime, ctime, + * or mtime. Since updating a time requires sending a message to the clock + * task--an expensive business--the times are marked for update by setting + * bits in i_update. When a stat, fstat, or sync is done, or an inode is + * released, update_times() may be called to actually fill in the times. + */ + + time_t cur_time; + + cur_time = clock_time(); + if (rip->i_update & ATIME) rip->i_atime = cur_time; + if (rip->i_update & CTIME) rip->i_ctime = cur_time; + if (rip->i_update & MTIME) rip->i_mtime = cur_time; + rip->i_update = 0; /* they are all up-to-date now */ +} diff --git a/servers/apfs/inode.h b/servers/apfs/inode.h new file mode 100644 index 000000000..19e582593 --- /dev/null +++ b/servers/apfs/inode.h @@ -0,0 +1,39 @@ +#ifndef __PFS_INODE_H__ +#define __PFS_INODE_H__ + +/* Inode table. This table holds inodes that are currently in use. + */ + +#include + +EXTERN struct inode { + mode_t i_mode; /* file type, protection, etc. */ + nlink_t i_nlinks; /* how many links to this file */ + uid_t i_uid; /* user id of the file's owner */ + gid_t i_gid; /* group number */ + off_t i_size; /* current file size in bytes */ + time_t i_atime; /* time of last access (V2 only) */ + time_t i_mtime; /* when was file data last changed */ + time_t i_ctime; /* when was inode itself changed (V2 only)*/ + + /* The following items are not present on the disk. 
*/ + dev_t i_dev; /* which device is the inode on */ + dev_t i_rdev; /* which special device is the inode on */ + ino_t i_num; /* inode number on its (minor) device */ + int i_count; /* # times inode used; 0 means slot is free */ + char i_update; /* the ATIME, CTIME, and MTIME bits are here */ + + LIST_ENTRY(inode) i_hash; /* hash list */ + TAILQ_ENTRY(inode) i_unused; /* free and unused list */ + + +} inode[NR_INODES]; + +/* list of unused/free inodes */ +EXTERN TAILQ_HEAD(unused_inodes_t, inode) unused_inodes; + +/* inode hashtable */ +EXTERN LIST_HEAD(inodelist, inode) hash_inodes[INODE_HASH_SIZE]; + + +#endif diff --git a/servers/apfs/link.c b/servers/apfs/link.c new file mode 100644 index 000000000..4ec064ef9 --- /dev/null +++ b/servers/apfs/link.c @@ -0,0 +1,50 @@ +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include + +/*===========================================================================* + * fs_ftrunc * + *===========================================================================*/ +PUBLIC int fs_ftrunc(message *fs_m_in, message *fs_m_out) +{ + struct inode *rip; + off_t start, end; + ino_t inumb; + + inumb = (ino_t) fs_m_in->REQ_INODE_NR; + + if( (rip = find_inode(inumb)) == NULL) return(EINVAL); + + start = fs_m_in->REQ_TRC_START_LO; + end = fs_m_in->REQ_TRC_END_LO; + + return truncate_inode(rip, start); +} + + +/*===========================================================================* + * truncate_inode * + *===========================================================================*/ +PUBLIC int truncate_inode(rip, newsize) +register struct inode *rip; /* pointer to inode to be truncated */ +off_t newsize; /* inode must become this size */ +{ +/* Set inode to a certain size, freeing any zones no longer referenced + * and updating the size in the inode. If the inode is extended, the + * extra space is a hole that reads as zeroes. + * + * Nothing special has to happen to file pointers if inode is opened in + * O_APPEND mode, as this is different per fd and is checked when + * writing is done. + */ + + /* Pipes can shrink, so adjust size to make sure all zones are removed. */ + if(newsize != 0) return(EINVAL); /* Only truncate pipes to 0. */ + rip->i_size = newsize; + + /* Next correct the inode size. */ + wipe_inode(rip); /* Pipes can only be truncated to 0. */ + + return(OK); +} diff --git a/servers/apfs/main.c b/servers/apfs/main.c new file mode 100644 index 000000000..2f9f4d4ed --- /dev/null +++ b/servers/apfs/main.c @@ -0,0 +1,187 @@ +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "uds.h" + +FORWARD _PROTOTYPE(void get_work, (message *m_in) ); + +/* SEF functions and variables. */ +FORWARD _PROTOTYPE( void sef_local_startup, (void) ); +FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) ); +FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) ); + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(int argc, char *argv[]) +{ +/* This is the main routine of this service. The main loop consists of + * three major activities: getting new work, processing the work, and + * sending the reply. The loop never terminates, unless a panic occurs. + */ + int ind, do_reply, transid; + message pfs_m_in; + message pfs_m_out; + + /* SEF local startup. 
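
The calls that follow are the standard MINIX SEF service pattern;
condensed into one sketch (my_init and my_signal are hypothetical
callbacks):

	env_setargs(argc, argv);
	sef_setcb_init_fresh(my_init);		/* fresh-start initialization */
	sef_setcb_signal_handler(my_signal);	/* e.g. catch SIGTERM */
	sef_startup();				/* completes the RS init protocol */
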
*/ + env_setargs(argc, argv); + sef_local_startup(); + + printf("Started APFS\n"); + while(!unmountdone || !exitsignaled) { + endpoint_t src; + + do_reply = 1; + /* Wait for request message. */ + get_work(&pfs_m_in); + + transid = TRNS_GET_ID(pfs_m_in.m_type); + pfs_m_in.m_type = TRNS_DEL_ID(pfs_m_in.m_type); + if (pfs_m_in.m_type == 0) { + assert(!IS_VFS_FS_TRANSID(transid)); + pfs_m_in.m_type = transid; + transid = 0; + } else + assert(IS_VFS_FS_TRANSID(transid) || transid == 0); + + src = pfs_m_in.m_source; + caller_uid = INVAL_UID; /* To trap errors */ + caller_gid = INVAL_GID; + req_nr = pfs_m_in.m_type; + + if (IS_DEV_RQ(req_nr)) { + ind = req_nr - DEV_RQ_BASE; + if (ind < 0 || ind >= DEV_CALL_VEC_SIZE) { + printf("pfs: bad DEV request %d\n", req_nr); + pfs_m_out.m_type = EINVAL; + } else { + int result; + result = (*dev_call_vec[ind])(&pfs_m_in, &pfs_m_out); + if (pfs_m_out.REP_STATUS == SUSPEND || + result == SUSPEND) { + /* Nothing to tell, so not replying */ + do_reply = 0; + } + } + } else if (IS_VFS_RQ(req_nr)) { + ind = req_nr - VFS_BASE; + if (ind < 0 || ind >= FS_CALL_VEC_SIZE) { + printf("pfs: bad FS request %d\n", req_nr); + pfs_m_out.m_type = EINVAL; + } else { + pfs_m_out.m_type = + (*fs_call_vec[ind])(&pfs_m_in, &pfs_m_out); + } + } else { + printf("pfs: bad request %d\n", req_nr); + pfs_m_out.m_type = EINVAL; + } + + if (do_reply) { + if (IS_VFS_RQ(req_nr) && IS_VFS_FS_TRANSID(transid)) { + pfs_m_out.m_type = TRNS_ADD_ID(pfs_m_out.m_type, + transid); + } + reply(src, &pfs_m_out); + } + } + return(OK); +} + +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +PRIVATE void sef_local_startup() +{ + /* Register init callbacks. */ + sef_setcb_init_fresh(sef_cb_init_fresh); + sef_setcb_init_restart(sef_cb_init_fail); + + /* No live update support for now. */ + + /* Register signal callbacks. */ + sef_setcb_signal_handler(sef_cb_signal_handler); + + /* Let SEF perform startup. */ + sef_startup(); +} + +/*===========================================================================* + * sef_cb_init_fresh * + *===========================================================================*/ +PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) +{ +/* Initialize the pipe file server. */ + int i; + + /* Initialize main loop parameters. */ + exitsignaled = 0; /* No exit request seen yet. */ + busy = 0; /* Server is not 'busy' (i.e., inodes in use). */ + + /* Init inode table */ + for (i = 0; i < NR_INODES; ++i) { + inode[i].i_count = 0; + } + + init_inode_cache(); + uds_init(); + + SELF_E = getprocnr(); + buf_pool(); + + driver_announce(); + + return(OK); +} + +/*===========================================================================* + * sef_cb_signal_handler * + *===========================================================================*/ +PRIVATE void sef_cb_signal_handler(int signo) +{ + /* Only check for termination signal, ignore anything else. 
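
Both shutdown conditions must hold before main()'s
while (!unmountdone || !exitsignaled) loop can exit; the handler body
below only provides one of them:

	/* SIGTERM from RS       ->  exitsignaled = 1   (here)    */
	/* REQ_UNMOUNT from VFS  ->  unmountdone = TRUE (mount.c) */
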
*/ + if (signo != SIGTERM) return; + + + exitsignaled = 1; +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work(m_in) +message *m_in; /* pointer to message */ +{ + int r, srcok = 0, status; + endpoint_t src; + + do { + /* wait for a message */ + if ((r = sef_receive_status(ANY, m_in, &status)) != OK) + panic("sef_receive_status failed: %d", r); + src = m_in->m_source; + + if(src == VFS_PROC_NR) { + srcok = 1; /* Normal FS request. */ + } else + printf("PFS: unexpected source %d\n", src); + } while(!srcok); +} + + +/*===========================================================================* + * reply * + *===========================================================================*/ +PUBLIC void reply(who, m_out) +endpoint_t who; +message *m_out; /* report result */ +{ + if (OK != send(who, m_out)) /* send the message */ + printf("PFS(%d) was unable to send reply\n", SELF_E); +} diff --git a/servers/apfs/misc.c b/servers/apfs/misc.c new file mode 100644 index 000000000..2f8e3010d --- /dev/null +++ b/servers/apfs/misc.c @@ -0,0 +1,12 @@ +#include "fs.h" + + +/*===========================================================================* + * fs_sync * + *===========================================================================*/ +PUBLIC int fs_sync(message *fs_m_in, message *fs_m_out) +{ +/* Perform the sync() system call. No-op on this FS. */ + + return(OK); /* sync() can't fail */ +} diff --git a/servers/apfs/mount.c b/servers/apfs/mount.c new file mode 100644 index 000000000..61fbac78e --- /dev/null +++ b/servers/apfs/mount.c @@ -0,0 +1,18 @@ +#include "fs.h" +#include "glo.h" + + +/*===========================================================================* + * fs_unmount * + *===========================================================================*/ +PUBLIC int fs_unmount(message *fs_m_in, message *fs_m_out) +{ +/* Unmount Pipe File Server. */ + + if (busy) return(EBUSY); /* can't umount a busy file system */ + + /* Finish off the unmount. 
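
A hypothetical sequence showing why the busy check above matters (busy is
nonzero while any inode is in use, per sef_cb_init_fresh()):

	/* open() on a socket/fifo  ->  busy != 0                     */
	/* fs_unmount()             ->  EBUSY, VFS keeps PFS mounted  */
	/* last close()             ->  busy == 0                     */
	/* fs_unmount()             ->  falls through to this point   */
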
*/ + unmountdone = TRUE; + + return(OK); +} diff --git a/servers/apfs/open.c b/servers/apfs/open.c new file mode 100644 index 000000000..a7e275757 --- /dev/null +++ b/servers/apfs/open.c @@ -0,0 +1,52 @@ +#include "fs.h" +#include +#include "buf.h" +#include "inode.h" +#include + + +/*===========================================================================* + * fs_newnode * + *===========================================================================*/ +PUBLIC int fs_newnode(message *fs_m_in, message *fs_m_out) +{ + register int r = OK; + mode_t bits; + struct inode *rip; + dev_t dev; + + caller_uid = (uid_t) fs_m_in->REQ_UID; + caller_gid = (gid_t) fs_m_in->REQ_GID; + bits = (mode_t) fs_m_in->REQ_MODE; + dev = (dev_t) fs_m_in->REQ_DEV; + + /* Try to allocate the inode */ + if( (rip = alloc_inode(dev, bits) ) == NULL) return(err_code); + + switch (bits & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + rip->i_rdev = dev; /* Major/minor dev numbers */ + break; + case S_IFIFO: + if ((get_block(dev, rip->i_num)) == NULL) + r = EIO; + break; + default: + r = EIO; /* Unsupported file type */ + } + + if (r != OK) { + free_inode(rip); + } else { + /* Fill in the fields of the response message */ + fs_m_out->RES_INODE_NR = rip->i_num; + fs_m_out->RES_MODE = rip->i_mode; + fs_m_out->RES_FILE_SIZE_LO = rip->i_size; + fs_m_out->RES_UID = rip->i_uid; + fs_m_out->RES_GID = rip->i_gid; + fs_m_out->RES_DEV = dev; + } + + return(r); +} diff --git a/servers/apfs/proto.h b/servers/apfs/proto.h new file mode 100644 index 000000000..fe28b2895 --- /dev/null +++ b/servers/apfs/proto.h @@ -0,0 +1,104 @@ +#ifndef __PFS_PROTO_H__ +#define __PFS_PROTO_H__ + +/* Function prototypes. */ + +/* Structs used in prototypes must be declared as such first. */ +struct buf; +struct inode; +struct sockaddr_un; +struct ancillary; + +/* buffer.c */ +_PROTOTYPE( struct buf *get_block, (dev_t dev, ino_t inum) ); +_PROTOTYPE( void put_block, (dev_t dev, ino_t inum) ); + +/* cache.c */ +_PROTOTYPE( void buf_pool, (void) ); + +/* inode.c */ +_PROTOTYPE( struct inode *alloc_inode, (dev_t dev, mode_t mode) ); +_PROTOTYPE( void dup_inode, (struct inode *ip) ); +_PROTOTYPE( struct inode *find_inode, (ino_t numb) ); +_PROTOTYPE( void free_inode, (struct inode *rip) ); +_PROTOTYPE( int fs_putnode, (message *fs_m_in, message *fs_m_out) ); +_PROTOTYPE( void init_inode_cache, (void) ); +_PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb) ); +_PROTOTYPE( void put_inode, (struct inode *rip) ); +_PROTOTYPE( void update_times, (struct inode *rip) ); +_PROTOTYPE( void wipe_inode, (struct inode *rip) ); + +/* link.c */ +_PROTOTYPE( int fs_ftrunc, (message *fs_m_in, message *fs_m_out) ); +_PROTOTYPE( int truncate_inode, (struct inode *rip, off_t newsize) ); + + +/* main.c */ +_PROTOTYPE( void reply, (endpoint_t who, message *m_out) ); + +/* misc.c */ +_PROTOTYPE( int fs_sync, (message *fs_m_in, message *fs_m_out) ); + +/* mount.c */ +_PROTOTYPE( int fs_unmount, (message *fs_m_in, message *fs_m_out) ); + +/* open.c */ +_PROTOTYPE( int fs_newnode, (message *fs_m_in, message *fs_m_out) ); + +/* read.c */ +_PROTOTYPE( int fs_readwrite, (message *fs_m_in, message *fs_m_out) ); + +/* utility.c */ +_PROTOTYPE( time_t clock_time, (void) ); +_PROTOTYPE( int no_sys, (message *pfs_m_in, message *pfs_m_out) ); + +/* stadir.c */ +_PROTOTYPE( int fs_stat, (message *fs_m_in, message *fs_m_out) ); + +/* super.c */ +_PROTOTYPE( bit_t alloc_bit, (void) ); +_PROTOTYPE( void free_bit, (bit_t bit_returned) ); + +/* dev_uds.c */ +_PROTOTYPE( int uds_open, 
(message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_close, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_read, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_write, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_ioctl, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_select, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_unsuspend, (endpoint_t m_source, int minor) ); +_PROTOTYPE( int uds_cancel, (message *dev_m_in, message *dev_m_out) ); + +/* uds.c */ +_PROTOTYPE( void uds_init, (void) ); +_PROTOTYPE( int do_accept, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_connect, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_listen, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_socket, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_bind, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockname, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getpeername, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_shutdown, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_socketpair, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_sotype, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_peercred, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_sndbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_setsockopt_sndbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_rcvbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_setsockopt_rcvbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_sendto, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_recvfrom, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_sendmsg, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_recvmsg, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int perform_connection, + (message *dev_m_in, message *dev_m_out, + struct sockaddr_un *addr, int minorx, + int minory) ); +_PROTOTYPE( int clear_fds, (int minor, struct ancillary *data) ); +#endif diff --git a/servers/apfs/read.c b/servers/apfs/read.c new file mode 100644 index 000000000..b4f06b846 --- /dev/null +++ b/servers/apfs/read.c @@ -0,0 +1,89 @@ +#include "fs.h" +#include "buf.h" +#include +#include "inode.h" + + +/*===========================================================================* + * fs_readwrite * + *===========================================================================*/ +PUBLIC int fs_readwrite(message *fs_m_in, message *fs_m_out) +{ + int r, rw_flag; + struct buf *bp; + cp_grant_id_t gid; + off_t position, f_size; + unsigned int nrbytes, cum_io; + mode_t mode_word; + struct inode *rip; + ino_t inumb; + + r = OK; + cum_io = 0; + inumb = (ino_t) fs_m_in->REQ_INODE_NR; + + /* Find the inode referred */ + if ((rip = find_inode(inumb)) == NULL) return(EINVAL); + + mode_word = rip->i_mode & I_TYPE; + if (mode_word != I_NAMED_PIPE) return(EIO); + f_size = rip->i_size; + + /* Get the values from the request message */ + rw_flag = (fs_m_in->m_type == REQ_READ ? 
READING : WRITING); + gid = (cp_grant_id_t) fs_m_in->REQ_GRANT; + position = fs_m_in->REQ_SEEK_POS_LO; + nrbytes = (unsigned) fs_m_in->REQ_NBYTES; + + /* We can't read beyond the max file position */ + if (nrbytes > MAX_FILE_POS) return(EFBIG); + + if (rw_flag == WRITING) { + /* Check in advance to see if file will grow too big. */ + /* Casting nrbytes to signed is safe, because it's guaranteed not to + be beyond max signed value (i.e., MAX_FILE_POS). */ + if (position > PIPE_BUF - (signed) nrbytes) return(EFBIG); + } + + /* Mark inode in use */ + if ((get_inode(rip->i_dev, rip->i_num)) == NULL) return(err_code); + if ((bp = get_block(rip->i_dev, rip->i_num)) == NULL) return(err_code); + + if (rw_flag == READING) { + /* Copy a chunk from the block buffer to user space. */ + r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) 0, + (vir_bytes) (bp->b_data+position), (size_t) nrbytes, D); + } else { + /* Copy a chunk from user space to the block buffer. */ + r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) 0, + (vir_bytes) (bp->b_data+position), (size_t) nrbytes, D); + } + + if (r == OK) { + position += (signed) nrbytes; /* Update position */ + cum_io += nrbytes; + } + + fs_m_out->RES_SEEK_POS_LO = position; /* It might change later and the VFS + has to know this value */ + + /* On write, update file size and access time. */ + if (rw_flag == WRITING) { + if (position > f_size) rip->i_size = position; + } else { + if(position >= rip->i_size) { + /* All data in the pipe is read, so reset pipe pointers */ + rip->i_size = 0; /* no data left */ + position = 0; /* reset reader(s) */ + } + } + + bp->b_bytes = position; + if (rw_flag == READING) rip->i_update |= ATIME; + if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME; + fs_m_out->RES_NBYTES = (size_t) cum_io; + put_inode(rip); + put_block(rip->i_dev, rip->i_num); + + return(r); +} diff --git a/servers/apfs/stadir.c b/servers/apfs/stadir.c new file mode 100644 index 000000000..7a49caff5 --- /dev/null +++ b/servers/apfs/stadir.c @@ -0,0 +1,70 @@ +#include "fs.h" +#include "inode.h" +#include +#include + + +/*===========================================================================* + * stat_inode * + *===========================================================================*/ +PRIVATE int stat_inode( + register struct inode *rip, /* pointer to inode to stat */ + endpoint_t who_e, /* Caller endpoint */ + cp_grant_id_t gid /* grant for the stat buf */ +) +{ +/* Common code for stat and fstat system calls. */ + mode_t type; + struct stat statbuf; + u32_t blocks; /* The unit of this is 512 */ + int r, s; + + type = rip->i_mode & I_TYPE; + s = (type == I_CHAR_SPECIAL || type == I_BLOCK_SPECIAL); + + /* Update the atime, ctime, and mtime fields in the inode, if need be. */ + if (rip->i_update) update_times(rip); + + blocks = rip->i_size / S_BLKSIZE; + if (rip->i_size % S_BLKSIZE != 0) + blocks += 1; + + memset(&statbuf, 0, sizeof(struct stat)); + + statbuf.st_dev = rip->i_dev; + statbuf.st_ino = rip->i_num; + statbuf.st_mode = rip->i_mode; + statbuf.st_nlink = rip->i_nlinks; + statbuf.st_uid = rip->i_uid; + statbuf.st_gid = (short int) rip->i_gid; + statbuf.st_rdev = (dev_t) (s ? rip->i_rdev : NO_DEV); + statbuf.st_size = rip->i_size; + if (!s) statbuf.st_mode &= ~I_REGULAR;/* wipe out I_REGULAR bit for pipes */ + statbuf.st_atime = rip->i_atime; + statbuf.st_mtime = rip->i_mtime; + statbuf.st_ctime = rip->i_ctime; + statbuf.st_blksize = PIPE_BUF; + statbuf.st_blocks = blocks; + + /* Copy the struct to user space. 
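
The arithmetic in fs_readwrite() above is easier to see in isolation: a pipe is backed by a single PIPE_BUF-sized block, a write must fit in the space that is left (otherwise EFBIG), and once readers have drained all data the size and position snap back to zero. A minimal standalone model, assuming a 32K PIPE_BUF and ignoring grants, inodes, and timestamps:

    #include <stdio.h>

    #define PIPE_BUF 32768

    static int size, pos;           /* bytes in the pipe, read cursor */

    static int pipe_write(int n)
    {
        if (size > PIPE_BUF - n) return -1; /* block would overflow: EFBIG */
        size += n;
        return n;
    }

    static int pipe_read(int n)
    {
        if (n > size - pos) n = size - pos; /* at most what is buffered */
        pos += n;
        if (pos >= size) size = pos = 0;    /* fully drained: reset pointers */
        return n;
    }

    int main(void)
    {
        pipe_write(100);
        printf("read %d, size now %d\n", pipe_read(100), size); /* 100, 0 */
        printf("write full: %d\n", pipe_write(PIPE_BUF));       /* ok */
        printf("write more: %d\n", pipe_write(1));              /* -1: EFBIG */
        return 0;
    }
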
*/ + r = sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &statbuf, + (size_t) sizeof(statbuf), D); + + return(r); +} + + +/*===========================================================================* + * fs_stat * + *===========================================================================*/ +PUBLIC int fs_stat(message *fs_m_in, message *fs_m_out) +{ + register int r; /* return value */ + register struct inode *rip; /* target inode */ + + if( (rip = find_inode(fs_m_in->REQ_INODE_NR)) == NULL) return(EINVAL); + get_inode(rip->i_dev, rip->i_num); /* mark inode in use */ + r = stat_inode(rip, fs_m_in->m_source, (cp_grant_id_t) fs_m_in->REQ_GRANT); + put_inode(rip); /* release the inode */ + return(r); +} diff --git a/servers/apfs/super.c b/servers/apfs/super.c new file mode 100644 index 000000000..50f4e0ba5 --- /dev/null +++ b/servers/apfs/super.c @@ -0,0 +1,75 @@ +/* This file manages the super block table and the related data structures, + * namely, the bit maps that keep track of which zones and which inodes are + * allocated and which are free. When a new inode or zone is needed, the + * appropriate bit map is searched for a free entry. + * + * The entry points into this file are + * alloc_bit: somebody wants to allocate a zone or inode; find one + * free_bit: indicate that a zone or inode is available for allocation + */ + +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include "const.h" + + +/*===========================================================================* + * alloc_bit * + *===========================================================================*/ +PUBLIC bit_t alloc_bit(void) +{ +/* Allocate a bit from a bit map and return its bit number. */ + bitchunk_t *wptr, *wlim; + bit_t b; + unsigned int i, bcount; + + bcount = FS_BITMAP_CHUNKS(NR_INODES); /* Inode map has this many chunks. */ + wlim = &inodemap[bcount]; /* Point to last chunk in inodemap. */ + + for (wptr = &inodemap[0]; wptr < wlim; wptr++) { + /* Does this word contain a free bit? */ + if (*wptr == (bitchunk_t) ~0) continue; /* No. Go to next word */ + + /* Find and allocate the free bit. */ + for (i = 0; (*wptr & (1 << i)) != 0; ++i) {} + + /* Get inode number */ + b = (bit_t) ((wptr - &inodemap[0]) * FS_BITCHUNK_BITS + i); + + /* Don't allocate bits beyond end of map. */ + if (b >= NR_INODES) break; + + /* Allocate and return bit number. */ + *wptr |= 1 << i; + + /* Mark server 'busy' */ + busy++; + return(b); + } + + return(NO_BIT); /* no bit could be allocated */ +} + + +/*===========================================================================* + * free_bit * + *===========================================================================*/ +PUBLIC void free_bit(bit_returned) +bit_t bit_returned; /* number of bit to insert into the inode map*/ +{ + bitchunk_t *k, mask; + bit_t bit; + unsigned word; + + /* Get word offset and bit within offset */ + word = (unsigned) (bit_returned / (bit_t) FS_BITCHUNK_BITS); + bit = bit_returned % (bit_t) FS_BITCHUNK_BITS; + + /* Unset bit */ + k = &inodemap[word]; + mask = (unsigned) 1 << bit; + *k &= ~mask; + + busy--; /* One inode less in use. */ +} diff --git a/servers/apfs/table.c b/servers/apfs/table.c new file mode 100644 index 000000000..760e5b21e --- /dev/null +++ b/servers/apfs/table.c @@ -0,0 +1,82 @@ + +/* This file contains the table used to map system call numbers onto the + * routines that perform them. 
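
alloc_bit() and free_bit() above are a classic first-fit scan over a word-array bitmap: whole words that are all ones are skipped, otherwise the lowest zero bit is located, bounds-checked against the map size, and set. The same idea reduced to a standalone program (sizes here are illustrative, not the PFS constants):

    #include <stdio.h>

    #define CHUNK_BITS 32
    #define NR_SLOTS   64
    #define NR_CHUNKS  ((NR_SLOTS + CHUNK_BITS - 1) / CHUNK_BITS)
    #define NO_BIT     (-1)

    static unsigned map[NR_CHUNKS];

    static int alloc_bit(void)
    {
        for (int w = 0; w < NR_CHUNKS; w++) {
            if (map[w] == ~0u) continue;        /* word full, try next */
            int i = 0;
            while (map[w] & (1u << i)) i++;     /* lowest zero bit */
            int b = w * CHUNK_BITS + i;
            if (b >= NR_SLOTS) break;           /* past the end of the map */
            map[w] |= 1u << i;
            return b;
        }
        return NO_BIT;                          /* nothing free */
    }

    static void free_bit(int b)
    {
        map[b / CHUNK_BITS] &= ~(1u << (b % CHUNK_BITS));
    }

    int main(void)
    {
        int a = alloc_bit(), b = alloc_bit();
        printf("got %d and %d\n", a, b);    /* 0 and 1 */
        free_bit(a);
        printf("reuse %d\n", alloc_bit());  /* 0 again */
        return 0;
    }
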
+ */ + +#define _TABLE + +#include "fs.h" +#include "inode.h" +#include "buf.h" +#include "uds.h" + +/* File System Handlers (pfs) */ +PUBLIC _PROTOTYPE (int (*fs_call_vec[]), + (message *fs_m_in, message *fs_m_out) ) = { + + no_sys, /* 0 not used */ + no_sys, /* 1 */ + fs_putnode, /* 2 */ + no_sys, /* 3 */ + fs_ftrunc, /* 4 */ + no_sys, /* 5 */ + no_sys, /* 6 */ + no_sys, /* 7 */ + fs_stat, /* 8 */ + no_sys, /* 9 */ + no_sys, /* 10 */ + no_sys, /* 11 */ + no_sys, /* 12 */ + no_sys, /* 13 */ + no_sys, /* 14 */ + fs_unmount, /* 15 */ + fs_sync, /* 16 */ + no_sys, /* 17 */ + no_sys, /* 18 */ + fs_readwrite, /* 19 */ + fs_readwrite, /* 20 */ + no_sys, /* 21 */ + no_sys, /* 22 */ + no_sys, /* 23 */ + no_sys, /* 24 */ + no_sys, /* 25 */ + no_sys, /* 26 */ + no_sys, /* 27 */ + no_sys, /* 28 */ + fs_newnode, /* 29 */ + no_sys, /* 30 */ + no_sys, /* 31 */ + no_sys, /* 32 */ +}; + +/* Device Handlers (/dev/uds) */ +PUBLIC _PROTOTYPE (int (*dev_call_vec[]), + (message *dev_m_in, message *dev_m_out) ) = { + + uds_cancel, /* 0 CANCEL */ + no_sys, /* 1 */ + no_sys, /* 2 */ + no_sys, /* 3 */ + no_sys, /* 4 */ + no_sys, /* 5 */ + uds_open, /* 6 DEV_OPEN */ + uds_close, /* 7 DEV_CLOSE */ + no_sys, /* 8 */ + no_sys, /* 9 */ + no_sys, /* 10 TTY_SETPGRP */ + no_sys, /* 11 TTY_EXIT */ + uds_select, /* 12 DEV_SELECT */ + no_sys, /* 13 DEV_STATUS */ + uds_open, /* 14 DEV_REOPEN */ + no_sys, /* 15 */ + no_sys, /* 16 */ + no_sys, /* 17 */ + no_sys, /* 18 */ + no_sys, /* 19 */ + uds_read, /* 20 DEV_READ_S */ + uds_write, /* 21 DEV_WRITE_S */ + no_sys, /* 22 DEV_SCATTER_S */ + no_sys, /* 23 DEV_GATHER_S */ + uds_ioctl, /* 24 DEV_IOCTL_S */ + no_sys, /* 25 DEV_MMAP_S */ +}; diff --git a/servers/apfs/uds.c b/servers/apfs/uds.c new file mode 100644 index 000000000..fed57afdf --- /dev/null +++ b/servers/apfs/uds.c @@ -0,0 +1,1528 @@ +/* + * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL) + * This code handles ioctl(2) commands to implement the socket API. + * Some helper functions are also present. + * + * The entry points into this file are... + * + * uds_init: initialize the descriptor table. + * do_accept: handles the accept(2) syscall. + * do_connect: handles the connect(2) syscall. + * do_listen: handles the listen(2) syscall. + * do_socket: handles the socket(2) syscall. + * do_bind: handles the bind(2) syscall. + * do_getsockname: handles the getsockname(2) syscall. + * do_getpeername: handles the getpeername(2) syscall. + * do_shutdown: handles the shutdown(2) syscall. + * do_socketpair: handles the socketpair(2) syscall. + * do_getsockopt_sotype: handles the getsockopt(2) syscall. + * do_getsockopt_peercred: handles the getsockopt(2) syscall. + * do_getsockopt_sndbuf: handles the getsockopt(2) syscall. + * do_setsockopt_sndbuf: handles the setsockopt(2) syscall. + * do_getsockopt_rcvbuf: handles the getsockopt(2) syscall. + * do_setsockopt_rcvbuf: handles the setsockopt(2) syscall. + * do_sendto: handles the sendto(2) syscall. + * do_recvfrom: handles the recvfrom(2) syscall. + * do_sendmsg: handles the sendmsg(2) syscall. + * do_recvmsg: handles the recvmsg(2) syscall. + * perform_connection: performs the connection of two descriptors. + * clear_fds: calls put_filp for undelivered FDs. + * + * Also see... 
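
The two vectors above make the dispatchers table-driven: unused request numbers land on no_sys(), so the caller needs only a bounds check before indexing. A self-contained sketch of the pattern (handler names here are invented for the demo):

    #include <stdio.h>

    typedef int (*handler_t)(int arg);

    static int no_sys(int arg)  { (void)arg; printf("invalid call\n"); return -1; }
    static int do_stat(int arg) { printf("stat inode %d\n", arg); return 0; }
    static int do_sync(int arg) { (void)arg; printf("sync\n"); return 0; }

    static handler_t call_vec[] = {
        no_sys,     /* 0: unused slots default to no_sys */
        do_stat,    /* 1 */
        do_sync,    /* 2 */
    };
    #define NCALLS (int)(sizeof(call_vec) / sizeof(call_vec[0]))

    static int dispatch(int req, int arg)
    {
        if (req < 0 || req >= NCALLS) return no_sys(arg); /* bounds check */
        return call_vec[req](arg);
    }

    int main(void)
    {
        dispatch(1, 42);
        dispatch(9, 0);     /* out of range -> no_sys */
        return 0;
    }
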
+ * + * table.c, dev_uds.c, uds.h + */ + +#define DEBUG 0 + +#include "inc.h" +#include "const.h" +#include "glo.h" +#include "uds.h" + +/* File Descriptor Table */ +uds_fd_t uds_fd_table[NR_FDS]; + +/* initialize the descriptor table */ +PUBLIC void uds_init(void) +{ + /* + * Setting everything to NULL implicitly sets the + * state to UDS_FREE. + */ + memset(uds_fd_table, '\0', sizeof(uds_fd_t) * NR_FDS); +} + +/* check the permissions of a socket file */ +PRIVATE int check_perms(int minor, struct sockaddr_un *addr) +{ + int rc; + message vfs_m; + cp_grant_id_t grant_id; + + grant_id = cpf_grant_direct(VFS_PROC_NR, (vir_bytes) addr->sun_path, + UNIX_PATH_MAX, CPF_READ | CPF_WRITE); + + /* ask the VFS to verify the permissions */ + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_CHECK_PERMS; + vfs_m.USER_ENDPT = uds_fd_table[minor].owner; + vfs_m.IO_GRANT = (char *) grant_id; + vfs_m.COUNT = UNIX_PATH_MAX; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + cpf_revoke(grant_id); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); + printf("(uds) Canonical Path => %s\n", addr->sun_path); +#endif + + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PRIVATE filp_id_t verify_fd(endpoint_t ep, int fd) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) verify_fd(%d,%d) call_count=%d\n", ep, fd, + ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_VERIFY_FD; + vfs_m.USER_ENDPT = ep; + vfs_m.COUNT = fd; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return NULL; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + + return vfs_m.ADDRESS; +} + +PRIVATE int set_filp(filp_id_t sfilp) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) set_filp(0x%x) call_count=%d\n", sfilp, ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_SET_FILP; + vfs_m.ADDRESS = sfilp; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PRIVATE int copy_filp(endpoint_t to_ep, filp_id_t cfilp) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) copy_filp(%d, 0x%x) call_count=%d\n",to_ep, cfilp, + ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_COPY_FILP; + vfs_m.USER_ENDPT = to_ep; + vfs_m.ADDRESS = cfilp; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; +} + +PRIVATE int put_filp(filp_id_t pfilp) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) put_filp(0x%x) call_count=%d\n", pfilp, ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_PUT_FILP; + vfs_m.ADDRESS = pfilp; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... 
req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PRIVATE int cancel_fd(endpoint_t ep, int fd) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) cancel_fd(%d,%d) call_count=%d\n", ep, fd, ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_CANCEL_FD; + vfs_m.USER_ENDPT = ep; + vfs_m.COUNT = fd; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PUBLIC int perform_connection(message *dev_m_in, message *dev_m_out, + struct sockaddr_un *addr, int minorx, int minory) +{ + /* there are several places were a connection is established. */ + /* accept(2), connect(2), uds_status(2), socketpair(2) */ + /* This is a helper function to make sure it is done in the */ + /* same way in each place with the same validation checks. */ + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] perform_connection() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + /* only connection oriented types are acceptable and only like + * types can connect to each other + */ + if ((uds_fd_table[minorx].type != SOCK_SEQPACKET && + uds_fd_table[minorx].type != SOCK_STREAM) || + uds_fd_table[minorx].type != uds_fd_table[minory].type) { + + /* sockets are not in a valid state */ + return EINVAL; + } + + /* connect the pair of sockets */ + uds_fd_table[minorx].peer = minory; + uds_fd_table[minory].peer = minorx; + + /* Set the address of both sockets */ + memcpy(&(uds_fd_table[minorx].addr), addr, sizeof(struct sockaddr_un)); + memcpy(&(uds_fd_table[minory].addr), addr, sizeof(struct sockaddr_un)); + + return OK; +} + + +PUBLIC int do_accept(message *dev_m_in, message *dev_m_out) +{ + int minor; + int minorparent; /* minor number of parent (server) */ + int minorpeer; + int rc, i; + struct sockaddr_un addr; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_accept() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + /* Somewhat weird logic is used in this function, so here's an + * overview... The minor number is the server's client socket + * (the socket to be returned by accept()). The data waiting + * for us in the IO Grant is the address that the server is + * listening on. This function uses the address to find the + * server's descriptor. From there we can perform the + * connection or suspend and wait for a connect(). + */ + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].type != -1) { + /* this IOCTL must be called on a 'fresh' socket */ + return EINVAL; + } + + /* Get the server's address */ + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un), + D); + + if (rc != OK) { + return EIO; + } + + /* locate server socket */ + rc = -1; /* to trap error */ + + for (i = 0; i < NR_FDS; i++) { + + if (uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(addr.sun_path, + uds_fd_table[i].addr.sun_path, + UNIX_PATH_MAX) && + uds_fd_table[i].listening == 1) { + + rc = 0; + break; + } + } + + if (rc == -1) { + /* there is no server listening on addr. Maybe someone + * screwed up the ioctl()? 
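
perform_connection() above boils down to symmetric pairing: two like-typed descriptors are joined by pointing each table slot's peer index at the other, with -1 meaning "no peer". A standalone sketch (sizes and the single SOCK_STREAM type are simplifications; the real check also admits SOCK_SEQPACKET):

    #include <stdio.h>

    #define NR_FDS      8
    #define SOCK_STREAM 1

    struct slot { int type; int peer; };
    static struct slot tab[NR_FDS];

    static int connect_pair(int x, int y)
    {
        if (tab[x].type != SOCK_STREAM ||   /* connection-oriented only... */
            tab[x].type != tab[y].type)     /* ...and like types only */
            return -1;
        tab[x].peer = y;                    /* join both directions */
        tab[y].peer = x;
        return 0;
    }

    int main(void)
    {
        for (int i = 0; i < NR_FDS; i++) tab[i] = (struct slot){ -1, -1 };
        tab[2].type = tab[5].type = SOCK_STREAM;
        printf("pair: %d\n", connect_pair(2, 5));   /* 0 */
        printf("peer of 2 is %d\n", tab[2].peer);   /* 5 */
        return 0;
    }
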
+ */ + return EINVAL; + } + + minorparent = i; /* parent */ + + /* we are the parent's child */ + uds_fd_table[minorparent].child = minor; + + /* the peer has the same type as the parent. we need to be that + * type too. + */ + uds_fd_table[minor].type = uds_fd_table[minorparent].type; + + /* locate peer to accept in the parent's backlog */ + minorpeer = -1; /* to trap error */ + for (i = 0; i < uds_fd_table[minorparent].backlog_size; i++) { + if (uds_fd_table[minorparent].backlog[i] != -1) { + minorpeer = uds_fd_table[minorparent].backlog[i]; + uds_fd_table[minorparent].backlog[i] = -1; + rc = 0; + break; + } + } + + if (minorpeer == -1) { + +#if DEBUG == 1 + printf("(uds) [%d] {do_accept} suspend\n", minor); +#endif + + /* there are no peers in the backlog, suspend and wait + * for some to show up + */ + uds_fd_table[minor].suspended = UDS_SUSPENDED_ACCEPT; + + return SUSPEND; + } + +#if DEBUG == 1 + printf("(uds) [%d] connecting to %d -- parent is %d\n", minor, + minorpeer, minorparent); +#endif + + rc = perform_connection(dev_m_in, dev_m_out, &addr, minor, minorpeer); + if (rc != OK) { +#if DEBUG == 1 + printf("(uds) [%d] {do_accept} connection not performed\n", + minor); +#endif + return rc; + } + + uds_fd_table[minorparent].child = -1; + + /* if peer is blocked on connect() revive peer */ + if (uds_fd_table[minorpeer].suspended) { +#if DEBUG == 1 + printf("(uds) [%d] {do_accept} revive %d\n", minor, + minorpeer); +#endif + uds_fd_table[minorpeer].ready_to_revive = 1; + uds_unsuspend(dev_m_in->m_source, minorpeer); + } + + return OK; +} + +PUBLIC int do_connect(message *dev_m_in, message *dev_m_out) +{ + int minor; + struct sockaddr_un addr; + int rc, i, j; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_connect() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* only connection oriented sockets can connect */ + if (uds_fd_table[minor].type != SOCK_STREAM && + uds_fd_table[minor].type != SOCK_SEQPACKET) { + return EINVAL; + } + + if (uds_fd_table[minor].peer != -1) { + /* socket is already connected */ + return EISCONN; + } + + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &addr, + sizeof(struct sockaddr_un), D); + + if (rc != OK) { + return EIO; + } + + rc = check_perms(minor, &addr); + if (rc != OK) { + /* permission denied, socket file doesn't exist, etc. 
*/ + return rc; + } + + /* look for a socket of the same type that is listening on the + * address we want to connect to + */ + for (i = 0; i < NR_FDS; i++) { + + if (uds_fd_table[minor].type == uds_fd_table[i].type && + uds_fd_table[i].listening && + uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(addr.sun_path, uds_fd_table[i].addr.sun_path, + UNIX_PATH_MAX)) { + + if (uds_fd_table[i].child != -1) { + + /* the server is blocked on accept(2) -- + * perform connection to the child + */ + + rc = perform_connection(dev_m_in, dev_m_out, + &addr, minor, uds_fd_table[i].child); + + if (rc == OK) { + + uds_fd_table[i].child = -1; + +#if DEBUG == 1 + printf("(uds) [%d] {do_connect} revive %d\n", minor, i); +#endif + + /* wake the parent (server) */ + uds_fd_table[i].ready_to_revive = 1; + uds_unsuspend(dev_m_in->m_source, i); + } + + return rc; + + } else { + +#if DEBUG == 1 + printf("(uds) [%d] adding to %d's backlog\n", + minor, i); +#endif + + /* tell the server were waiting to be served */ + + /* look for a free slot in the backlog */ + rc = -1; /* to trap error */ + for (j = 0; j < uds_fd_table[i].backlog_size; + j++) { + + if (uds_fd_table[i].backlog[j] == -1) { + + uds_fd_table[i].backlog[j] = + minor; + + rc = 0; + break; + } + } + + if (rc == -1) { + + /* backlog is full */ + break; + } + + /* see if the server is blocked on select() */ + if (uds_fd_table[i].selecting == 1) { + + /* if the server wants to know + * about data ready to read and + * it doesn't know about it + * already, then let the server + * know we have data for it. + */ + if ((uds_fd_table[i].sel_ops_in & + SEL_RD) && + !(uds_fd_table[i].sel_ops_out & + SEL_RD)) { + + uds_fd_table[i].sel_ops_out |= + SEL_RD; + uds_fd_table[i].status_updated + = 1; + + uds_unsuspend( + dev_m_in->m_source, i); + } + } + + /* we found our server */ + uds_fd_table[minor].peer = i; + + /* set the address */ + memcpy(&(uds_fd_table[minor].addr), &addr, + sizeof(struct sockaddr_un)); + + break; + } + } + } + + if (uds_fd_table[minor].peer == -1) { + /* could not find another open socket listening on the + * specified address with room in the backlog + */ + return ECONNREFUSED; + } + +#if DEBUG == 1 + printf("(uds) [%d] {do_connect} suspend\n", minor); +#endif + + /* suspend until the server side completes the connection with accept() + */ + + uds_fd_table[minor].suspended = UDS_SUSPENDED_CONNECT; + + return SUSPEND; +} + +PUBLIC int do_listen(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + int backlog_size; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_listen() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* ensure the socket has a type and is bound */ + if (uds_fd_table[minor].type == -1 || + uds_fd_table[minor].addr.sun_family != AF_UNIX) { + + /* probably trying to call listen() before bind() */ + return EINVAL; + } + + /* the two supported types for listen(2) are SOCK_STREAM and + * SOCK_SEQPACKET + */ + if (uds_fd_table[minor].type != SOCK_STREAM && + uds_fd_table[minor].type != SOCK_SEQPACKET) { + + /* probably trying to call listen() with a SOCK_DGRAM */ + return EOPNOTSUPP; + } + + /* The POSIX standard doesn't say what to do if listen() has + * already been called. Well, there isn't an errno. 
We silently
+ * let it happen, but if listen() has already been called, we
+ * don't allow the backlog to shrink
+ */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &backlog_size, sizeof(int), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    if (uds_fd_table[minor].listening == 0) {
+
+        /* See if backlog_size is between 0 and UDS_SOMAXCONN */
+        if (backlog_size >= 0 && backlog_size < UDS_SOMAXCONN) {
+
+            /* use the user provided backlog_size */
+            uds_fd_table[minor].backlog_size = backlog_size;
+
+        } else {
+
+            /* the user gave an invalid size, use
+             * UDS_SOMAXCONN instead
+             */
+            uds_fd_table[minor].backlog_size = UDS_SOMAXCONN;
+        }
+    } else {
+
+        /* See if the user is trying to expand the backlog_size */
+        if (backlog_size > uds_fd_table[minor].backlog_size &&
+            backlog_size < UDS_SOMAXCONN) {
+
+            /* expand backlog_size */
+            uds_fd_table[minor].backlog_size = backlog_size;
+        }
+
+        /* Don't let the user shrink the backlog_size (we might
+         * have clients waiting in those slots)
+         */
+    }
+
+    /* perform listen(2) */
+    uds_fd_table[minor].listening = 1;
+
+    return OK;
+}
+
+PUBLIC int do_socket(message *dev_m_in, message *dev_m_out)
+{
+    int rc;
+    int minor;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_socket() call_count=%d\n", uds_minor(dev_m_in),
+        ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    /* see if this socket already has a type */
+    if (uds_fd_table[minor].type != -1) {
+        /* socket type can only be set once */
+        return EINVAL;
+    }
+
+    /* get the requested type */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
+        sizeof(int), D);
+
+    if (rc != OK) {
+
+        /* something went wrong and we couldn't get the type */
+        return EIO;
+    }
+
+    /* validate the type */
+    switch (uds_fd_table[minor].type) {
+        case SOCK_STREAM:
+        case SOCK_DGRAM:
+        case SOCK_SEQPACKET:
+
+            /* the type is one of the 3 valid socket types */
+            return OK;
+
+        default:
+
+            /* if the type isn't one of the 3 valid socket
+             * types, then it must be invalid.
+             */
+
+            /* set the type back to '-1' (no type set) */
+            uds_fd_table[minor].type = -1;
+
+            return EINVAL;
+    }
+}
+
+PUBLIC int do_bind(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    struct sockaddr_un addr;
+    int rc, i;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_bind() call_count=%d\n", uds_minor(dev_m_in),
+        ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if ((uds_fd_table[minor].type == -1) ||
+        (uds_fd_table[minor].addr.sun_family == AF_UNIX &&
+        uds_fd_table[minor].type != SOCK_DGRAM)) {
+
+        /* the type hasn't been set by do_socket() yet OR attempting
+         * to re-bind() a non-SOCK_DGRAM socket
+         */
+        return EINVAL;
+    }
+
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
+        D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    /* do some basic sanity checks on the address */
+    if (addr.sun_family != AF_UNIX) {
+
+        /* bad family */
+        return EAFNOSUPPORT;
+    }
+
+    if (addr.sun_path[0] == '\0') {
+
+        /* bad address */
+        return ENOENT;
+    }
+
+    rc = check_perms(minor, &addr);
+    if (rc != OK) {
+        /* permission denied, socket file doesn't exist, etc. */
+        return rc;
+    }
+
+    /* make sure the address isn't already in use by another socket.
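
The backlog policy in do_listen() above, isolated into one decision function: a fresh listen() clamps out-of-range requests to UDS_SOMAXCONN, while a repeated listen() may grow the backlog but never shrink it, since occupied slots may hold waiting clients. A standalone sketch under those rules:

    #include <stdio.h>

    #define UDS_SOMAXCONN 64

    static int pick_backlog(int listening, int current, int requested)
    {
        if (!listening)
            return (requested >= 0 && requested < UDS_SOMAXCONN)
                ? requested : UDS_SOMAXCONN;  /* invalid: use the maximum */
        if (requested > current && requested < UDS_SOMAXCONN)
            return requested;                 /* grow only */
        return current;                       /* never shrink */
    }

    int main(void)
    {
        printf("%d\n", pick_backlog(0, 0, 16)); /* 16 */
        printf("%d\n", pick_backlog(0, 0, -3)); /* 64: clamped */
        printf("%d\n", pick_backlog(1, 16, 4)); /* 16: no shrinking */
        return 0;
    }
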
*/ + for (i = 0; i < NR_FDS; i++) { + if ((uds_fd_table[i].addr.sun_family == AF_UNIX) && + !strncmp(addr.sun_path, + uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) { + + /* another socket is bound to this sun_path */ + return EADDRINUSE; + } + } + + /* looks good, perform the bind() */ + memcpy(&(uds_fd_table[minor].addr), &addr, sizeof(struct sockaddr_un)); + + return OK; +} + +PUBLIC int do_getsockname(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_getsockname() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* Unconditionally send the address we have assigned to this socket. + * The POSIX standard doesn't say what to do if the address + * hasn't been set. If the address isn't currently set, then + * the user will get NULL bytes. Note: libc depends on this + * behavior. + */ + rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].addr), + sizeof(struct sockaddr_un), D); + + return rc ? EIO : OK; +} + +PUBLIC int do_getpeername(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_getpeername() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* check that the socket is connected with a valid peer */ + if (uds_fd_table[minor].peer != -1) { + int peer_minor; + + peer_minor = uds_fd_table[minor].peer; + + /* copy the address from the peer */ + rc = sys_safecopyto(VFS_PROC_NR, + (cp_grant_id_t) dev_m_in->IO_GRANT, (vir_bytes) 0, + (vir_bytes) &(uds_fd_table[peer_minor].addr), + sizeof(struct sockaddr_un), D); + + return rc ? 
EIO : OK;
+    } else {
+        if (uds_fd_table[minor].err == ECONNRESET) {
+            uds_fd_table[minor].err = 0;
+
+            return ECONNRESET;
+        } else {
+            return ENOTCONN;
+        }
+    }
+}
+
+PUBLIC int do_shutdown(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc, how;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_shutdown() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if (uds_fd_table[minor].type != SOCK_STREAM &&
+        uds_fd_table[minor].type != SOCK_SEQPACKET) {
+
+        /* socket must be a connection-oriented socket */
+        return EINVAL;
+    }
+
+    if (uds_fd_table[minor].peer == -1) {
+        /* shutdown(2) is only valid for connected sockets */
+        if (uds_fd_table[minor].err == ECONNRESET) {
+            return ECONNRESET;
+        } else {
+            return ENOTCONN;
+        }
+    }
+
+    /* get the 'how' parameter from the process */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &how, sizeof(int), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    switch (how) {
+        case SHUT_RD:
+            /* take away read permission */
+            uds_fd_table[minor].mode =
+                uds_fd_table[minor].mode & ~S_IRUSR;
+            break;
+
+        case SHUT_WR:
+            /* take away write permission */
+            uds_fd_table[minor].mode =
+                uds_fd_table[minor].mode & ~S_IWUSR;
+            break;
+
+        case SHUT_RDWR:
+            /* completely shut down */
+            uds_fd_table[minor].mode = 0;
+            break;
+
+        default:
+            /* the 'how' parameter is invalid */
+            return EINVAL;
+    }
+
+    return OK;
+}
+
+PUBLIC int do_socketpair(message *dev_m_in, message *dev_m_out)
+{
+    int rc;
+    dev_t minorin;
+    int minorx, minory;
+    struct sockaddr_un addr;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_socketpair() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    /* first ioctl param is the first socket */
+    minorx = uds_minor(dev_m_in);
+
+    /* third ioctl param is the minor number of the second socket */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &minorin, sizeof(dev_t), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    minory = (minor(minorin) & BYTE);
+
+#if DEBUG == 1
+    printf("socketpair() %d - %d\n", minorx, minory);
+#endif
+
+    /* security check - both sockets must have the same endpoint (owner) */
+    if (uds_fd_table[minorx].owner != uds_fd_table[minory].owner) {
+
+        /* we won't allow you to magically connect your socket to
+         * someone else's socket
+         */
+        return EPERM;
+    }
+
+    addr.sun_family = AF_UNIX;
+    addr.sun_path[0] = 'X';
+    addr.sun_path[1] = '\0';
+
+    uds_fd_table[minorx].syscall_done = 1;
+    return perform_connection(dev_m_in, dev_m_out, &addr, minorx, minory);
+}
+
+PUBLIC int do_getsockopt_sotype(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_sotype() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if (uds_fd_table[minor].type == -1) {
+
+        /* the type hasn't been set yet. instead of returning an
+         * invalid type, we fail with EINVAL
+         */
+        return EINVAL;
+    }
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
+        sizeof(int), D);
+
+    return rc ?
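
A small aside on the bit-clearing idiom in do_shutdown() above: removing a permission with "mode & ~bit" is idempotent, while toggling with "^" would re-enable access if shutdown(2) were called twice with the same 'how'. A standalone check of the two idioms:

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
        mode_t mode = S_IRUSR | S_IWUSR;

        mode &= ~S_IRUSR;           /* SHUT_RD once  -> write-only */
        mode &= ~S_IRUSR;           /* SHUT_RD twice -> still write-only */
        printf("clear:  %o\n", (unsigned) mode);

        mode = S_IRUSR | S_IWUSR;
        mode ^= S_IRUSR;            /* toggle once  -> write-only */
        mode ^= S_IRUSR;            /* toggle twice -> read is back on */
        printf("toggle: %o\n", (unsigned) mode);
        return 0;
    }
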
EIO : OK;
+}
+
+PUBLIC int do_getsockopt_peercred(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int peer_minor;
+    int rc;
+    struct ucred cred;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_peercred() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if (uds_fd_table[minor].peer == -1) {
+
+        if (uds_fd_table[minor].err == ECONNRESET) {
+            uds_fd_table[minor].err = 0;
+
+            return ECONNRESET;
+        } else {
+            return ENOTCONN;
+        }
+    }
+
+    peer_minor = uds_fd_table[minor].peer;
+
+    /* obtain the peer's credentials */
+    rc = getnucred(uds_fd_table[peer_minor].owner, &cred);
+    if (rc == -1) {
+        /* likely error: invalid endpoint / proc doesn't exist */
+        return errno;
+    }
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &cred, sizeof(struct ucred), D);
+
+    return rc ? EIO : OK;
+}
+
+int do_getsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t sndbuf = PIPE_BUF;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_sndbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(sndbuf),
+        sizeof(size_t), D);
+
+    return rc ? EIO : OK;
+}
+
+int do_setsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t sndbuf;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_setsockopt_sndbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &sndbuf,
+        sizeof(size_t), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    if (sndbuf > PIPE_BUF) {
+        /* The send buffer is limited to 32K at the moment. */
+        return ENOSYS;
+    }
+
+    /* There is no way to reduce the send buffer, do we have to
+     * let this call fail for smaller buffers?
+     */
+    return OK;
+}
+
+int do_getsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t rcvbuf = PIPE_BUF;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_rcvbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(rcvbuf),
+        sizeof(size_t), D);
+
+    return rc ? EIO : OK;
+}
+
+int do_setsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t rcvbuf;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &rcvbuf,
+        sizeof(size_t), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    if (rcvbuf > PIPE_BUF) {
+        /* The receive buffer is limited to 32K at the moment. */
+        return ENOSYS;
+    }
+
+    /* There is no way to reduce the receive buffer, do we have to
+     * let this call fail for smaller buffers?
+ */ + return OK; +} + + +PUBLIC int do_sendto(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + struct sockaddr_un addr; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_sendto() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].type != SOCK_DGRAM) { + /* This IOCTL is only for SOCK_DGRAM sockets */ + return EINVAL; + } + + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un), + D); + + if (rc != OK) { + return EIO; + } + + /* do some basic sanity checks on the address */ + if (addr.sun_family != AF_UNIX || addr.sun_path[0] == '\0') { + /* bad address */ + return EINVAL; + } + + rc = check_perms(minor, &addr); + if (rc != OK) { + return rc; + } + + memcpy(&(uds_fd_table[minor].target), &addr, + sizeof(struct sockaddr_un)); + + return OK; +} + +PUBLIC int do_recvfrom(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_recvfrom() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].source), + sizeof(struct sockaddr_un), D); + + return rc ? EIO : OK; +} + +int msg_control_read(struct msg_control *msg_ctrl, struct ancillary *data, + int minor) +{ + int rc; + struct msghdr msghdr; + struct cmsghdr *cmsg = NULL; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] msg_control_read() call_count=%d\n", minor, + ++call_count); +#endif + + data->nfiledes = 0; + + memset(&msghdr, '\0', sizeof(struct msghdr)); + msghdr.msg_control = msg_ctrl->msg_control; + msghdr.msg_controllen = msg_ctrl->msg_controllen; + + for(cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msghdr, cmsg)) { + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + + int i; + int nfds = + MIN((cmsg->cmsg_len-CMSG_LEN(0))/sizeof(int), + OPEN_MAX); + + for (i = 0; i < nfds; i++) { + if (data->nfiledes == OPEN_MAX) { + return EOVERFLOW; + } + + data->fds[data->nfiledes] = + ((int *) CMSG_DATA(cmsg))[i]; +#if DEBUG == 1 + printf("(uds) [%d] fd[%d]=%d\n", minor, + data->nfiledes, data->fds[data->nfiledes]); +#endif + data->nfiledes++; + } + } + } + + /* obtain this socket's credentials */ + rc = getnucred(uds_fd_table[minor].owner, &(data->cred)); + if (rc == -1) { + return errno; + } +#if DEBUG == 1 + printf("(uds) [%d] cred={%d,%d,%d}\n", minor, + data->cred.pid, data->cred.uid, + data->cred.gid); +#endif + return OK; +} + +PRIVATE int send_fds(int minor, struct ancillary *data) +{ + int rc, i, j; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] send_fds() call_count=%d\n", minor, ++call_count); +#endif + + /* verify the file descriptors and get their filps. */ + for (i = 0; i < data->nfiledes; i++) { + data->filps[i] = verify_fd(uds_fd_table[minor].owner, + data->fds[i]); + + if (data->filps[i] == NULL) { + return EINVAL; + } + } + + /* set them as in-flight */ + for (i = 0; i < data->nfiledes; i++) { + rc = set_filp(data->filps[i]); + if (rc != OK) { + /* revert set_filp() calls */ + for (j = i; j >= 0; j--) { + put_filp(data->filps[j]); + } + return rc; + } + } + + return OK; +} + +PUBLIC int clear_fds(int minor, struct ancillary *data) +{ +/* This function calls put_filp() for all of the FDs in data. 
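
What msg_control_read() and the filp helpers above implement is, from user space, ordinary SCM_RIGHTS file-descriptor passing. A runnable plain-POSIX demo of the same ancillary-data layout (error handling omitted for brevity; not MINIX-specific code):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
        int sv[2], fd = STDOUT_FILENO, newfd = -1;
        char iobyte = 'x';
        struct iovec iov = { &iobyte, 1 };
        union { struct cmsghdr c; char buf[CMSG_SPACE(sizeof(int))]; } u;
        struct msghdr mh;

        socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

        /* sender: one SCM_RIGHTS header carrying one fd */
        memset(&mh, 0, sizeof mh);
        mh.msg_iov = &iov; mh.msg_iovlen = 1;
        mh.msg_control = u.buf; mh.msg_controllen = sizeof u.buf;
        struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_RIGHTS;
        cm->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cm), &fd, sizeof(int));
        sendmsg(sv[0], &mh, 0);

        /* receiver: walk the headers the way msg_control_read() does */
        memset(&mh, 0, sizeof mh);
        mh.msg_iov = &iov; mh.msg_iovlen = 1;
        mh.msg_control = u.buf; mh.msg_controllen = sizeof u.buf;
        recvmsg(sv[1], &mh, 0);
        for (cm = CMSG_FIRSTHDR(&mh); cm != NULL; cm = CMSG_NXTHDR(&mh, cm))
            if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS)
                memcpy(&newfd, CMSG_DATA(cm), sizeof(int));

        printf("received fd %d\n", newfd);
        if (newfd != -1) write(newfd, "hello via passed fd\n", 20);
        return 0;
    }
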
+ * This is used when a Unix Domain Socket is closed and there + * exists references to file descriptors that haven't been received + * with recvmsg(). + */ + int i; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] recv_fds() call_count=%d\n", minor, + ++call_count); +#endif + + for (i = 0; i < data->nfiledes; i++) { + put_filp(data->filps[i]); +#if DEBUG == 1 + printf("(uds) clear_fds() => %d\n", data->fds[i]); +#endif + data->fds[i] = -1; + data->filps[i] = NULL; + } + + data->nfiledes = 0; + + return OK; +} + +PRIVATE int recv_fds(int minor, struct ancillary *data, + struct msg_control *msg_ctrl) +{ + int rc, i, j; + struct msghdr msghdr; + struct cmsghdr *cmsg; + endpoint_t to_ep; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] recv_fds() call_count=%d\n", minor, + ++call_count); +#endif + + msghdr.msg_control = msg_ctrl->msg_control; + msghdr.msg_controllen = msg_ctrl->msg_controllen; + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * data->nfiledes); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + to_ep = uds_fd_table[minor].owner; + + /* copy to the target endpoint */ + for (i = 0; i < data->nfiledes; i++) { + rc = copy_filp(to_ep, data->filps[i]); + if (rc < 0) { + /* revert set_filp() calls */ + for (j = 0; j < data->nfiledes; j++) { + put_filp(data->filps[j]); + } + /* revert copy_filp() calls */ + for (j = i; j >= 0; j--) { + cancel_fd(to_ep, data->fds[j]); + } + return rc; + } + data->fds[i] = rc; /* data->fds[i] now has the new FD */ + } + + for (i = 0; i < data->nfiledes; i++) { + put_filp(data->filps[i]); +#if DEBUG == 1 + printf("(uds) recv_fds() => %d\n", data->fds[i]); +#endif + ((int *)CMSG_DATA(cmsg))[i] = data->fds[i]; + data->fds[i] = -1; + data->filps[i] = NULL; + } + + data->nfiledes = 0; + + return OK; +} + +PRIVATE int recv_cred(int minor, struct ancillary *data, + struct msg_control *msg_ctrl) +{ + struct msghdr msghdr; + struct cmsghdr *cmsg; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] recv_cred() call_count=%d\n", minor, + ++call_count); +#endif + + msghdr.msg_control = msg_ctrl->msg_control; + msghdr.msg_controllen = msg_ctrl->msg_controllen; + + cmsg = CMSG_FIRSTHDR(&msghdr); + if (cmsg->cmsg_len > 0) { + cmsg = CMSG_NXTHDR(&msghdr, cmsg); + } + + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + memcpy(CMSG_DATA(cmsg), &(data->cred), sizeof(struct ucred)); + + return OK; +} + +PUBLIC int do_sendmsg(message *dev_m_in, message *dev_m_out) +{ + int minor, peer, rc, i; + struct msg_control msg_ctrl; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_sendmsg() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + memset(&msg_ctrl, '\0', sizeof(struct msg_control)); + + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &msg_ctrl, + sizeof(struct msg_control), D); + + if (rc != OK) { + return EIO; + } + + /* locate peer */ + peer = -1; + if (uds_fd_table[minor].type == SOCK_DGRAM) { + if (uds_fd_table[minor].target.sun_path[0] == '\0' || + uds_fd_table[minor].target.sun_family != AF_UNIX) { + + return EDESTADDRREQ; + } + + for (i = 0; i < NR_FDS; i++) { + + /* look for a SOCK_DGRAM socket that is bound on + * the target address + */ + if (uds_fd_table[i].type == SOCK_DGRAM && + uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(uds_fd_table[minor].target.sun_path, + 
uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)){ + + peer = i; + break; + } + } + + if (peer == -1) { + return ENOENT; + } + } else { + peer = uds_fd_table[minor].peer; + if (peer == -1) { + return ENOTCONN; + } + } + +#if DEBUG == 1 + printf("(uds) [%d] sendmsg() -- peer=%d\n", minor, peer); +#endif + /* note: it's possible that there is already some file + * descriptors in ancillary_data if the peer didn't call + * recvmsg() yet. That's okay. The receiver will + * get the current file descriptors plus the new ones. + */ + rc = msg_control_read(&msg_ctrl, &uds_fd_table[peer].ancillary_data, + minor); + if (rc != OK) { + return rc; + } + + return send_fds(minor, &uds_fd_table[peer].ancillary_data); +} + +PUBLIC int do_recvmsg(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + struct msg_control msg_ctrl; + socklen_t controllen_avail = 0; + socklen_t controllen_needed = 0; + socklen_t controllen_desired = 0; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_sendmsg() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + +#if DEBUG == 1 + printf("(uds) [%d] CREDENTIALS {pid:%d,uid:%d,gid:%d}\n", minor, + uds_fd_table[minor].ancillary_data.cred.pid, + uds_fd_table[minor].ancillary_data.cred.uid, + uds_fd_table[minor].ancillary_data.cred.gid); +#endif + + memset(&msg_ctrl, '\0', sizeof(struct msg_control)); + + /* get the msg_control from the user, it will include the + * amount of space the user has allocated for control data. + */ + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &msg_ctrl, + sizeof(struct msg_control), D); + + if (rc != OK) { + return EIO; + } + + controllen_avail = MIN(msg_ctrl.msg_controllen, MSG_CONTROL_MAX); + + if (uds_fd_table[minor].ancillary_data.nfiledes > 0) { + controllen_needed = CMSG_LEN(sizeof(int) * + (uds_fd_table[minor].ancillary_data.nfiledes)); + } + + /* if there is room we also include credentials */ + controllen_desired = controllen_needed + + CMSG_LEN(sizeof(struct ucred)); + + if (controllen_needed > controllen_avail) { + return EOVERFLOW; + } + + rc = recv_fds(minor, &uds_fd_table[minor].ancillary_data, &msg_ctrl); + if (rc != OK) { + return rc; + } + + if (controllen_desired <= controllen_avail) { + rc = recv_cred(minor, &uds_fd_table[minor].ancillary_data, + &msg_ctrl); + if (rc != OK) { + return rc; + } + } + + /* send the user the control data */ + rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &msg_ctrl, + sizeof(struct msg_control), D); + + return rc ? EIO : OK; +} diff --git a/servers/apfs/uds.h b/servers/apfs/uds.h new file mode 100644 index 000000000..2c3d85520 --- /dev/null +++ b/servers/apfs/uds.h @@ -0,0 +1,250 @@ +#ifndef __PFS_UDS_H__ +#define __PFS_UDS_H__ + +/* + * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL) + * + * Also See... + * + * dev_uds.c, table.c, uds.c + */ + +#include +#include +#include +#include + +#include + +/* max connection backlog for incoming connections */ +#define UDS_SOMAXCONN 64 + +typedef void* filp_id_t; + +/* ancillary data to be sent */ +struct ancillary { + filp_id_t filps[OPEN_MAX]; + int fds[OPEN_MAX]; + int nfiledes; + struct ucred cred; +}; + +/* + * Internal State Information for a socket descriptor. + */ +struct uds_fd { + +/* Flags */ + + enum UDS_STATE { + /* This file descriptor is UDS_FREE and can be allocated. */ + UDS_FREE = 0, + + /* OR it is UDS_INUSE and can't be allocated. 
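
The control-buffer sizing in do_recvmsg() above follows a needed/desired split: the caller's buffer must at least hold the pending file descriptors (else EOVERFLOW), and credentials are appended only if there is room for them as well. The arithmetic in isolation (CMSG_LEN comes from sys/socket.h; the ucred stand-in and counts are illustrative):

    #include <stdio.h>
    #include <sys/socket.h>

    struct ucred_demo { int pid, uid, gid; };  /* stand-in for struct ucred */

    int main(void)
    {
        int nfds = 2;
        size_t avail = 64;                              /* caller's buffer */
        size_t needed = CMSG_LEN(sizeof(int) * nfds);   /* fds only */
        size_t desired = needed + CMSG_LEN(sizeof(struct ucred_demo));

        if (needed > avail)
            printf("EOVERFLOW: need %zu, have %zu\n", needed, avail);
        else
            printf("send fds%s\n", desired <= avail ? " + credentials" : "");
        return 0;
    }
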
*/ + UDS_INUSE = 1 + + /* state is set to UDS_INUSE in uds_open(). state is Set to + * UDS_FREE in uds_init() and uds_close(). state should be + * checked prior to all operations. + */ + } state; + +/* Owner Info */ + + /* Socket Owner */ + endpoint_t owner; + + /* endpoint for suspend/resume */ + endpoint_t endpoint; + +/* Pipe Housekeeping */ + + /* inode number on PFS -- each descriptor is backed by 1 + * PIPE which is allocated in uds_open() and freed in + * uds_close(). Data is sent/written to a peer's PIPE. + * Data is recv/read from this PIPE. + */ + ino_t inode_nr; + + + /* position in the PIPE where the data starts */ + off_t pos; + + /* size of data in the PIPE */ + size_t size; + + /* control read/write, set by uds_open() and shutdown(2). + * Can be set to S_IRUSR|S_IWUSR, S_IRUSR, S_IWUSR, or 0 + * for read and write, read only, write only, or neither. + * default is S_IRUSR|S_IWUSR. + */ + mode_t mode; + +/* Socket Info */ + + + /* socket type - SOCK_STREAM, SOCK_DGRAM, or SOCK_SEQPACKET + * Set by uds_ioctl(NWIOSUDSTYPE). It defaults to -1 in + * uds_open(). Any action on a socket with type -1 besides + * uds_ioctl(NWIOSUDSTYPE) and uds_close() will result in + * an error. + */ + int type; + + /* queue of pending connections for server sockets. + * connect(2) inserts and accept(2) removes from the queue + */ + int backlog[UDS_SOMAXCONN]; + + /* requested connection backlog size. Set by listen(2) + * Bounds (0 <= backlog_size <= UDS_SOMAXCONN) + * Defaults to UDS_SOMAXCONN which is defined above. + */ + unsigned char backlog_size; + + /* index of peer in uds_fd_table for connected sockets. + * -1 is used to mean no peer. Assumptions: peer != -1 means + * connected. + */ + int peer; + + /* index of child (client sd returned by accept(2)) + * -1 is used to mean no child. + */ + int child; + + /* address -- the address the socket is bound to. + * Assumptions: addr.sun_family == AF_UNIX means its bound. + */ + struct sockaddr_un addr; + + /* target -- where DGRAMs are sent to on the next uds_write(). */ + struct sockaddr_un target; + + /* source -- address where DGRAMs are from. used to fill in the + * from address in recvfrom(2) and recvmsg(2). + */ + struct sockaddr_un source; + + /* Flag (1 or 0) - listening for incoming connections. + * Default to 0. Set to 1 by do_listen() + */ + int listening; + + /* stores file pointers and credentials being sent between + * processes with sendmsg(2) and recvmsg(2). + */ + struct ancillary ancillary_data; + + /* Holds an errno. This is set when a connected socket is + * closed and we need to pass ECONNRESET on to a suspended + * peer. + */ + int err; + +/* Suspend/Revive Housekeeping */ + + + /* SUSPEND State Flags */ + enum UDS_SUSPENDED { + + /* Socket isn't blocked. */ + UDS_NOT_SUSPENDED = 0, + + /* Socket is blocked on read(2) waiting for data to read. */ + UDS_SUSPENDED_READ = 1, + + /* Socket is blocked on write(2) for space to write data. */ + UDS_SUSPENDED_WRITE = 2, + + /* Socket is blocked on connect(2) waiting for the server. */ + UDS_SUSPENDED_CONNECT = 4, + + /* Socket is blocked on accept(2) waiting for clients. */ + UDS_SUSPENDED_ACCEPT = 8 + } suspended; + + /* Flag (1 or 0) - thing socket was waiting for is ready. + * If 1, then uds_status() will attempt the operation that + * the socket was blocked on. 
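
The suspend/revive fields documented around here follow one pattern: a blocked call records why it is suspended, and when the awaited event arrives, ready_to_revive is raised so the server knows which operation to retry. A standalone sketch of that bookkeeping (names and types simplified):

    #include <stdio.h>

    enum suspended { NOT_SUSP = 0, SUSP_READ = 1, SUSP_WRITE = 2,
                     SUSP_CONNECT = 4, SUSP_ACCEPT = 8 };

    struct sock { enum suspended suspended; int ready_to_revive; };

    static void block_on(struct sock *s, enum suspended why)
    {
        s->suspended = why;             /* remember the pending operation */
    }

    static void event_arrived(struct sock *s)
    {
        if (s->suspended != NOT_SUSP)
            s->ready_to_revive = 1;     /* uds_status()-style revive mark */
    }

    int main(void)
    {
        struct sock s = { NOT_SUSP, 0 };
        block_on(&s, SUSP_ACCEPT);      /* accept(2) on an empty backlog */
        event_arrived(&s);              /* a connect(2) showed up */
        printf("revive op %d? %s\n", s.suspended,
            s.ready_to_revive ? "yes" : "no");
        return 0;
    }
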
+ */ + int ready_to_revive; + + /* i/o grant, saved for later use by suspended procs */ + cp_grant_id_t io_gr; + + /* is of i/o grant, saved for later use by suspended procs */ + size_t io_gr_size; + + /* Save the call number so that uds_cancel() can unwind the + * call properly. + */ + int call_nr; + + /* Save the IOCTL so uds_cancel() knows what got cancelled. */ + int ioctl; + + /* Flag (1 or 0) - the system call completed. + * A doc I read said DEV_CANCEL might be called even though + * the operation is finished. We use this variable to + * determine if we should rollback the changes or not. + */ + int syscall_done; + +/* select() */ + + /* Flag (1 or 0) - the process blocked on select(2). When + * selecting is 1 and I/O happens on this socket, then + * select_proc should be notified. + */ + int selecting; + + /* when a select is in progress, we notify() this endpoint + * of new data. + */ + endpoint_t select_proc; + + /* Options (SEL_RD, SEL_WR, SEL_ERR) that are requested. */ + int sel_ops_in; + + /* Options that are available for this socket. */ + int sel_ops_out; + + /* Flag (1 or 0) to be set to one before calling notify(). + * uds_status() will use the flag to locate this descriptor. + */ + int status_updated; +}; + +typedef struct uds_fd uds_fd_t; + +/* File Descriptor Table -- Defined in uds.c */ +EXTERN uds_fd_t uds_fd_table[NR_FDS]; + +/* + * Take message m and get the index in uds_fd_table. + */ +#define uds_minor(m) (minor((dev_t) m->DEVICE) & BYTE) + +/* + * Fill in a reply message. + */ +#define uds_set_reply(msg,type,endpoint,io_gr,status) \ + do { \ + (msg)->m_type = type; \ + (msg)->REP_ENDPT = endpoint; \ + (msg)->REP_IO_GRANT = io_gr; \ + (msg)->REP_STATUS = status; \ + } while (0) + +#define uds_sel_reply(msg,type,minor,ops) \ + do { \ + (msg)->m_type = type; \ + (msg)->DEV_MINOR = minor; \ + (msg)->DEV_SEL_OPS = ops; \ + } while (0) + + + + +#endif diff --git a/servers/apfs/utility.c b/servers/apfs/utility.c new file mode 100644 index 000000000..fac9ec625 --- /dev/null +++ b/servers/apfs/utility.c @@ -0,0 +1,33 @@ +#include "fs.h" + + +/*===========================================================================* + * no_sys * + *===========================================================================*/ +PUBLIC int no_sys(message *pfs_m_in, message *pfs_m_out) +{ +/* Somebody has used an illegal system call number */ + printf("no_sys: invalid call 0x%x to pfs\n", req_nr); + return(EINVAL); +} + + +/*===========================================================================* + * clock_time * + *===========================================================================*/ +PUBLIC time_t clock_time() +{ +/* This routine returns the time in seconds since 1.1.1970. MINIX is an + * astrophysically naive system that assumes the earth rotates at a constant + * rate and that such things as leap seconds do not exist. 
+ */ + + int r; + clock_t uptime; /* Uptime in ticks */ + time_t boottime; + + if ((r = getuptime2(&uptime, &boottime)) != OK) + panic("clock_time: getuptme2 failed: %d", r); + + return( (time_t) (boottime + (uptime/sys_hz()))); +} diff --git a/servers/avfs/Makefile b/servers/avfs/Makefile new file mode 100644 index 000000000..1fc72128e --- /dev/null +++ b/servers/avfs/Makefile @@ -0,0 +1,25 @@ +# Makefile for Virtual File System (VFS) +.include + +PROG= vfs +SRCS= main.c open.c read.c write.c pipe.c dmap.c \ + path.c device.c mount.c link.c exec.c \ + filedes.c stadir.c protect.c time.c \ + lock.c misc.c utility.c select.c table.c \ + vnode.c vmnt.c request.c fscall.c \ + tll.c comm.c worker.c + +.if ${MKCOVERAGE} != "no" +SRCS+= gcov.c +CPPFLAGS+= -DUSE_COVERAGE +.endif + +DPADD+= ${LIBSYS} ${LIBTIMERS} ${LIBEXEC} +LDADD+= -lsys -ltimers -lexec -lmthread + +MAN= + +BINDIR?= /usr/sbin +INSTALLFLAGS+= -S 16k + +.include diff --git a/servers/avfs/comm.c b/servers/avfs/comm.c new file mode 100644 index 000000000..b7254463a --- /dev/null +++ b/servers/avfs/comm.c @@ -0,0 +1,163 @@ +#include "fs.h" +#include "glo.h" +#include "vmnt.h" +#include "fproc.h" +#include +#include + +FORWARD _PROTOTYPE( int sendmsg, (struct vmnt *vmp, struct fproc *rfp) ); +FORWARD _PROTOTYPE( int queuemsg, (struct vmnt *vmp) ); + +/*===========================================================================* + * sendmsg * + *===========================================================================*/ +PRIVATE int sendmsg(vmp, rfp) +struct vmnt *vmp; +struct fproc *rfp; +{ +/* This is the low level function that sends requests to FS processes. + */ + int r, transid; + + if (vmp->m_fs_e == rfp->fp_endpoint) return(EDEADLK); + vmp->m_comm.c_cur_reqs++; /* One more request awaiting a reply */ + + transid = rfp->fp_wtid + VFS_TRANSID; + rfp->fp_sendrec->m_type = TRNS_ADD_ID(rfp->fp_sendrec->m_type, transid); + if ((r = asynsend3(vmp->m_fs_e, rfp->fp_sendrec, AMF_NOREPLY)) != OK) { + printf("VFS: sendmsg: error sending message. " + "FS_e: %d req_nr: %d err: %d\n", vmp->m_fs_e, + rfp->fp_sendrec->m_type, r); + util_stacktrace(); + return(r); + } + + return(r); +} + +/*===========================================================================* + * send_work * + *===========================================================================*/ +PUBLIC void send_work(void) +{ +/* Try to send out as many requests as possible */ + struct vmnt *vmp; + + if (sending == 0) return; + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) + fs_sendmore(vmp); +} + +/*===========================================================================* + * fs_sendmore * + *===========================================================================*/ +PUBLIC void fs_sendmore(struct vmnt *vmp) +{ + struct worker_thread *worker; + + /* Can we send more requests? 
*/ + if (vmp->m_fs_e == NONE) return; + if ((worker = vmp->m_comm.c_req_queue) == NULL) /* No process is queued */ + return; + if (vmp->m_comm.c_cur_reqs >= vmp->m_comm.c_max_reqs)/*No room to send more*/ + return; + if (vmp->m_flags & VMNT_BACKCALL) /* Hold off for now */ + return; + + vmp->m_comm.c_req_queue = worker->w_next; /* Remove head */ + worker->w_next = NULL; + sending--; + assert(sending >= 0); + sendmsg(vmp, worker->w_job.j_fp); +} + +/*===========================================================================* + * fs_sendrec * + *===========================================================================*/ +PUBLIC int fs_sendrec(endpoint_t fs_e, message *reqmp) +{ + struct vmnt *vmp; + int r; + + if ((vmp = find_vmnt(fs_e)) == NULL) + panic("Trying to talk to non-existent FS"); + + if (!force_sync) { + fp->fp_sendrec = reqmp; /* Where to store request and reply */ + + /* Find out whether we can send right away or have to enqueue */ + if ( !(vmp->m_flags & VMNT_BACKCALL) && + vmp->m_comm.c_cur_reqs < vmp->m_comm.c_max_reqs) { + /* There's still room to send more and no proc is queued */ + r = sendmsg(vmp, fp); + } else { + r = queuemsg(vmp); + } + self->w_next = NULL; /* End of list */ + + if (r != OK) return(r); + + worker_wait(); /* Yield execution until we've received the reply. */ + } else if (force_sync == 1) { + int r; + if (OK != (r = sendrec(fs_e, reqmp))) { + printf("VFS: sendrec failed: %d\n", r); + util_stacktrace(); + return(r); + } + } else if (force_sync == 2) { + int r, status; + if (OK != (r = asynsend(fs_e, reqmp)) || + OK != (r = receive(fs_e, reqmp, &status))) { + printf("VFS: asynrec failed: %d\n", r); + util_stacktrace(); + return(r); + } + } else if (force_sync == 3) { + int r, status; + if (OK != (r = send(fs_e, reqmp)) || + OK != (r = receive(fs_e, reqmp, &status))) { + printf("VFS: sendreceive failed: %d\n", r); + util_stacktrace(); + return(r); + } + } + + if (reqmp->m_type == -EENTERMOUNT || reqmp->m_type == -ELEAVEMOUNT || + reqmp->m_type == -ESYMLINK) { + reqmp->m_type = -reqmp->m_type; + } else if (force_sync != 0 && reqmp->m_type > 0) { + /* XXX: Keep this as long as we're interested in having support + * for synchronous communication. */ + nested_fs_call(reqmp); + return fs_sendrec(fs_e, reqmp); + } + + return(reqmp->m_type); +} + +/*===========================================================================* + * queuemsg * + *===========================================================================*/ +PRIVATE int queuemsg(struct vmnt *vmp) +{ +/* Put request on queue for vmnt */ + + struct worker_thread *queue; + + if (vmp->m_comm.c_req_queue == NULL) { + vmp->m_comm.c_req_queue = self; + } else { + /* Walk the list ... */ + queue = vmp->m_comm.c_req_queue; + while (queue->w_next != NULL) queue = queue->w_next; + + /* ... 
and append this worker */
+	queue->w_next = self;
+  }
+
+  self->w_next = NULL;	/* End of list */
+  sending++;
+
+  return(OK);
+}
diff --git a/servers/avfs/comm.h b/servers/avfs/comm.h
new file mode 100644
index 000000000..4e0d00cc3
--- /dev/null
+++ b/servers/avfs/comm.h
@@ -0,0 +1,12 @@
+#ifndef __VFS_COMM_H__
+#define __VFS_COMM_H__
+
+/* VFS<->FS communication */
+
+typedef struct {
+  int c_max_reqs;	/* Max requests an FS can handle simultaneously */
+  int c_cur_reqs;	/* Number of requests the FS is currently handling */
+  struct worker_thread *c_req_queue;/* Queue of procs waiting to send a message */
+} comm_t;
+
+#endif
diff --git a/servers/avfs/const.h b/servers/avfs/const.h
new file mode 100644
index 000000000..44a339d7c
--- /dev/null
+++ b/servers/avfs/const.h
@@ -0,0 +1,50 @@
+#ifndef __VFS_CONST_H__
+#define __VFS_CONST_H__
+
+/* Table sizes */
+#define NR_FILPS	512	/* # slots in filp table */
+#define NR_LOCKS	8	/* # slots in the file locking table */
+#define NR_MNTS		16	/* # slots in mount table */
+#define NR_VNODES	512	/* # slots in vnode table */
+#define NR_WTHREADS	8	/* # slots in worker thread table */
+
+#define NR_NONEDEVS	NR_MNTS	/* # slots in nonedev bitmap */
+
+/* Miscellaneous constants */
+#define SU_UID		((uid_t) 0)	/* super_user's uid_t */
+#define SYS_UID		((uid_t) 0)	/* uid_t for system processes and INIT */
+#define SYS_GID		((gid_t) 0)	/* gid_t for system processes and INIT */
+
+#define FP_BLOCKED_ON_NONE	0 /* not blocked */
+#define FP_BLOCKED_ON_PIPE	1 /* susp'd on pipe */
+#define FP_BLOCKED_ON_LOCK	2 /* susp'd on lock */
+#define FP_BLOCKED_ON_POPEN	3 /* susp'd on pipe open */
+#define FP_BLOCKED_ON_SELECT	4 /* susp'd on select */
+#define FP_BLOCKED_ON_DOPEN	5 /* susp'd on device open */
+#define FP_BLOCKED_ON_OTHER	6 /* blocked on other process, check
+				     fp_task to find out */
+
+/* test if the process is blocked on something */
+#define fp_is_blocked(fp)	((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE)
+
+#define DUP_MASK	0100	/* mask to distinguish dup2 from dup */
+
+#define LOOK_UP		0 /* tells search_dir to lookup string */
+#define ENTER		1 /* tells search_dir to make dir entry */
+#define DELETE		2 /* tells search_dir to delete entry */
+#define IS_EMPTY	3 /* tells search_dir to ret. OK or ENOTEMPTY */
+
+#define SYMLOOP		16
+
+#define LABEL_MAX	16	/* maximum label size (including '\0'). Should
+				 * not be smaller than 16 or bigger than
+				 * M3_LONG_STRING.
+				 */
+
+/* Args to dev_io */
+#define VFS_DEV_READ	2001
+#define VFS_DEV_WRITE	2002
+#define VFS_DEV_IOCTL	2005
+#define VFS_DEV_SELECT	2006
+
+#endif
diff --git a/servers/avfs/device.c b/servers/avfs/device.c
new file mode 100644
index 000000000..4fc25ab95
--- /dev/null
+++ b/servers/avfs/device.c
@@ -0,0 +1,1060 @@
+/* When a needed block is not in the cache, it must be fetched from the disk.
+ * Special character files also require I/O.  The routines for these are here.
+ *
+ * The entry points in this file are:
+ *   dev_open:	 FS opens a device
+ *   dev_close:	 FS closes a device
+ *   dev_io:	 FS does a read or write on a device
+ *   dev_status: FS processes callback request alert
+ *   gen_opcl:	 generic call to a task to perform an open/close
+ *   gen_io:	 generic call to a task to perform an I/O operation
+ *   no_dev:	 open/close processing for devices that don't exist
+ *   no_dev_io:	 i/o processing for devices that don't exist
+ *   tty_opcl:	 perform tty-specific processing for open/close
+ *   ctty_opcl:	 perform controlling-tty-specific processing for open/close
+ *   ctty_io:	 perform controlling-tty-specific processing for I/O
+ *   pm_setsid:	 perform VFS's side of setsid system call
+ *   do_ioctl:	 perform the IOCTL system call
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include
+#include "vnode.h"
+#include "vmnt.h"
+#include "param.h"
+
+FORWARD _PROTOTYPE( void restart_reopen, (int major)			);
+FORWARD _PROTOTYPE( int safe_io_conversion, (endpoint_t, cp_grant_id_t *,
+					     int *,
+					     endpoint_t *, void **,
+					     size_t, u32_t *)		);
+
+PRIVATE int dummyproc;
+
+
+/*===========================================================================*
+ *				dev_open				     *
+ *===========================================================================*/
+PUBLIC int dev_open(
+  dev_t dev,			/* device to open */
+  endpoint_t proc_e,		/* process to open for */
+  int flags			/* mode bits and flags */
+)
+{
+  int major, r;
+
+  /* Determine the major device number and call the device class specific
+   * open/close routine.  (This is the only routine that must check the
+   * device number for being in range.  All others can trust this check.)
+   */
+  major = major(dev);
+  if (major < 0 || major >= NR_DEVICES) major = 0;
+  if (dmap[major].dmap_driver == NONE) return(ENXIO);
+  r = (*dmap[major].dmap_opcl)(DEV_OPEN, dev, proc_e, flags);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				dev_reopen				     *
+ *===========================================================================*/
+PUBLIC int dev_reopen(
+  dev_t dev,			/* device to open */
+  int filp_no,			/* filp to reopen for */
+  int flags			/* mode bits and flags */
+)
+{
+/* Reopen a device after a failing device driver */
+
+  int major, r;
+  struct dmap *dp;
+
+  /* Determine the major device number and call the device class specific
+   * open/close routine.  (This is the only routine that must check the device
+   * number for being in range.  All others can trust this check.)
+   */
+
+  major = major(dev);
+  if (major < 0 || major >= NR_DEVICES) major = 0;
+  dp = &dmap[major];
+  if (dp->dmap_driver == NONE) return(ENXIO);
+  r = (*dp->dmap_opcl)(DEV_REOPEN, dev, filp_no, flags);
+  if (r == SUSPEND) r = OK;
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				dev_close				     *
+ *===========================================================================*/
+PUBLIC int dev_close(
+  dev_t dev,			/* device to close */
+  int filp_no
+)
+{
+/* Close a device */
+  int r, major;
+
+  /* See if driver is roughly valid.
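+   * "Roughly valid" here means only that major(dev) is in range and that
+   * a driver endpoint is currently mapped for it: e.g. (hypothetical
+   * numbers) for a node created as makedev(4, 1), major(dev) is 4, so
+   * dmap[4].dmap_driver must not be NONE before the close is forwarded.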
*/ + major = major(dev); + if (major < 0 || major >= NR_DEVICES) return(ENXIO); + if (dmap[major].dmap_driver == NONE) return(ENXIO); + r = (*dmap[major].dmap_opcl)(DEV_CLOSE, dev, filp_no, 0); + return(r); +} + + +/*===========================================================================* + * find_suspended_ep * + *===========================================================================*/ +endpoint_t find_suspended_ep(endpoint_t driver, cp_grant_id_t g) +{ +/* A process is suspended on a driver for which VFS issued a grant. Find out + * which process it was. + */ + struct fproc *rfp; + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if(rfp->fp_pid == PID_FREE) + continue; + + if(rfp->fp_blocked_on == FP_BLOCKED_ON_OTHER && + rfp->fp_task == driver && rfp->fp_grant == g) + return(rfp->fp_endpoint); + } + + return(NONE); +} + + +/*===========================================================================* + * dev_status * + *===========================================================================*/ +PUBLIC void dev_status(message *m) +{ +/* A device sent us a notification it has something for us. Retrieve it. */ + + message st; + int major, get_more = 1; + endpoint_t endpt; + + for (major = 0; major < NR_DEVICES; major++) + if (dmap_driver_match(m->m_source, major)) + break; /* 'major' is the device that sent the message */ + + if (major >= NR_DEVICES) /* Device endpoint not found; nothing to do */ + return; + + if (dmap[major].dmap_style == STYLE_DEVA || + dmap[major].dmap_style == STYLE_CLONE_A) { + printf("VFS: not doing dev_status for async driver %d\n", m->m_source); + return; + } + + /* Continuously send DEV_STATUS messages until the device has nothing to + * say to us anymore. */ + do { + int r; + st.m_type = DEV_STATUS; + r = sendrec(m->m_source, &st); + if (r == OK && st.REP_STATUS == ERESTART) r = EDEADEPT; + if (r != OK) { + printf("VFS: DEV_STATUS failed to %d: %d\n", m->m_source, r); + if (r == EDEADSRCDST || r == EDEADEPT) return; + panic("VFS: couldn't sendrec for DEV_STATUS: %d", r); + } + + switch(st.m_type) { + case DEV_REVIVE: + /* We've got results for a read/write/ioctl call to a + * synchronous character driver */ + endpt = st.REP_ENDPT; + if (endpt == VFS_PROC_NR) { + endpt = find_suspended_ep(m->m_source,st.REP_IO_GRANT); + if(endpt == NONE) { + printf("VFS: proc with grant %d from %d not found\n", + st.REP_IO_GRANT, st.m_source); + continue; + } + } + revive(endpt, st.REP_STATUS); + break; + case DEV_IO_READY: + /* Reply to a select request: driver is ready for I/O */ + select_reply2(st.m_source, st.DEV_MINOR, st.DEV_SEL_OPS); + break; + default: + printf("VFS: unrecognized reply %d to DEV_STATUS\n",st.m_type); + /* Fall through. */ + case DEV_NO_STATUS: + get_more = 0; + break; + } + } while(get_more); +} + +/*===========================================================================* + * safe_io_conversion * + *===========================================================================*/ +PRIVATE int safe_io_conversion(driver, gid, op, io_ept, buf, bytes, pos_lo) +endpoint_t driver; +cp_grant_id_t *gid; +int *op; +endpoint_t *io_ept; +void **buf; +size_t bytes; +u32_t *pos_lo; +{ +/* Convert operation to the 'safe' variant (i.e., grant based) if applicable. + * If no copying of data is involved, there is also no need to convert. */ + + int access = 0; + size_t size; + + *gid = GRANT_INVALID; /* Grant to buffer */ + + switch(*op) { + case VFS_DEV_READ: + case VFS_DEV_WRITE: + /* Change to safe op. */ + *op = (*op == VFS_DEV_READ) ? 
DEV_READ_S : DEV_WRITE_S; + *gid = cpf_grant_magic(driver, *io_ept, (vir_bytes) *buf, bytes, + *op == DEV_READ_S ? CPF_WRITE : CPF_READ); + if (*gid < 0) + panic("VFS: cpf_grant_magic of READ/WRITE buffer failed"); + break; + case VFS_DEV_IOCTL: + *pos_lo = *io_ept; /* Old endpoint in POSITION field. */ + *op = DEV_IOCTL_S; + if(_MINIX_IOCTL_IOR(m_in.REQUEST)) access |= CPF_WRITE; + if(_MINIX_IOCTL_IOW(m_in.REQUEST)) access |= CPF_READ; + if(_MINIX_IOCTL_BIG(m_in.REQUEST)) + size = _MINIX_IOCTL_SIZE_BIG(m_in.REQUEST); + else + size = _MINIX_IOCTL_SIZE(m_in.REQUEST); + + /* Grant access to the buffer even if no I/O happens with the ioctl, in + * order to disambiguate requests with DEV_IOCTL_S. + */ + *gid = cpf_grant_magic(driver, *io_ept, (vir_bytes) *buf, size, access); + if (*gid < 0) + panic("VFS: cpf_grant_magic IOCTL buffer failed"); + + break; + case VFS_DEV_SELECT: + *op = DEV_SELECT; + break; + default: + panic("VFS: unknown operation %d for safe I/O conversion", *op); + } + + /* If we have converted to a safe operation, I/O endpoint becomes VFS if it + * wasn't already. + */ + if(GRANT_VALID(*gid)) { + *io_ept = VFS_PROC_NR; + return(1); + } + + /* Not converted to a safe operation (because there is no copying involved in + * this operation). + */ + return(0); +} + +/*===========================================================================* + * dev_io * + *===========================================================================*/ +PUBLIC int dev_io( + int op, /* DEV_READ, DEV_WRITE, DEV_IOCTL, etc. */ + dev_t dev, /* major-minor device number */ + int proc_e, /* in whose address space is buf? */ + void *buf, /* virtual address of the buffer */ + u64_t pos, /* byte position */ + size_t bytes, /* how many bytes to transfer */ + int flags, /* special flags, like O_NONBLOCK */ + int suspend_reopen /* Just suspend the process */ +) +{ +/* Read from or write to a device. The parameter 'dev' tells which one. */ + struct dmap *dp; + u32_t pos_lo, pos_high; + message dev_mess; + cp_grant_id_t gid = GRANT_INVALID; + int safe, minor_dev, major_dev; + void *buf_used; + endpoint_t ioproc; + + pos_lo = ex64lo(pos); + pos_high = ex64hi(pos); + major_dev = major(dev); + minor_dev = minor(dev); + + /* Determine task dmap. */ + dp = &dmap[major_dev]; + + /* See if driver is roughly valid. */ + if (dp->dmap_driver == NONE) { + printf("VFS: dev_io: no driver for major %d\n", major_dev); + return(ENXIO); + } + + if (suspend_reopen) { + /* Suspend user. */ + fp->fp_grant = GRANT_INVALID; + fp->fp_ioproc = NONE; + wait_for(dp->dmap_driver); + fp->fp_flags |= FP_SUSP_REOPEN; + return(SUSPEND); + } + + if(isokendpt(dp->dmap_driver, &dummyproc) != OK) { + printf("VFS: dev_io: old driver for major %x (%d)\n", major_dev, + dp->dmap_driver); + return(ENXIO); + } + + /* By default, these are right. */ + dev_mess.USER_ENDPT = proc_e; + dev_mess.ADDRESS = buf; + + /* Convert DEV_* to DEV_*_S variants. */ + buf_used = buf; + safe = safe_io_conversion(dp->dmap_driver, &gid, &op, + (endpoint_t *) &dev_mess.USER_ENDPT, &buf_used, + bytes, &pos_lo); + + /* If the safe conversion was done, set the IO_GRANT to + * the grant id. + */ + if(safe) dev_mess.IO_GRANT = (char *) gid; + + /* Set up the rest of the message passed to task. */ + dev_mess.m_type = op; + dev_mess.DEVICE = minor_dev; + dev_mess.POSITION = pos_lo; + dev_mess.COUNT = bytes; + dev_mess.HIGHPOS = pos_high; + + /* This will be used if the i/o is suspended. */ + ioproc = dev_mess.USER_ENDPT; + + /* Call the task. 
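+   * The single call below dispatches through the per-major function
+   * pointer installed by map_driver(): for a STYLE_DEV driver, dmap_io
+   * points to gen_io() (a blocking sendrec), while for STYLE_DEVA or
+   * STYLE_CLONE_A it points to asyn_io() (asynsend3 plus a faked
+   * SUSPEND reply), so the suspension decision is made on the reply.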
*/ + (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + + if(dp->dmap_driver == NONE) { + /* Driver has vanished. */ + printf("VFS: driver gone?!\n"); + if(safe) cpf_revoke(gid); + return(EIO); + } + + /* Task has completed. See if call completed. */ + if (dev_mess.REP_STATUS == SUSPEND) { + if ((flags & O_NONBLOCK) && !(dp->dmap_style == STYLE_DEVA || + dp->dmap_style == STYLE_CLONE_A)) { + /* Not supposed to block. */ + dev_mess.m_type = CANCEL; + dev_mess.USER_ENDPT = ioproc; + dev_mess.IO_GRANT = (char *) gid; + + /* This R_BIT/W_BIT check taken from suspend()/unpause() + * logic. Mode is expected in the COUNT field. + */ + dev_mess.COUNT = 0; + if (call_nr == READ) dev_mess.COUNT = R_BIT; + else if (call_nr == WRITE) dev_mess.COUNT = W_BIT; + dev_mess.DEVICE = minor_dev; + (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + if (dev_mess.REP_STATUS == EINTR) dev_mess.REP_STATUS = EAGAIN; + } else { + /* select() will do suspending itself. */ + if(op != DEV_SELECT) { + /* Suspend user. */ + wait_for(dp->dmap_driver); + } + assert(!GRANT_VALID(fp->fp_grant)); + fp->fp_grant = gid; /* revoke this when unsuspended. */ + fp->fp_ioproc = ioproc; + + if (flags & O_NONBLOCK) { + /* Not supposed to block, send cancel message */ + dev_mess.m_type = CANCEL; + dev_mess.USER_ENDPT = ioproc; + dev_mess.IO_GRANT = (char *) gid; + + /* This R_BIT/W_BIT check taken from suspend()/unpause() + * logic. Mode is expected in the COUNT field. + */ + dev_mess.COUNT = 0; + if(call_nr == READ) dev_mess.COUNT = R_BIT; + else if(call_nr == WRITE) dev_mess.COUNT = W_BIT; + dev_mess.DEVICE = minor_dev; + (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + + /* Should do something about EINTR -> EAGAIN mapping */ + } + return(SUSPEND); + } + } + + /* No suspend, or cancelled suspend, so I/O is over and can be cleaned up. */ + if(safe) cpf_revoke(gid); + + return(dev_mess.REP_STATUS); +} + +/*===========================================================================* + * gen_opcl * + *===========================================================================*/ +PUBLIC int gen_opcl( + int op, /* operation, DEV_OPEN or DEV_CLOSE */ + dev_t dev, /* device to open or close */ + endpoint_t proc_e, /* process to open/close for */ + int flags /* mode bits and flags */ +) +{ +/* Called from the dmap struct on opens & closes of special files.*/ + int r, minor_dev, major_dev; + struct dmap *dp; + message dev_mess; + + /* Determine task dmap. */ + major_dev = major(dev); + minor_dev = minor(dev); + if (major_dev < 0 || major_dev >= NR_DEVICES) return(ENXIO); + dp = &dmap[major_dev]; + if (dp->dmap_driver == NONE) { + printf("VFS: gen_opcl: no driver for major %d\n", major_dev); + return(ENXIO); + } + + dev_mess.m_type = op; + dev_mess.DEVICE = minor_dev; + dev_mess.USER_ENDPT = proc_e; + dev_mess.COUNT = flags; + + /* Call the task. */ + r = (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + if (r != OK) return(r); + + return(dev_mess.REP_STATUS); +} + +/*===========================================================================* + * tty_opcl * + *===========================================================================*/ +PUBLIC int tty_opcl( + int op, /* operation, DEV_OPEN or DEV_CLOSE */ + dev_t dev, /* device to open or close */ + endpoint_t proc_e, /* process to open/close for */ + int flags /* mode bits and flags */ +) +{ +/* This procedure is called from the dmap struct on tty open/close. 
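+ *
+ * The rules applied here mirror controlling-tty acquisition: as a made-up
+ * example, a session-leader shell with no controlling tty yet that opens
+ * /dev/ttyc0 acquires it (the driver reports this by returning 1, turned
+ * into OK below), while any other process opening that same device gets
+ * O_NOCTTY added and uses the terminal without acquiring it.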
*/
+
+  int r;
+  register struct fproc *rfp;
+
+  /* Add O_NOCTTY to the flags if this process is not a session leader, or
+   * if it already has a controlling tty, or if it is someone else's
+   * controlling tty.
+   */
+  if (!(fp->fp_flags & FP_SESLDR) || fp->fp_tty != 0) {
+	flags |= O_NOCTTY;
+  } else {
+	for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+		if(rfp->fp_pid == PID_FREE) continue;
+		if (rfp->fp_tty == dev) flags |= O_NOCTTY;
+	}
+  }
+
+  r = gen_opcl(op, dev, proc_e, flags);
+
+  /* Did this call make the tty the controlling tty? */
+  if (r == 1) {
+	fp->fp_tty = dev;
+	r = OK;
+  }
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				ctty_opcl				     *
+ *===========================================================================*/
+PUBLIC int ctty_opcl(
+  int op,			/* operation, DEV_OPEN or DEV_CLOSE */
+  dev_t dev,			/* device to open or close */
+  endpoint_t proc_e,		/* process to open/close for */
+  int flags			/* mode bits and flags */
+)
+{
+/* This procedure is called from the dmap struct on opening or closing
+ * /dev/tty, the magic device that translates to the controlling tty.
+ */
+
+  return(fp->fp_tty == 0 ? ENXIO : OK);
+}
+
+
+/*===========================================================================*
+ *				pm_setsid				     *
+ *===========================================================================*/
+PUBLIC void pm_setsid(proc_e)
+int proc_e;
+{
+/* Perform the VFS side of the SETSID call, i.e. get rid of the controlling
+ * terminal of a process, and make the process a session leader.
+ */
+  register struct fproc *rfp;
+  int slot;
+
+  /* Make the process a session leader with no controlling tty. */
+  okendpt(proc_e, &slot);
+  rfp = &fproc[slot];
+  rfp->fp_flags |= FP_SESLDR;
+  rfp->fp_tty = 0;
+}
+
+
+/*===========================================================================*
+ *				do_ioctl				     *
+ *===========================================================================*/
+PUBLIC int do_ioctl()
+{
+/* Perform the ioctl(ls_fd, request, argx) system call (uses m2 fmt). */
+
+  int r = OK, suspend_reopen;
+  struct filp *f;
+  register struct vnode *vp;
+  dev_t dev;
+
+  if ((f = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+  vp = f->filp_vno;	/* get vnode pointer */
+  if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL &&
+      (vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) {
+	r = ENOTTY;
+  }
+
+  if (r == OK) {
+	suspend_reopen = (f->filp_state != FS_NORMAL);
+	dev = (dev_t) vp->v_sdev;
+
+	r = dev_io(VFS_DEV_IOCTL, dev, who_e, m_in.ADDRESS, cvu64(0),
+		   m_in.REQUEST, f->filp_flags, suspend_reopen);
+  }
+
+  unlock_filp(f);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				gen_io					     *
+ *===========================================================================*/
+PUBLIC int gen_io(task_nr, mess_ptr)
+endpoint_t task_nr;		/* which task to call */
+message *mess_ptr;		/* pointer to message for task */
+{
+/* All file system I/O ultimately comes down to I/O on major/minor device
+ * pairs.  These lead to calls on the following routines via the dmap table.
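+ *
+ * A sketch of the round trip done here (driver endpoint made up):
+ *
+ *	message m;
+ *	m.m_type = DEV_READ_S;		// filled in by dev_io()
+ *	r = sendrec(driver_e, &m);	// block until the driver replies
+ *	// a good reply has m.REP_ENDPT equal to the process we did I/O for
+ *
+ * The REP_ENDPT check below rejects stale or misdirected replies with EIO.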
+ */ + + int r, proc_e; + + proc_e = mess_ptr->USER_ENDPT; + + r = sendrec(task_nr, mess_ptr); + if (r == OK && mess_ptr->REP_STATUS == ERESTART) r = EDEADEPT; + if (r != OK) { + if (r == EDEADSRCDST || r == EDEADEPT) { + printf("VFS: dead driver %d\n", task_nr); + dmap_unmap_by_endpt(task_nr); + return(r); + } else if (r == ELOCKED) { + printf("VFS: ELOCKED talking to %d\n", task_nr); + return(r); + } + panic("call_task: can't send/receive: %d", r); + } + + /* Did the process we did the sendrec() for get a result? */ + if (mess_ptr->REP_ENDPT != proc_e) { + printf("VFS: strange device reply from %d, type = %d, " + "proc = %d (not %d) (2) ignored\n", mess_ptr->m_source, + mess_ptr->m_type, proc_e, mess_ptr->REP_ENDPT); + + return(EIO); + } + + return(OK); +} + + +/*===========================================================================* + * asyn_io * + *===========================================================================*/ +PUBLIC int asyn_io(task_nr, mess_ptr) +int task_nr; /* which task to call */ +message *mess_ptr; /* pointer to message for task */ +{ +/* All file system I/O ultimately comes down to I/O on major/minor device + * pairs. These lead to calls on the following routines via the dmap table. + */ + + int r; + + fp->fp_sendrec = mess_ptr; /* Remember where result should be stored */ + r = asynsend3(task_nr, mess_ptr, AMF_NOREPLY); + + if (r != OK) panic("VFS: asynsend in asyn_io failed: %d", r); + + /* Fake a SUSPEND */ + mess_ptr->REP_STATUS = SUSPEND; + return(OK); +} + + +/*===========================================================================* + * ctty_io * + *===========================================================================*/ +PUBLIC int ctty_io(task_nr, mess_ptr) +int task_nr; /* not used - for compatibility with dmap_t */ +message *mess_ptr; /* pointer to message for task */ +{ +/* This routine is only called for one device, namely /dev/tty. Its job + * is to change the message to use the controlling terminal, instead of the + * major/minor pair for /dev/tty itself. + */ + + struct dmap *dp; + + if (fp->fp_tty == 0) { + /* No controlling tty present anymore, return an I/O error. */ + mess_ptr->REP_STATUS = EIO; + } else { + /* Substitute the controlling terminal device. */ + dp = &dmap[major(fp->fp_tty)]; + mess_ptr->DEVICE = minor(fp->fp_tty); + + if (dp->dmap_driver == NONE) { + printf("FS: ctty_io: no driver for dev\n"); + return(EIO); + } + + if (isokendpt(dp->dmap_driver, &dummyproc) != OK) { + printf("VFS: ctty_io: old driver %d\n", dp->dmap_driver); + return(EIO); + } + + (*dp->dmap_io)(dp->dmap_driver, mess_ptr); + } + + return(OK); +} + + +/*===========================================================================* + * no_dev * + *===========================================================================*/ +PUBLIC int no_dev( + int UNUSED(op), /* operation, DEV_OPEN or DEV_CLOSE */ + dev_t UNUSED(dev), /* device to open or close */ + int UNUSED(proc), /* process to open/close for */ + int UNUSED(flags) /* mode bits and flags */ +) +{ +/* Called when opening a nonexistent device. */ + return(ENODEV); +} + +/*===========================================================================* + * no_dev_io * + *===========================================================================*/ +PUBLIC int no_dev_io(int proc, message *m) +{ +/* Called when doing i/o on a nonexistent device. 
*/
+  printf("VFS: I/O on unmapped device number\n");
+  return(EIO);
+}
+
+
+/*===========================================================================*
+ *				clone_opcl				     *
+ *===========================================================================*/
+PUBLIC int clone_opcl(
+  int op,			/* operation, DEV_OPEN or DEV_CLOSE */
+  dev_t dev,			/* device to open or close */
+  int proc_e,			/* process to open/close for */
+  int flags			/* mode bits and flags */
+)
+{
+/* Some devices need special processing upon open.  Such a device is "cloned",
+ * i.e. on a successful open it is replaced by a new device with a new unique
+ * minor device number.  This new device number identifies a new object (such
+ * as a new network connection) that has been allocated within a task.
+ */
+  struct dmap *dp;
+  int r, minor_dev, major_dev;
+  message dev_mess;
+
+  /* Determine task dmap. */
+  minor_dev = minor(dev);
+  major_dev = major(dev);
+  if (major_dev < 0 || major_dev >= NR_DEVICES) return(ENXIO);
+  dp = &dmap[major_dev];
+  if (dp->dmap_driver == NONE) {
+	printf("VFS clone_opcl: no driver for major %d\n", major_dev);
+	return(ENXIO);
+  }
+
+  dev_mess.m_type = op;
+  dev_mess.DEVICE = minor_dev;
+  dev_mess.USER_ENDPT = proc_e;
+  dev_mess.COUNT = flags;
+
+  if(isokendpt(dp->dmap_driver, &dummyproc) != OK) {
+	printf("VFS clone_opcl: bad driver endpoint for major %d (%d)\n",
+	       major_dev, dp->dmap_driver);
+	return(ENXIO);
+  }
+
+  /* Call the task. */
+  r = (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+  if (r != OK) return(r);
+
+  if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) {
+	/* Wait for reply when driver is asynchronous */
+	worker_wait();
+  }
+
+  if (op == DEV_OPEN && dev_mess.REP_STATUS >= 0) {
+	if (dev_mess.REP_STATUS != minor_dev) {
+		struct vnode *vp;
+		struct node_details res;
+
+		/* A new minor device number has been returned.
+		 * Request PFS to create a temporary device file to hold it.
+		 */
+
+		/* Device number of the new device. */
+		dev = (dev & ~(BYTE << MINOR)) | (dev_mess.REP_STATUS << MINOR);
+
+		/* Issue request */
+		r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid,
+				ALL_MODES | I_CHAR_SPECIAL, dev, &res);
+		if (r != OK) {
+			(void) clone_opcl(DEV_CLOSE, dev, proc_e, 0);
+			return r;
+		}
+
+		/* Drop old node and use the new values */
+		vp = fp->fp_filp[m_in.fd]->filp_vno;
+
+		unlock_vnode(vp);
+		put_vnode(vp);
+		if ((vp = get_free_vnode()) == NULL)
+			return(err_code);
+
+		lock_vnode(vp, VNODE_OPCL);
+
+		vp->v_fs_e = res.fs_e;
+		vp->v_vmnt = NULL;
+		vp->v_dev = NO_DEV;
+		vp->v_fs_e = res.fs_e;
+		vp->v_inode_nr = res.inode_nr;
+		vp->v_mode = res.fmode;
+		vp->v_sdev = dev;
+		vp->v_fs_count = 1;
+		vp->v_ref_count = 1;
+		fp->fp_filp[m_in.fd]->filp_vno = vp;
+	}
+	dev_mess.REP_STATUS = OK;
+  }
+  return(dev_mess.REP_STATUS);
+}
+
+
+/*===========================================================================*
+ *				dev_up					     *
+ *===========================================================================*/
+PUBLIC void dev_up(int maj)
+{
+  /* A new device driver has been mapped in. This function
+   * checks if any filesystems are mounted on it, and if so,
+   * dev_open()s them so the filesystem can be reused.
+   */
+  int r, new_driver_e, needs_reopen, fd_nr, found;
+  struct filp *rfilp;
+  struct vmnt *vmp;
+  struct fproc *rfp;
+  struct vnode *vp;
+
+  /* First deal with block devices. We need to consider both mounted file
+   * systems and open block-special files.
+ */
+  if (maj < 0 || maj >= NR_DEVICES) panic("VFS: out-of-bound major");
+  new_driver_e = dmap[maj].dmap_driver;
+
+  /* Tell each affected mounted file system about the new endpoint. This code
+   * is currently useless, as driver endpoints do not change across restarts.
+   */
+  for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
+	int minor_dev, major_dev;
+	major_dev = major(vmp->m_dev);
+	minor_dev = minor(vmp->m_dev);
+	if (major_dev != maj) continue;
+
+	/* Send the new driver endpoint to the mounted file system. */
+	if (OK != req_newdriver(vmp->m_fs_e, vmp->m_dev, new_driver_e))
+		printf("VFS: dev_up: error sending new driver endpoint."
+		       " FS_e: %d req_nr: %d\n", vmp->m_fs_e, REQ_NEW_DRIVER);
+  }
+
+  /* For each block-special file that was previously opened on the affected
+   * device, we need to reopen it on the new driver.
+   */
+  found = 0;
+  for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) {
+	if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue;
+	if (major(vp->v_sdev) != maj) continue;
+	if (!S_ISBLK(vp->v_mode)) continue;
+
+	/* Reopen the device on the driver, once per filp. */
+	if ((r = dev_open(vp->v_sdev, VFS_PROC_NR, rfilp->filp_mode)) != OK)
+		printf("VFS: mounted dev %d/%d re-open failed: %d.\n",
+		       maj, minor(vp->v_sdev), r);
+
+	found = 1;
+  }
+
+  /* If any block-special file was open for this major at all, also inform the
+   * root file system about the new endpoint of the driver. We do this even if
+   * the block-special file is linked to another mounted file system, merely
+   * because it is more work to check for that case.
+   */
+  if (found) {
+	if (OK != req_newdriver(ROOT_FS_E, makedev(maj, 0), new_driver_e))
+		printf("VFS: dev_up: error sending new driver endpoint."
+		       " FS_e: %d req_nr: %d\n", ROOT_FS_E, REQ_NEW_DRIVER);
+  }
+
+  /* The rest of the code deals with character-special files. To start with,
+   * look for processes that are suspended in an OPEN call. Set FP_SUSP_REOPEN
+   * to indicate that this process was suspended before the call to dev_up.
+ */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if(rfp->fp_pid == PID_FREE) continue; + if(rfp->fp_blocked_on != FP_BLOCKED_ON_DOPEN) continue; + + printf("VFS: dev_up: found process in FP_BLOCKED_ON_DOPEN, fd %d\n", + rfp->fp_block_fd); + fd_nr = rfp->fp_block_fd; + rfilp = rfp->fp_filp[fd_nr]; + vp = rfilp->filp_vno; + if (!vp) panic("VFS: restart_reopen: no vp"); + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + if (major(vp->v_sdev) != maj) continue; + + rfp->fp_flags |= FP_SUSP_REOPEN; + } + + needs_reopen= FALSE; + for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) { + if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue; + if (major(vp->v_sdev) != maj) continue; + if (!S_ISCHR(vp->v_mode)) continue; + + rfilp->filp_state = FS_NEEDS_REOPEN; + needs_reopen = TRUE; + } + + if (needs_reopen) + restart_reopen(maj); + +} + +/*===========================================================================* + * open_reply * + *===========================================================================*/ +PUBLIC void open_reply(void) +{ + struct fproc *rfp; + endpoint_t proc_e; + int slot; + + proc_e = m_in.REP_ENDPT; + if (isokendpt(proc_e, &slot) != OK) return; + rfp = &fproc[slot]; + *rfp->fp_sendrec = m_in; + worker_signal(worker_get(rfp->fp_wtid)); /* Continue open */ +} + +/*===========================================================================* + * restart_reopen * + *===========================================================================*/ +PRIVATE void restart_reopen(maj) +int maj; +{ + int n, r, minor_dev, major_dev, fd_nr; + endpoint_t driver_e; + struct vnode *vp; + struct filp *rfilp; + struct fproc *rfp; + + if (maj < 0 || maj >= NR_DEVICES) panic("VFS: out-of-bound major"); + for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) { + if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue; + if (rfilp->filp_state != FS_NEEDS_REOPEN) continue; + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + + major_dev = major(vp->v_sdev); + minor_dev = minor(vp->v_sdev); + if (major_dev != maj) continue; + + if (!(rfilp->filp_flags & O_REOPEN)) { + /* File descriptor is to be closed when driver restarts. */ + n = invalidate(rfilp); + if (n != rfilp->filp_count) { + printf("VFS: warning: invalidate/count " + "discrepancy (%d, %d)\n", n, rfilp->filp_count); + } + rfilp->filp_count = 0; + continue; + } + + r = dev_reopen(vp->v_sdev, rfilp-filp, vp->v_mode & (R_BIT|W_BIT)); + if (r == OK) return; + + /* Device could not be reopened. Invalidate all filps on that device.*/ + n = invalidate(rfilp); + if (n != rfilp->filp_count) { + printf("VFS: warning: invalidate/count " + "discrepancy (%d, %d)\n", n, rfilp->filp_count); + } + rfilp->filp_count = 0; + printf("VFS: file on dev %d/%d re-open failed: %d; " + "invalidated %d fd's.\n", major_dev, minor_dev, r, n); + } + + /* Nothing more to re-open. 
Restart suspended processes */
+  driver_e = dmap[maj].dmap_driver;
+
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	if(rfp->fp_pid == PID_FREE) continue;
+	if(rfp->fp_blocked_on == FP_BLOCKED_ON_OTHER &&
+	   rfp->fp_task == driver_e && (rfp->fp_flags & FP_SUSP_REOPEN)) {
+		rfp->fp_flags &= ~FP_SUSP_REOPEN;
+		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+		reply(rfp->fp_endpoint, ERESTART);
+	}
+  }
+
+  /* Look for processes that are suspended in an OPEN call */
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	if (rfp->fp_pid == PID_FREE) continue;
+	if (rfp->fp_blocked_on != FP_BLOCKED_ON_DOPEN ||
+	    !(rfp->fp_flags & FP_SUSP_REOPEN)) continue;
+
+	printf("VFS: restart_reopen: found process in FP_BLOCKED_ON_DOPEN, fd %d\n",
+	       rfp->fp_block_fd);
+	fd_nr = rfp->fp_block_fd;
+	rfilp = rfp->fp_filp[fd_nr];
+
+	if (!rfilp) {
+		/* Open failed, and automatic reopen was not requested */
+		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+		FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+		reply(rfp->fp_endpoint, EIO);
+		continue;
+	}
+
+	vp = rfilp->filp_vno;
+	if (!vp) panic("VFS: restart_reopen: no vp");
+	if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+	if (major(vp->v_sdev) != maj) continue;
+
+	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+	reply(rfp->fp_endpoint, fd_nr);
+  }
+}
+
+
+/*===========================================================================*
+ *				reopen_reply				     *
+ *===========================================================================*/
+PUBLIC void reopen_reply()
+{
+  endpoint_t driver_e;
+  int filp_no, status, maj;
+  struct filp *rfilp;
+  struct vnode *vp;
+  struct dmap *dp;
+
+  driver_e = m_in.m_source;
+  filp_no = m_in.REP_ENDPT;
+  status = m_in.REP_STATUS;
+
+  if (filp_no < 0 || filp_no >= NR_FILPS) {
+	printf("VFS: reopen_reply: bad filp number %d from driver %d\n",
+	       filp_no, driver_e);
+	return;
+  }
+
+  rfilp = &filp[filp_no];
+  if (rfilp->filp_count < 1) {
+	printf("VFS: reopen_reply: filp number %d not in use (from driver %d)\n",
+	       filp_no, driver_e);
+	return;
+  }
+
+  vp = rfilp->filp_vno;
+  if (!vp) {
+	printf("VFS: reopen_reply: no vnode for filp number %d (from driver "
+	       "%d)\n", filp_no, driver_e);
+	return;
+  }
+
+  if (rfilp->filp_state != FS_NEEDS_REOPEN) {
+	printf("VFS: reopen_reply: bad state %d for filp number %d"
+	       " (from driver %d)\n", rfilp->filp_state, filp_no, driver_e);
+	return;
+  }
+
+  if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) {
+	printf("VFS: reopen_reply: bad mode 0%o for filp number %d"
+	       " (from driver %d)\n", vp->v_mode, filp_no, driver_e);
+	return;
+  }
+
+  maj = major(vp->v_sdev);
+  dp = &dmap[maj];
+  if (dp->dmap_driver != driver_e) {
+	printf("VFS: reopen_reply: bad major %d for filp number %d "
+	       "(from driver %d, current driver is %d)\n", maj, filp_no,
+	       driver_e, dp->dmap_driver);
+	return;
+  }
+
+  if (status == OK) {
+	rfilp->filp_state = FS_NORMAL;
+  } else {
+	printf("VFS: reopen_reply: should handle error status\n");
+	return;
+  }
+
+  restart_reopen(maj);
+}
diff --git a/servers/avfs/dmap.c b/servers/avfs/dmap.c
new file mode 100644
index 000000000..d73aa1fac
--- /dev/null
+++ b/servers/avfs/dmap.c
@@ -0,0 +1,257 @@
+/* This file contains the table with device <-> driver mappings. It also
+ * contains some routines to dynamically add and/or remove device drivers
+ * or change mappings.
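+ *
+ * As a worked example (label, endpoint and major number invented): once RS
+ * reports a tty driver, the call
+ *
+ *	map_driver("tty", 4, tty_ep, STYLE_TTY, 0);
+ *
+ * points dmap[4].dmap_opcl at tty_opcl and dmap[4].dmap_io at gen_io, so
+ * every later open of a major-4 device node is routed to that endpoint.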
+ */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include "fproc.h" +#include "dmap.h" +#include "param.h" + +/* The order of the entries in the table determines the mapping between major + * device numbers and device drivers. Character and block devices + * can be intermixed at random. The ordering determines the device numbers in + * /dev. Note that the major device numbers used in /dev are NOT the same as + * the process numbers of the device drivers. See for mappings. + */ + +struct dmap dmap[NR_DEVICES]; + +#define DT_EMPTY { no_dev, no_dev_io, NONE, "", 0, STYLE_NDEV, NULL } + +/*===========================================================================* + * do_mapdriver * + *===========================================================================*/ +PUBLIC int do_mapdriver() +{ +/* Create a device->driver mapping. RS will tell us which major is driven by + * this driver, what type of device it is (regular, TTY, asynchronous, clone, + * etc), and its label. This label is registered with DS, and allows us to + * retrieve the driver's endpoint. + */ + int r, flags, major; + endpoint_t endpoint; + vir_bytes label_vir; + size_t label_len; + char label[LABEL_MAX]; + + /* Only RS can map drivers. */ + if (who_e != RS_PROC_NR) return(EPERM); + + /* Get the label */ + label_vir = (vir_bytes) m_in.md_label; + label_len = (size_t) m_in.md_label_len; + + if (label_len+1 > sizeof(label)) { /* Can we store this label? */ + printf("VFS: do_mapdriver: label too long\n"); + return(EINVAL); + } + r = sys_vircopy(who_e, D, label_vir, SELF, D, (vir_bytes) label, label_len); + if (r != OK) { + printf("VFS: do_mapdriver: sys_vircopy failed: %d\n", r); + return(EINVAL); + } + label[label_len] = '\0'; /* Terminate label */ + + /* Now we know how the driver is called, fetch its endpoint */ + r = ds_retrieve_label_endpt(label, &endpoint); + if (r != OK) { + printf("VFS: do_mapdriver: label '%s' unknown\n", label); + return(EINVAL); + } + + /* Try to update device mapping. */ + major = m_in.md_major; + flags = m_in.md_flags; + + return map_driver(label, major, endpoint, m_in.md_style, flags); +} + +/*===========================================================================* + * map_driver * + *===========================================================================*/ +PUBLIC int map_driver(label, major, proc_nr_e, style, flags) +const char *label; /* name of the driver */ +int major; /* major number of the device */ +endpoint_t proc_nr_e; /* process number of the driver */ +int style; /* style of the device */ +int flags; /* device flags */ +{ +/* Add a new device driver mapping in the dmap table. If the proc_nr is set to + * NONE, we're supposed to unmap it. + */ + + int slot; + size_t len; + struct dmap *dp; + + /* Get pointer to device entry in the dmap table. */ + if (major < 0 || major >= NR_DEVICES) return(ENODEV); + dp = &dmap[major]; + + /* Check if we're supposed to unmap it. */ + if(proc_nr_e == NONE) { + dp->dmap_opcl = no_dev; + dp->dmap_io = no_dev_io; + dp->dmap_driver = NONE; + dp->dmap_flags = flags; + return(OK); + } + + /* Check process number of new driver if it was alive before mapping */ + if (! 
(flags & DRV_FORCED)) { + if (isokendpt(proc_nr_e, &slot) != OK) + return(EINVAL); + } + + if (label != NULL) { + len = strlen(label); + if (len+1 > sizeof(dp->dmap_label)) + panic("VFS: map_driver: label too long: %d", len); + strcpy(dp->dmap_label, label); + } + + /* Store driver I/O routines based on type of device */ + switch (style) { + case STYLE_DEV: + dp->dmap_opcl = gen_opcl; + dp->dmap_io = gen_io; + break; + case STYLE_DEVA: + dp->dmap_opcl = gen_opcl; + dp->dmap_io = asyn_io; + break; + case STYLE_TTY: + dp->dmap_opcl = tty_opcl; + dp->dmap_io = gen_io; + break; + case STYLE_CTTY: + dp->dmap_opcl = ctty_opcl; + dp->dmap_io = ctty_io; + break; + case STYLE_CLONE: + dp->dmap_opcl = clone_opcl; + dp->dmap_io = gen_io; + break; + case STYLE_CLONE_A: + dp->dmap_opcl = clone_opcl; + dp->dmap_io = asyn_io; + break; + default: + return(EINVAL); + } + + dp->dmap_driver = proc_nr_e; + dp->dmap_flags = flags; + dp->dmap_style = style; + + return(OK); +} + +/*===========================================================================* + * dmap_unmap_by_endpt * + *===========================================================================*/ +PUBLIC void dmap_unmap_by_endpt(endpoint_t proc_e) +{ +/* Lookup driver in dmap table by endpoint and unmap it */ + int major, r; + + for (major = 0; major < NR_DEVICES; major++) { + if (dmap_driver_match(proc_e, major)) { + /* Found driver; overwrite it with a NULL entry */ + if ((r = map_driver(NULL, major, NONE, 0, 0)) != OK) { + printf("VFS: unmapping driver %d for major %d failed:" + " %d\n", proc_e, major, r); + } + } + } +} + +/*===========================================================================* + * map_service * + *===========================================================================*/ +PUBLIC int map_service(struct rprocpub *rpub) +{ +/* Map a new service by storing its device driver properties. */ + int r; + + /* Not a driver, nothing more to do. */ + if(rpub->dev_nr == NO_DEV) return(OK); + + /* Map driver. */ + r = map_driver(rpub->label, rpub->dev_nr, rpub->endpoint, rpub->dev_style, + rpub->dev_flags); + if(r != OK) return(r); + + /* If driver has two major numbers associated, also map the other one. */ + if(rpub->dev_style2 != STYLE_NDEV) { + r = map_driver(rpub->label, rpub->dev_nr+1, rpub->endpoint, + rpub->dev_style2, rpub->dev_flags); + if(r != OK) return(r); + } + + return(OK); +} + +/*===========================================================================* + * init_dmap * + *===========================================================================*/ +PUBLIC void init_dmap() +{ +/* Initialize the table with empty device <-> driver mappings. */ + int i; + struct dmap dmap_default = DT_EMPTY; + + for (i = 0; i < NR_DEVICES; i++) + dmap[i] = dmap_default; +} + +/*===========================================================================* + * dmap_driver_match * + *===========================================================================*/ +PUBLIC int dmap_driver_match(endpoint_t proc, int major) +{ + if (major < 0 || major >= NR_DEVICES) return(0); + if (dmap[major].dmap_driver != NONE && dmap[major].dmap_driver == proc) + return(1); + + return(0); +} + +/*===========================================================================* + * dmap_endpt_up * + *===========================================================================*/ +PUBLIC void dmap_endpt_up(endpoint_t proc_e) +{ +/* A device driver with endpoint proc_e has been restarted. Go tell everyone + * that might be blocking on it that this device is 'up'. 
+ */
+
+  int major;
+  for (major = 0; major < NR_DEVICES; major++)
+	if (dmap_driver_match(proc_e, major))
+		dev_up(major);
+
+}
+
+/*===========================================================================*
+ *				get_dmap				     *
+ *===========================================================================*/
+PUBLIC struct dmap *get_dmap(endpoint_t proc_e)
+{
+/* See if 'proc_e' endpoint belongs to a valid dmap entry. If so, return a
+ * pointer */
+
+  int major;
+  for (major = 0; major < NR_DEVICES; major++)
+	if (dmap_driver_match(proc_e, major))
+		return(&dmap[major]);
+
+  return(NULL);
+}
diff --git a/servers/avfs/dmap.h b/servers/avfs/dmap.h
new file mode 100644
index 000000000..6a83bf551
--- /dev/null
+++ b/servers/avfs/dmap.h
@@ -0,0 +1,28 @@
+#ifndef __VFS_DMAP_H__
+#define __VFS_DMAP_H__
+
+/*
+dmap.h
+*/
+
+/*===========================================================================*
+ *			Device <-> Driver Table				     *
+ *===========================================================================*/
+
+/* Device table.  This table is indexed by major device number.  It provides
+ * the link between major device numbers and the routines that process them.
+ * The table can be updated dynamically.  The field 'dmap_flags' describes an
+ * entry's current status and determines what control options are possible.
+ */
+
+extern struct dmap {
+  int _PROTOTYPE ((*dmap_opcl), (int, dev_t, int, int)	);
+  int _PROTOTYPE ((*dmap_io),   (int, message *)	);
+  endpoint_t dmap_driver;
+  char dmap_label[LABEL_MAX];
+  int dmap_flags;
+  int dmap_style;
+  struct filp *dmap_sel_filp;
+} dmap[];
+
+#endif
diff --git a/servers/avfs/exec.c b/servers/avfs/exec.c
new file mode 100644
index 000000000..dc29c6875
--- /dev/null
+++ b/servers/avfs/exec.c
@@ -0,0 +1,707 @@
+/* This file handles the EXEC system call.
It performs the work as follows: + * - see if the permissions allow the file to be executed + * - read the header and extract the sizes + * - fetch the initial args and environment from the user space + * - allocate the memory for the new process + * - copy the initial stack from PM to the process + * - read in the text and data segments and copy to the process + * - take care of setuid and setgid bits + * - fix up 'mproc' table + * - tell kernel about EXEC + * - save offset to initial argc (for ps) + * + * The entry points into this file are: + * pm_exec: perform the EXEC system call + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fproc.h" +#include "path.h" +#include "param.h" +#include "vnode.h" +#include +#include +#include +#include "exec.h" + +FORWARD _PROTOTYPE( void lock_exec, (void) ); +FORWARD _PROTOTYPE( void unlock_exec, (void) ); +FORWARD _PROTOTYPE( int exec_newmem, (int proc_e, vir_bytes text_addr, vir_bytes text_bytes, + vir_bytes data_addr, vir_bytes data_bytes, + vir_bytes tot_bytes, vir_bytes frame_len, int sep_id, + int is_elf, dev_t st_dev, ino_t st_ino, time_t ctime, + char *progname, int new_uid, int new_gid, + vir_bytes *stack_topp, int *load_textp, + int *allow_setuidp) ); +FORWARD _PROTOTYPE( int is_script, (const char *exec_hdr, size_t exec_len)); +FORWARD _PROTOTYPE( int patch_stack, (struct vnode *vp, char stack[ARG_MAX], + vir_bytes *stk_bytes, char path[PATH_MAX+1]) ); +FORWARD _PROTOTYPE( int insert_arg, (char stack[ARG_MAX], vir_bytes *stk_bytes, + char *arg, int replace) ); +FORWARD _PROTOTYPE( void patch_ptr, (char stack[ARG_MAX], vir_bytes base)); +FORWARD _PROTOTYPE( void clo_exec, (struct fproc *rfp) ); +FORWARD _PROTOTYPE( int read_seg, (struct vnode *vp, off_t off, int proc_e, + int seg, vir_bytes seg_addr, + phys_bytes seg_bytes) ); +FORWARD _PROTOTYPE( int load_aout, (struct exec_info *execi) ); +FORWARD _PROTOTYPE( int load_elf, (struct exec_info *execi) ); +FORWARD _PROTOTYPE( int map_header, (char **exec_hdr, + const struct vnode *vp) ); + +#define PTRSIZE sizeof(char *) /* Size of pointers in argv[] and envp[]. 
*/ + +/* Array of loaders for different object file formats */ +struct exec_loaders { + int (*load_object)(struct exec_info *); +}; + +PRIVATE const struct exec_loaders exec_loaders[] = { + { load_aout }, + { load_elf }, + { NULL } +}; + +PRIVATE char hdr[PAGE_SIZE]; /* Assume that header is not larger than a page */ + +/*===========================================================================* + * lock_exec * + *===========================================================================*/ +PRIVATE void lock_exec(void) +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + /* First try to get it right off the bat */ + if (mutex_trylock(&exec_lock) == 0) + return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + + if (mutex_lock(&exec_lock) != 0) + panic("Could not obtain lock on exec"); + + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_exec * + *===========================================================================*/ +PRIVATE void unlock_exec(void) +{ + if (mutex_unlock(&exec_lock) != 0) + panic("Could not release lock on exec"); +} + +/*===========================================================================* + * pm_exec * + *===========================================================================*/ +PUBLIC int pm_exec(int proc_e, char *path, vir_bytes path_len, char *frame, + vir_bytes frame_len, vir_bytes *pc) +{ +/* Perform the execve(name, argv, envp) call. The user library builds a + * complete stack image, including pointers, args, environ, etc. The stack + * is copied to a buffer inside VFS, and then to the new core image. + */ + int r, r1, round, slot; + vir_bytes vsp; + struct fproc *rfp; + struct vnode *vp; + struct vmnt *vmp; + char *cp; + static char mbuf[ARG_MAX]; /* buffer for stack and zeroes */ + struct exec_info execi; + int i; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lock_exec(); + + okendpt(proc_e, &slot); + rfp = fp = &fproc[slot]; + vp = NULL; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Get the exec file name. */ + if ((r = fetch_name(path, path_len, 0, fullpath)) != OK) + goto pm_execfinal; + + /* Fetch the stack from the user before destroying the old core image. */ + if (frame_len > ARG_MAX) { + printf("VFS: pm_exec: stack too big\n"); + r = ENOMEM; /* stack too big */ + goto pm_execfinal; + } + r = sys_datacopy(proc_e, (vir_bytes) frame, SELF, (vir_bytes) mbuf, + (phys_bytes) frame_len); + if (r != OK) { /* can't fetch stack (e.g. bad virtual addr) */ + printf("VFS: pm_exec: sys_datacopy failed\n"); + goto pm_execfinal; + } + + /* The default is to keep the original user and group IDs */ + execi.new_uid = rfp->fp_effuid; + execi.new_gid = rfp->fp_effgid; + + for (round = 0; round < 2; round++) { + /* round = 0 (first attempt), or 1 (interpreted script) */ + /* Save the name of the program */ + (cp = strrchr(fullpath, '/')) ? 
cp++ : (cp = fullpath); + + strncpy(execi.progname, cp, PROC_NAME_LEN-1); + execi.progname[PROC_NAME_LEN-1] = '\0'; + + /* Open executable */ + if ((vp = eat_path(&resolve, fp)) == NULL) { + r = err_code; + goto pm_execfinal; + } + execi.vp = vp; + unlock_vmnt(vmp); + + if ((vp->v_mode & I_TYPE) != I_REGULAR) + r = ENOEXEC; + else if ((r1 = forbidden(vp, X_BIT)) != OK) + r = r1; + else + r = req_stat(vp->v_fs_e, vp->v_inode_nr, VFS_PROC_NR, + (char *) &(execi.sb), 0, 0); + if (r != OK) goto pm_execfinal; + + if (round == 0) { + /* Deal with setuid/setgid executables */ + if (vp->v_mode & I_SET_UID_BIT) execi.new_uid = vp->v_uid; + if (vp->v_mode & I_SET_GID_BIT) execi.new_gid = vp->v_gid; + } + + r = map_header(&execi.hdr, execi.vp); + if (r != OK) goto pm_execfinal; + + if (!is_script(execi.hdr, execi.vp->v_size) || round != 0) + break; + + /* Get fresh copy of the file name. */ + if ((r = fetch_name(path, path_len, 0, fullpath)) != OK) + printf("VFS pm_exec: 2nd fetch_name failed\n"); + else if ((r = patch_stack(vp, mbuf, &frame_len, fullpath)) != OK) + printf("VFS pm_exec: patch_stack failed\n"); + + unlock_vnode(vp); + put_vnode(vp); + vp = NULL; + if (r != OK) goto pm_execfinal; + } + + execi.proc_e = proc_e; + execi.frame_len = frame_len; + + for (i = 0; exec_loaders[i].load_object != NULL; i++) { + r = (*exec_loaders[i].load_object)(&execi); + /* Loaded successfully, so no need to try other loaders */ + if (r == OK) break; + } + + if (r != OK) { /* No exec loader could load the object */ + r = ENOEXEC; + goto pm_execfinal; + } + + /* Save off PC */ + *pc = execi.pc; + + /* Patch up stack and copy it from VFS to new core image. */ + vsp = execi.stack_top; + vsp -= frame_len; + patch_ptr(mbuf, vsp); + if ((r = sys_datacopy(SELF, (vir_bytes) mbuf, proc_e, (vir_bytes) vsp, + (phys_bytes)frame_len)) != OK) { + printf("VFS: datacopy failed (%d) trying to copy to %lu\n", r, vsp); + goto pm_execfinal; + } + + if (r != OK) goto pm_execfinal; + clo_exec(rfp); + + if (execi.allow_setuid) { + rfp->fp_effuid = execi.new_uid; + rfp->fp_effgid = execi.new_gid; + } + +pm_execfinal: + if (vp != NULL) { + unlock_vnode(vp); + put_vnode(vp); + } + unlock_exec(); + return(r); +} + +/*===========================================================================* + * load_aout * + *===========================================================================*/ +PRIVATE int load_aout(struct exec_info *execi) +{ + int r; + struct vnode *vp; + int proc_e; + off_t off; + int hdrlen; + int sep_id; + vir_bytes text_bytes, data_bytes, bss_bytes; + phys_bytes tot_bytes; /* total space for program, including gap */ + + assert(execi != NULL); + assert(execi->hdr != NULL); + assert(execi->vp != NULL); + + proc_e = execi->proc_e; + vp = execi->vp; + + /* Read the file header and extract the segment sizes. */ + r = read_header_aout(execi->hdr, execi->vp->v_size, &sep_id, + &text_bytes, &data_bytes, &bss_bytes, + &tot_bytes, &execi->pc, &hdrlen); + if (r != OK) return(r); + + r = exec_newmem(proc_e, 0 /* text_addr */, text_bytes, + 0 /* data_addr */, data_bytes + bss_bytes, tot_bytes, + execi->frame_len, sep_id, 0 /* is_elf */, vp->v_dev, vp->v_inode_nr, + execi->sb.st_ctime, + execi->progname, execi->new_uid, execi->new_gid, + &execi->stack_top, &execi->load_text, &execi->allow_setuid); + + if (r != OK) { + printf("VFS: load_aout: exec_newmem failed: %d\n", r); + return(r); + } + + off = hdrlen; + + /* Read in text and data segments. 
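+   * They sit back to back behind the header in the a.out image: with a
+   * hypothetical 32-byte header and 0x2000 bytes of text, text is read
+   * from file offset 32 and data from offset 32 + 0x2000, which is the
+   * off / off + text_bytes arithmetic below.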
*/ + if (execi->load_text) + r = read_seg(vp, off, proc_e, T, 0, text_bytes); + off += text_bytes; + if (r == OK) + r = read_seg(vp, off, proc_e, D, 0, data_bytes); + + return(r); +} + +/*===========================================================================* + * load_elf * + *===========================================================================*/ +PRIVATE int load_elf(struct exec_info *execi) +{ + int r; + struct vnode *vp; + int proc_e; + phys_bytes tot_bytes; /* total space for program, including gap */ + vir_bytes text_vaddr, text_paddr, text_filebytes, text_membytes; + vir_bytes data_vaddr, data_paddr, data_filebytes, data_membytes; + off_t text_offset, data_offset; + int sep_id, is_elf; + + assert(execi != NULL); + assert(execi->hdr != NULL); + assert(execi->vp != NULL); + + proc_e = execi->proc_e; + vp = execi->vp; + + /* Read the file header and extract the segment sizes. */ + r = read_header_elf(execi->hdr, &text_vaddr, &text_paddr, + &text_filebytes, &text_membytes, + &data_vaddr, &data_paddr, + &data_filebytes, &data_membytes, + &execi->pc, &text_offset, &data_offset); + if (r != OK) return(r); + + sep_id = 0; + is_elf = 1; + tot_bytes = 0; /* Use default stack size */ + r = exec_newmem(proc_e, + trunc_page(text_vaddr), text_membytes, + trunc_page(data_vaddr), data_membytes, + tot_bytes, execi->frame_len, sep_id, is_elf, + vp->v_dev, vp->v_inode_nr, execi->sb.st_ctime, + execi->progname, execi->new_uid, execi->new_gid, + &execi->stack_top, &execi->load_text, &execi->allow_setuid); + + if (r != OK) { + printf("VFS: load_elf: exec_newmem failed: %d\n", r); + return(r); + } + + /* Read in text and data segments. */ + if (execi->load_text) + r = read_seg(vp, text_offset, proc_e, T, text_vaddr, text_filebytes); + + if (r == OK) + r = read_seg(vp, data_offset, proc_e, D, data_vaddr, data_filebytes); + + return(r); +} + +/*===========================================================================* + * exec_newmem * + *===========================================================================*/ +PRIVATE int exec_newmem( + int proc_e, + vir_bytes text_addr, + vir_bytes text_bytes, + vir_bytes data_addr, + vir_bytes data_bytes, + vir_bytes tot_bytes, + vir_bytes frame_len, + int sep_id, + int is_elf, + dev_t st_dev, + ino_t st_ino, + time_t ctime, + char *progname, + int new_uid, + int new_gid, + vir_bytes *stack_topp, + int *load_textp, + int *allow_setuidp +) +{ +/* Allocate a new memory map for a process that tries to exec */ + int r; + struct exec_newmem e; + message m; + + e.text_addr = text_addr; + e.text_bytes = text_bytes; + e.data_addr = data_addr; + e.data_bytes = data_bytes; + e.tot_bytes = tot_bytes; + e.args_bytes = frame_len; + e.sep_id = sep_id; + e.is_elf = is_elf; + e.st_dev = st_dev; + e.st_ino = st_ino; + e.enst_ctime = ctime; + e.new_uid = new_uid; + e.new_gid = new_gid; + strncpy(e.progname, progname, sizeof(e.progname)-1); + e.progname[sizeof(e.progname)-1] = '\0'; + + m.m_type = EXEC_NEWMEM; + m.EXC_NM_PROC = proc_e; + m.EXC_NM_PTR = (char *)&e; + if ((r = sendrec(PM_PROC_NR, &m)) != OK) return(r); + + *stack_topp = m.m1_i1; + *load_textp = !!(m.m1_i2 & EXC_NM_RF_LOAD_TEXT); + *allow_setuidp = !!(m.m1_i2 & EXC_NM_RF_ALLOW_SETUID); + + return(m.m_type); +} + +/*===========================================================================* + * is_script * + *===========================================================================*/ +PRIVATE int is_script(const char *exec_hdr, size_t exec_len) +{ +/* Is Interpreted script? 
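+ * That is: does the image start with the two bytes "#!", as in a file
+ * beginning "#!/bin/sh"? If so, pm_exec() runs a second round in which
+ * patch_stack() substitutes the interpreter named on that line.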
*/ + assert(exec_hdr != NULL); + + return(exec_hdr[0] == '#' && exec_hdr[1] == '!' && exec_len >= 2); +} + +/*===========================================================================* + * patch_stack * + *===========================================================================*/ +PRIVATE int patch_stack(vp, stack, stk_bytes, path) +struct vnode *vp; /* pointer for open script file */ +char stack[ARG_MAX]; /* pointer to stack image within VFS */ +vir_bytes *stk_bytes; /* size of initial stack */ +char path[PATH_MAX+1]; /* path to script file */ +{ +/* Patch the argument vector to include the path name of the script to be + * interpreted, and all strings on the #! line. Returns the path name of + * the interpreter. + */ + enum { INSERT=FALSE, REPLACE=TRUE }; + int n, r; + off_t pos; + char *sp, *interp = NULL; + u64_t new_pos; + unsigned int cum_io; + char buf[_MAX_BLOCK_SIZE]; + + /* Make 'path' the new argv[0]. */ + if (!insert_arg(stack, stk_bytes, path, REPLACE)) return(ENOMEM); + + pos = 0; /* Read from the start of the file */ + + /* Issue request */ + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING, + VFS_PROC_NR, buf, _MAX_BLOCK_SIZE, &new_pos, &cum_io); + if (r != OK) return(r); + + n = vp->v_size; + if (n > _MAX_BLOCK_SIZE) + n = _MAX_BLOCK_SIZE; + if (n < 2) return ENOEXEC; + + sp = &(buf[2]); /* just behind the #! */ + n -= 2; + if (n > PATH_MAX) n = PATH_MAX; + + /* Use the 'path' variable for temporary storage */ + memcpy(path, sp, n); + + if ((sp = memchr(path, '\n', n)) == NULL) /* must be a proper line */ + return(ENOEXEC); + + /* Move sp backwards through script[], prepending each string to stack. */ + for (;;) { + /* skip spaces behind argument. */ + while (sp > path && (*--sp == ' ' || *sp == '\t')) {} + if (sp == path) break; + + sp[1] = 0; + /* Move to the start of the argument. */ + while (sp > path && sp[-1] != ' ' && sp[-1] != '\t') --sp; + + interp = sp; + if (!insert_arg(stack, stk_bytes, sp, INSERT)) { + printf("VFS: patch_stack: insert_arg failed\n"); + return(ENOMEM); + } + } + + /* Round *stk_bytes up to the size of a pointer for alignment contraints. */ + *stk_bytes= ((*stk_bytes + PTRSIZE - 1) / PTRSIZE) * PTRSIZE; + + if (interp != path) + memmove(path, interp, strlen(interp)+1); + return(OK); +} + +/*===========================================================================* + * insert_arg * + *===========================================================================*/ +PRIVATE int insert_arg( +char stack[ARG_MAX], /* pointer to stack image within PM */ +vir_bytes *stk_bytes, /* size of initial stack */ +char *arg, /* argument to prepend/replace as new argv[0] */ +int replace +) +{ +/* Patch the stack so that arg will become argv[0]. Be careful, the stack may + * be filled with garbage, although it normally looks like this: + * nargs argv[0] ... argv[nargs-1] NULL envp[0] ... NULL + * followed by the strings "pointed" to by the argv[i] and the envp[i]. The + * pointers are really offsets from the start of stack. + * Return true iff the operation succeeded. + */ + int offset, a0, a1, old_bytes = *stk_bytes; + + /* Prepending arg adds at least one string and a zero byte. */ + offset = strlen(arg) + 1; + + a0 = (int) ((char **) stack)[1]; /* argv[0] */ + if (a0 < 4 * PTRSIZE || a0 >= old_bytes) return(FALSE); + + a1 = a0; /* a1 will point to the strings to be moved */ + if (replace) { + /* Move a1 to the end of argv[0][] (argv[1] if nargs > 1). 
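+ * (Worked example, not in the original commit: in REPLACE mode the loop
+ * below walks over the old argv[0] string and hands its bytes back to
+ * 'offset'. Replacing an old argv[0] "sh\0" (3 bytes) with arg
+ * "/bin/sh\0" (offset starts at 8) leaves offset = 8 - 3 = 5, so the
+ * string area only has to grow by 5 bytes.)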
*/ + do { + if (a1 == old_bytes) return(FALSE); + --offset; + } while (stack[a1++] != 0); + } else { + offset += PTRSIZE; /* new argv[0] needs new pointer in argv[] */ + a0 += PTRSIZE; /* location of new argv[0][]. */ + } + + /* stack will grow by offset bytes (or shrink by -offset bytes) */ + if ((*stk_bytes += offset) > ARG_MAX) return(FALSE); + + /* Reposition the strings by offset bytes */ + memmove(stack + a1 + offset, stack + a1, old_bytes - a1); + + strcpy(stack + a0, arg); /* Put arg in the new space. */ + + if (!replace) { + /* Make space for a new argv[0]. */ + memmove(stack + 2 * PTRSIZE, stack + 1 * PTRSIZE, a0 - 2 * PTRSIZE); + + ((char **) stack)[0]++; /* nargs++; */ + } + /* Now patch up argv[] and envp[] by offset. */ + patch_ptr(stack, (vir_bytes) offset); + ((char **) stack)[1] = (char *) a0; /* set argv[0] correctly */ + return(TRUE); +} + + +/*===========================================================================* + * patch_ptr * + *===========================================================================*/ +PRIVATE void patch_ptr( +char stack[ARG_MAX], /* pointer to stack image within PM */ +vir_bytes base /* virtual address of stack base inside user */ +) +{ +/* When doing an exec(name, argv, envp) call, the user builds up a stack + * image with arg and env pointers relative to the start of the stack. Now + * these pointers must be relocated, since the stack is not positioned at + * address 0 in the user's address space. + */ + + char **ap, flag; + vir_bytes v; + + flag = 0; /* counts number of 0-pointers seen */ + ap = (char **) stack; /* points initially to 'nargs' */ + ap++; /* now points to argv[0] */ + while (flag < 2) { + if (ap >= (char **) &stack[ARG_MAX]) return; /* too bad */ + if (*ap != NULL) { + v = (vir_bytes) *ap; /* v is relative pointer */ + v += base; /* relocate it */ + *ap = (char *) v; /* put it back */ + } else { + flag++; + } + ap++; + } +} + +/*===========================================================================* + * read_seg * + *===========================================================================*/ +PRIVATE int read_seg( +struct vnode *vp, /* inode descriptor to read from */ +off_t off, /* offset in file */ +int proc_e, /* process number (endpoint) */ +int seg, /* T, D, or S */ +vir_bytes seg_addr, /* address to load segment */ +phys_bytes seg_bytes /* how much is to be transferred? */ +) +{ +/* + * The byte count on read is usually smaller than the segment count, because + * a segment is padded out to a click multiple, and the data segment is only + * partially initialized. 
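+ * (Example added for clarity, not in the original commit: assuming a
+ * 4 KiB click, a data segment with 5000 initialized bytes and a large
+ * bss occupies at least 8192 bytes of memory, yet only the 5000
+ * initialized bytes are transferred from the file; the rest stays as
+ * the zero-filled memory set up by exec_newmem().)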
+ */ + int r; + unsigned n, o; + u64_t new_pos; + unsigned int cum_io; + static char buf[128 * 1024]; + + assert((seg == T)||(seg == D)); + + /* Make sure that the file is big enough */ + if (vp->v_size < off+seg_bytes) return(EIO); + + if (seg == T) { + /* We have to use a copy loop until safecopies support segments */ + o = 0; + while (o < seg_bytes) { + n = seg_bytes - o; + if (n > sizeof(buf)) + n = sizeof(buf); + + if ((r = req_readwrite(vp->v_fs_e,vp->v_inode_nr,cvul64(off+o), + READING, VFS_PROC_NR, buf, + n, &new_pos, &cum_io)) != OK) { + printf("VFS: read_seg: req_readwrite failed (text)\n"); + return(r); + } + + if (cum_io != n) { + printf( + "VFSread_seg segment has not been read properly by exec() \n"); + return(EIO); + } + + if ((r = sys_vircopy(VFS_PROC_NR, D, (vir_bytes)buf, proc_e, + seg, seg_addr + o, n)) != OK) { + printf("VFS: read_seg: copy failed (text)\n"); + return(r); + } + + o += n; + } + return(OK); + } else if (seg == D) { + + if ((r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(off), READING, + proc_e, (char*)seg_addr, seg_bytes, + &new_pos, &cum_io)) != OK) { + printf("VFS: read_seg: req_readwrite failed (data)\n"); + return(r); + } + + if (r == OK && cum_io != seg_bytes) + printf("VFS: read_seg segment has not been read properly by exec()\n"); + + return(r); + } + + return(OK); +} + + +/*===========================================================================* + * clo_exec * + *===========================================================================*/ +PRIVATE void clo_exec(struct fproc *rfp) +{ +/* Files can be marked with the FD_CLOEXEC bit (in fp->fp_cloexec). + */ + int i; + + /* Check the file desriptors one by one for presence of FD_CLOEXEC. */ + for (i = 0; i < OPEN_MAX; i++) + if ( FD_ISSET(i, &rfp->fp_cloexec_set)) + (void) close_fd(rfp, i); +} + +/*===========================================================================* + * map_header * + *===========================================================================*/ +PRIVATE int map_header(char **exec_hdr, const struct vnode *vp) +{ + int r; + u64_t new_pos; + unsigned int cum_io; + off_t pos; + + pos = 0; /* Read from the start of the file */ + + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING, + VFS_PROC_NR, hdr, MIN(vp->v_size, PAGE_SIZE), + &new_pos, &cum_io); + if (r != OK) { + printf("VFS: exec: map_header: req_readwrite failed\n"); + return(r); + } + + *exec_hdr = hdr; + return(OK); +} diff --git a/servers/avfs/exec.h b/servers/avfs/exec.h new file mode 100644 index 000000000..32114d6ab --- /dev/null +++ b/servers/avfs/exec.h @@ -0,0 +1,19 @@ +#ifndef _VFS_EXEC_H_ +#define _VFS_EXEC_H_ 1 + +struct exec_info { + int proc_e; /* Process endpoint */ + char *hdr; /* Exec file's header */ + vir_bytes pc; /* Entry point of exec file */ + vir_bytes stack_top; /* Top of the stack */ + vir_bytes frame_len; /* Stack size */ + uid_t new_uid; /* Process UID after exec */ + gid_t new_gid; /* Process GID after exec */ + int load_text; /* Load text section? */ + int allow_setuid; /* Allow setuid execution? */ + struct vnode *vp; /* Exec file's vnode */ + struct stat sb; /* Exec file's stat structure */ + char progname[PROC_NAME_LEN]; /* Program name */ +}; + +#endif /* !_VFS_EXEC_H_ */ diff --git a/servers/avfs/file.h b/servers/avfs/file.h new file mode 100644 index 000000000..52a5773c6 --- /dev/null +++ b/servers/avfs/file.h @@ -0,0 +1,48 @@ +#ifndef __VFS_FILE_H__ +#define __VFS_FILE_H__ + +/* This is the filp table. 
It is an intermediary between file descriptors and + * inodes. A slot is free if filp_count == 0. + */ + +EXTERN struct filp { + mode_t filp_mode; /* RW bits, telling how file is opened */ + int filp_flags; /* flags from open and fcntl */ + int filp_state; /* state for crash recovery */ + int filp_count; /* how many file descriptors share this slot?*/ + struct vnode *filp_vno; /* vnode belonging to this file */ + u64_t filp_pos; /* file position */ + mutex_t filp_lock; /* lock to gain exclusive access */ + struct fproc *filp_softlock; /* if not NULL; this filp didn't lock the + * vnode. Another filp already holds a lock + * for this thread */ + + /* the following fields are for select() and are owned by the generic + * select() code (i.e., fd-type-specific select() code can't touch these). + */ + int filp_selectors; /* select()ing processes blocking on this fd */ + int filp_select_ops; /* interested in these SEL_* operations */ + int filp_select_flags; /* Select flags for the filp */ + + /* following are for fd-type-specific select() */ + int filp_pipe_select_ops; +} filp[NR_FILPS]; + +#define FILP_CLOSED 0 /* filp_mode: associated device closed */ + +#define FS_NORMAL 0 /* file descriptor can be used normally */ +#define FS_NEEDS_REOPEN 1 /* file descriptor needs to be re-opened */ + +#define FSF_UPDATE 001 /* The driver should be informed about new + * state. + */ +#define FSF_BUSY 002 /* Select operation sent to driver but no + * reply yet. + */ +#define FSF_RD_BLOCK 010 /* Read request is blocking, the driver should + * keep state. + */ +#define FSF_WR_BLOCK 020 /* Write request is blocking */ +#define FSF_ERR_BLOCK 040 /* Exception request is blocking */ +#define FSF_BLOCKED 070 +#endif diff --git a/servers/avfs/filedes.c b/servers/avfs/filedes.c new file mode 100644 index 000000000..dd788eaf9 --- /dev/null +++ b/servers/avfs/filedes.c @@ -0,0 +1,556 @@ +/* This file contains the procedures that manipulate file descriptors. + * + * The entry points into this file are + * get_fd: look for free file descriptor and free filp slots + * get_filp: look up the filp entry for a given file descriptor + * find_filp: find a filp slot that points to a given vnode + * inval_filp: invalidate a filp and associated fd's, only let close() + * happen on it + * do_verify_fd: verify whether the given file descriptor is valid for + * the given endpoint. + * do_set_filp: marks a filp as in-flight. + * do_copy_filp: copies a filp to another endpoint. + * do_put_filp: marks a filp as not in-flight anymore. + * do_cancel_fd: cancel the transaction when something goes wrong for + * the receiver. 
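+ *
+ * (Note added in editing: do_set_filp, do_copy_filp, do_put_filp and
+ * do_cancel_fd together implement passing an open file descriptor from
+ * one process to another, presumably in support of PFS's unix domain
+ * sockets: the filp gets an extra reference while it is "in flight"
+ * and is then installed in the receiving process's fp_filp[] table.)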
+ */ + +#include +#include +#include +#include +#include "fs.h" +#include "file.h" +#include "fproc.h" +#include "vnode.h" + + +FORWARD _PROTOTYPE( filp_id_t verify_fd, (endpoint_t ep, int fd) ); + +#if LOCK_DEBUG +/*===========================================================================* + * check_filp_locks * + *===========================================================================*/ +PUBLIC void check_filp_locks_by_me(void) +{ +/* Check whether this thread still has filp locks held */ + struct filp *f; + int r; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + r = mutex_trylock(&f->filp_lock); + if (r == -EDEADLK) + panic("Thread %d still holds filp lock on filp %p call_nr=%d\n", + mthread_self(), f, call_nr); + else if (r == 0) { + /* We just obtained the lock, release it */ + mutex_unlock(&f->filp_lock); + } + } +} +#endif + +/*===========================================================================* + * check_filp_locks * + *===========================================================================*/ +PUBLIC void check_filp_locks(void) +{ + struct filp *f; + int r, count = 0; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + r = mutex_trylock(&f->filp_lock); + if (r == -EBUSY) { + /* Mutex is still locked */ + count++; + } else if (r == 0) { + /* We just obtained a lock, don't want it */ + mutex_unlock(&f->filp_lock); + } else + panic("filp_lock weird state"); + } + if (count) panic("locked filps"); +#if 0 + else printf("check_filp_locks OK\n"); +#endif +} + +/*===========================================================================* + * init_filps * + *===========================================================================*/ +PUBLIC void init_filps(void) +{ +/* Initialize filps */ + struct filp *f; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + mutex_init(&f->filp_lock, NULL); + } + +} + +/*===========================================================================* + * get_fd * + *===========================================================================*/ +PUBLIC int get_fd(int start, mode_t bits, int *k, struct filp **fpt) +{ +/* Look for a free file descriptor and a free filp slot. Fill in the mode word + * in the latter, but don't claim either one yet, since the open() or creat() + * may yet fail. + */ + + register struct filp *f; + register int i; + + /* Search the fproc fp_filp table for a free file descriptor. */ + for (i = start; i < OPEN_MAX; i++) { + if (fp->fp_filp[i] == NULL && !FD_ISSET(i, &fp->fp_filp_inuse)) { + /* A file descriptor has been located. */ + *k = i; + break; + } + } + + /* Check to see if a file descriptor has been found. */ + if (i >= OPEN_MAX) return(EMFILE); + + /* If we don't care about a filp, return now */ + if (fpt == NULL) return(OK); + + /* Now that a file descriptor has been found, look for a free filp slot. */ + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + assert(f->filp_count >= 0); + if (f->filp_count == 0 && mutex_trylock(&f->filp_lock) == 0) { + if (verbose) printf("get_fd: locking filp=%p\n", f); + f->filp_mode = bits; + f->filp_pos = cvu64(0); + f->filp_selectors = 0; + f->filp_select_ops = 0; + f->filp_pipe_select_ops = 0; + f->filp_flags = 0; + f->filp_state = FS_NORMAL; + f->filp_select_flags = 0; + f->filp_softlock = NULL; + *fpt = f; + return(OK); + } + } + + /* If control passes here, the filp table must be full. Report that back. 
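+ * (Clarification, not in the original commit: get_fd() distinguishes
+ * two errors. EMFILE above means the calling process's own descriptor
+ * table is full; ENFILE here means the system-wide filp table has no
+ * free slots left.)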
*/ + return(ENFILE); +} + + +/*===========================================================================* + * get_filp * + *===========================================================================*/ +PUBLIC struct filp *get_filp(fild, locktype) +int fild; /* file descriptor */ +tll_access_t locktype; +{ +/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */ + + return get_filp2(fp, fild, locktype); +} + + +/*===========================================================================* + * get_filp2 * + *===========================================================================*/ +PUBLIC struct filp *get_filp2(rfp, fild, locktype) +register struct fproc *rfp; +int fild; /* file descriptor */ +tll_access_t locktype; +{ +/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */ + struct filp *filp; + + err_code = EBADF; + if (fild < 0 || fild >= OPEN_MAX ) return(NULL); + if (rfp->fp_filp[fild] == NULL && FD_ISSET(fild, &rfp->fp_filp_inuse)) + err_code = EIO; /* The filedes is not there, but is not closed either. + */ + if ((filp = rfp->fp_filp[fild]) != NULL) lock_filp(filp, locktype); + + return(filp); /* may also be NULL */ +} + + +/*===========================================================================* + * find_filp * + *===========================================================================*/ +PUBLIC struct filp *find_filp(struct vnode *vp, mode_t bits) +{ +/* Find a filp slot that refers to the vnode 'vp' in a way as described + * by the mode bit 'bits'. Used for determining whether somebody is still + * interested in either end of a pipe. Also used when opening a FIFO to + * find partners to share a filp field with (to shared the file position). + * Like 'get_fd' it performs its job by linear search through the filp table. + */ + + struct filp *f; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + if (f->filp_count != 0 && f->filp_vno == vp && (f->filp_mode & bits)) { + return(f); + } + } + + /* If control passes here, the filp wasn't there. Report that back. */ + return(NULL); +} + +/*===========================================================================* + * invalidate * + *===========================================================================*/ +PUBLIC int invalidate(struct filp *fp) +{ +/* Invalidate filp. fp_filp_inuse is not cleared, so filp can't be reused + until it is closed first. */ + + int f, fd, n = 0; + for(f = 0; f < NR_PROCS; f++) { + if(fproc[f].fp_pid == PID_FREE) continue; + for(fd = 0; fd < OPEN_MAX; fd++) { + if(fproc[f].fp_filp[fd] && fproc[f].fp_filp[fd] == fp) { + fproc[f].fp_filp[fd] = NULL; + n++; + } + } + } + + return(n); /* Report back how often this filp has been invalidated. */ +} + +/*===========================================================================* + * lock_filp * + *===========================================================================*/ +PUBLIC void lock_filp(filp, locktype) +struct filp *filp; +tll_access_t locktype; +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + struct vnode *vp; + + assert(filp->filp_count > 0); + vp = filp->filp_vno; + assert(vp != NULL); + + if (verbose) + printf("lock_filp: filp=%p locking vnode %p with locktype %d\n", filp, + vp, locktype); + + /* Lock vnode only if we haven't already locked it. If already locked by us, + * we're allowed to have one additional 'soft' lock. 
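+ * (Note added in editing: the "soft" lock covers one worker thread
+ * locking two filps that refer to the same vnode, e.g. both ends of a
+ * pipe. The vnode lock is taken only once; the second filp merely
+ * records the owner in filp_softlock, so unlock_filp() knows not to
+ * unlock the vnode twice. Compare unlock_filps() below.)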
*/ + if (tll_locked_by_me(&vp->v_lock)) { + assert(filp->filp_softlock == NULL); + filp->filp_softlock = fp; + } else { + lock_vnode(vp, locktype); + } + + assert(vp->v_ref_count > 0); /* vnode still in use? */ + assert(filp->filp_vno == vp); /* vnode still what we think it is? */ + assert(filp->filp_count > 0); /* filp still in use? */ + + /* First try to get filp lock right off the bat */ + if (mutex_trylock(&filp->filp_lock) != 0) { + + /* Already in use, let's wait for our turn */ + org_m_in = m_in; + org_fp = fp; + org_self = self; + assert(mutex_lock(&filp->filp_lock) == 0); + m_in = org_m_in; + fp = org_fp; + self = org_self; + } + + assert(filp->filp_count > 0); /* Yet again; filp still in use? */ +} + +/*===========================================================================* + * unlock_filp * + *===========================================================================*/ +PUBLIC void unlock_filp(filp) +struct filp *filp; +{ + /* If this filp holds a soft lock on the vnode, we must be the owner */ + if (filp->filp_softlock != NULL) + assert(filp->filp_softlock == fp); + + if (filp->filp_count > 0) { + /* Only unlock vnode if filp is still in use */ + + /* and if we don't hold a soft lock */ + if (filp->filp_softlock == NULL) { + assert(tll_islocked(&(filp->filp_vno->v_lock))); + unlock_vnode(filp->filp_vno); + } + } + + filp->filp_softlock = NULL; + assert(mutex_unlock(&filp->filp_lock) == 0); +} + +/*===========================================================================* + * unlock_filps * + *===========================================================================*/ +PUBLIC void unlock_filps(filp1, filp2) +struct filp *filp1; +struct filp *filp2; +{ +/* Unlock two filps that are tied to the same vnode. As a thread can lock a + * vnode only once, unlocking the vnode twice would result in an error. */ + + /* No NULL pointers and not equal */ + assert(filp1); + assert(filp2); + assert(filp1 != filp2); + + /* Must be tied to the same vnode and not NULL */ + assert(filp1->filp_vno == filp2->filp_vno); + assert(filp1->filp_vno != NULL); + + if (filp1->filp_count > 0 && filp2->filp_count > 0) { + /* Only unlock vnode if filps are still in use */ + unlock_vnode(filp1->filp_vno); + } + + filp1->filp_softlock = NULL; + filp2->filp_softlock = NULL; + assert(mutex_unlock(&filp2->filp_lock) == 0); + assert(mutex_unlock(&filp1->filp_lock) == 0); +} + +/*===========================================================================* + * verify_fd * + *===========================================================================*/ +PRIVATE filp_id_t verify_fd(ep, fd) +endpoint_t ep; +int fd; +{ +/* Verify whether the file descriptor 'fd' is valid for the endpoint 'ep'. When + * the file descriptor is valid, verify_fd returns a pointer to that filp, else + * it returns NULL. + */ + int slot; + struct filp *rfilp; + + if (isokendpt(ep, &slot) != OK) + return(NULL); + + rfilp = get_filp2(&fproc[slot], fd, VNODE_READ); + + return(rfilp); +} + +/*===========================================================================* + * do_verify_fd * + *===========================================================================*/ +PUBLIC int do_verify_fd(void) +{ + struct filp *rfilp; + rfilp = (struct filp *) verify_fd(m_in.USER_ENDPT, m_in.COUNT); + m_out.ADDRESS = (void *) rfilp; + if (rfilp != NULL) unlock_filp(rfilp); + return (rfilp != NULL) ? 
OK : EINVAL; +} + +/*===========================================================================* + * set_filp * + *===========================================================================*/ +PUBLIC int set_filp(sfilp) +filp_id_t sfilp; +{ + if (sfilp == NULL) return(EINVAL); + + lock_filp(sfilp, VNODE_READ); + sfilp->filp_count++; + unlock_filp(sfilp); + + return(OK); +} + +/*===========================================================================* + * do_set_filp * + *===========================================================================*/ +PUBLIC int do_set_filp(void) +{ + return set_filp((filp_id_t) m_in.ADDRESS); +} + +/*===========================================================================* + * copy_filp * + *===========================================================================*/ +PUBLIC int copy_filp(to_ep, cfilp) +endpoint_t to_ep; +filp_id_t cfilp; +{ + int fd; + int slot; + struct fproc *rfp; + + if (isokendpt(to_ep, &slot) != OK) return(EINVAL); + rfp = &fproc[slot]; + + /* Find an open slot in fp_filp */ + for (fd = 0; fd < OPEN_MAX; fd++) { + if (rfp->fp_filp[fd] == NULL && + !FD_ISSET(fd, &rfp->fp_filp_inuse)) { + + /* Found a free slot, add descriptor */ + FD_SET(fd, &rfp->fp_filp_inuse); + rfp->fp_filp[fd] = cfilp; + rfp->fp_filp[fd]->filp_count++; + return(fd); + } + } + + /* File descriptor table is full */ + return(EMFILE); +} + +/*===========================================================================* + * do_copy_filp * + *===========================================================================*/ +PUBLIC int do_copy_filp(void) +{ + return copy_filp(m_in.USER_ENDPT, (filp_id_t) m_in.ADDRESS); +} + +/*===========================================================================* + * put_filp * + *===========================================================================*/ +PUBLIC int put_filp(pfilp) +filp_id_t pfilp; +{ + if (pfilp == NULL) { + return EINVAL; + } else { + lock_filp(pfilp, VNODE_OPCL); + close_filp(pfilp); + return(OK); + } +} + +/*===========================================================================* + * do_put_filp * + *===========================================================================*/ +PUBLIC int do_put_filp(void) +{ + return put_filp((filp_id_t) m_in.ADDRESS); +} + +/*===========================================================================* + * cancel_fd * + *===========================================================================*/ +PUBLIC int cancel_fd(ep, fd) +endpoint_t ep; +int fd; +{ + int slot; + struct fproc *rfp; + struct filp *rfilp; + + if (isokendpt(ep, &slot) != OK) return(EINVAL); + rfp = &fproc[slot]; + + /* Check that the input 'fd' is valid */ + rfilp = (struct filp *) verify_fd(ep, fd); + if (rfilp != NULL) { + /* Found a valid descriptor, remove it */ + FD_CLR(fd, &rfp->fp_filp_inuse); + if (rfp->fp_filp[fd]->filp_count == 0) { + unlock_filp(rfilp); + printf("VFS: filp_count for slot %d fd %d already zero", slot, + fd); + return(EINVAL); + } + rfp->fp_filp[fd]->filp_count--; + rfp->fp_filp[fd] = NULL; + unlock_filp(rfilp); + return(fd); + } + + /* File descriptor is not valid for the endpoint. 
*/ + return(EINVAL); +} + +/*===========================================================================* + * do_cancel_fd * + *===========================================================================*/ +PUBLIC int do_cancel_fd(void) +{ + return cancel_fd(m_in.USER_ENDPT, m_in.COUNT); +} + +/*===========================================================================* + * close_filp * + *===========================================================================*/ +PUBLIC void close_filp(f) +struct filp *f; +{ +/* Close a file. Will also unlock filp when done */ + + int mode_word, rw; + dev_t dev; + struct vnode *vp; + + /* Must be locked */ + assert(mutex_trylock(&f->filp_lock) == -EDEADLK); + assert(tll_islocked(&f->filp_vno->v_lock)); + + vp = f->filp_vno; + + if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) { + /* Check to see if the file is special. */ + mode_word = vp->v_mode & I_TYPE; + if (mode_word == I_CHAR_SPECIAL || mode_word == I_BLOCK_SPECIAL) { + dev = (dev_t) vp->v_sdev; + if (mode_word == I_BLOCK_SPECIAL) { + lock_bsf(); + if (vp->v_bfs_e == ROOT_FS_E) { + /* Invalidate the cache unless the special is + * mounted. Assume that the root filesystem's + * is open only for fsck. + */ + req_flush(vp->v_bfs_e, dev); + } + unlock_bsf(); + } + /* Do any special processing on device close. */ + (void) dev_close(dev, f-filp); /* Ignore any errors, even + * SUSPEND. */ + f->filp_mode = FILP_CLOSED; + } + } + + /* If the inode being closed is a pipe, release everyone hanging on it. */ + if (vp->v_pipe == I_PIPE) { + rw = (f->filp_mode & R_BIT ? WRITE : READ); + release(vp, rw, NR_PROCS); + } + + /* If a write has been done, the inode is already marked as DIRTY. */ + if (--f->filp_count == 0) { + if (vp->v_pipe == I_PIPE) { + /* Last reader or writer is going. Tell PFS about latest + * pipe size. + */ + truncate_vnode(vp, vp->v_size); + } + + unlock_vnode(f->filp_vno); + put_vnode(f->filp_vno); + } else if (f->filp_count < 0) { + panic("VFS: invalid filp count: %d ino %d/%d", f->filp_count, + vp->v_dev, vp->v_inode_nr); + } else { + unlock_vnode(f->filp_vno); + } + + mutex_unlock(&f->filp_lock); +} diff --git a/servers/avfs/fproc.h b/servers/avfs/fproc.h new file mode 100644 index 000000000..465c8ed64 --- /dev/null +++ b/servers/avfs/fproc.h @@ -0,0 +1,72 @@ +#ifndef __VFS_FPROC_H__ +#define __VFS_FPROC_H__ + +#include "threads.h" + +#include +#include + +/* This is the per-process information. A slot is reserved for each potential + * process. Thus NR_PROCS must be the same as in the kernel. It is not + * possible or even necessary to tell when a slot is free here. + */ +#define LOCK_DEBUG 0 +EXTERN struct fproc { + unsigned fp_flags; + + pid_t fp_pid; /* process id */ + endpoint_t fp_endpoint; /* kernel endpoint number of this process */ + + struct vnode *fp_wd; /* working directory; NULL during reboot */ + struct vnode *fp_rd; /* root directory; NULL during reboot */ + + struct filp *fp_filp[OPEN_MAX];/* the file descriptor table */ + fd_set fp_filp_inuse; /* which fd's are in use? 
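+ * (a bit may be set while fp_filp[fd] is
+ * NULL: the slot is then reserved, in
+ * flight, or its filp was invalidated)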
*/ + fd_set fp_cloexec_set; /* bit map for POSIX Table 6-2 FD_CLOEXEC */ + + dev_t fp_tty; /* major/minor of controlling tty */ + int fp_block_fd; /* place to save fd if rd/wr can't finish */ + int fp_block_callnr; /* blocked call if rd/wr can't finish */ + char *fp_buffer; /* place to save buffer if rd/wr can't finish*/ + int fp_nbytes; /* place to save bytes if rd/wr can't finish */ + int fp_cum_io_partial; /* partial byte count if rd/wr can't finish */ + endpoint_t fp_task; /* which task is proc suspended on */ + int fp_blocked_on; /* what is it blocked on */ + endpoint_t fp_ioproc; /* proc no. in suspended-on i/o message */ + + cp_grant_id_t fp_grant; /* revoke this grant on unsuspend if > -1 */ + + uid_t fp_realuid; /* real user id */ + uid_t fp_effuid; /* effective user id */ + gid_t fp_realgid; /* real group id */ + gid_t fp_effgid; /* effective group id */ + int fp_ngroups; /* number of supplemental groups */ + gid_t fp_sgroups[NGROUPS_MAX];/* supplemental groups */ + mode_t fp_umask; /* mask set by umask system call */ + message *fp_sendrec; /* request/reply to/from FS/driver */ + mutex_t fp_lock; /* mutex to lock fproc object */ + struct job fp_job; /* pending job */ + thread_t fp_wtid; /* Thread ID of worker */ +#if LOCK_DEBUG + int fp_vp_rdlocks; /* number of read-only locks on vnodes */ + int fp_vmnt_rdlocks; /* number of read-only locks on vmnts */ +#endif +} fproc[NR_PROCS]; + +/* fp_flags */ +#define FP_NOFLAGS 00 +#define FP_SUSP_REOPEN 01 /* Process is suspended until the reopens are + * completed (after the restart of a driver). + */ +#define FP_REVIVED 02 /* Indicates process is being revived */ +#define FP_SESLDR 04 /* Set if process is session leader */ +#define FP_PENDING 010 /* Set if process has pending work */ +#define FP_EXITING 020 /* Set if process is exiting */ +#define FP_PM_PENDING 040 /* Set if process has pending PM request */ + +/* Field values. */ +#define NOT_REVIVING 0xC0FFEEE /* process is not being revived */ +#define REVIVING 0xDEEAD /* process is being revived from suspension */ +#define PID_FREE 0 /* process slot free */ + +#endif /* __VFS_FPROC_H__ */ diff --git a/servers/avfs/fs.h b/servers/avfs/fs.h new file mode 100644 index 000000000..9531c2207 --- /dev/null +++ b/servers/avfs/fs.h @@ -0,0 +1,51 @@ +#ifndef __VFS_FS_H__ +#define __VFS_FS_H__ + +/* This is the master header for fs. It includes some other files + * and defines the principal constants. + */ +#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ +#define _SYSTEM 1 /* tell headers that this is the kernel */ + +#define DO_SANITYCHECKS 0 + +#if DO_SANITYCHECKS +#define SANITYCHECK do { \ + if(!check_vrefs() || !check_pipe()) { \ + printf("VFS:%s:%d: call_nr %d who_e %d\n", \ + __FILE__, __LINE__, call_nr, who_e); \ + panic("sanity check failed"); \ + } \ +} while(0) +#else +#define SANITYCHECK +#endif + +/* The following are so basic, all the *.c files get them automatically. 
*/ +#include /* MUST be first */ +#include /* MUST be second */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "const.h" +#include "dmap.h" +#include "proto.h" +#include "threads.h" +#include "glo.h" +#include "comm.h" +#include "vmnt.h" + +#endif diff --git a/servers/avfs/fscall.c b/servers/avfs/fscall.c new file mode 100644 index 000000000..2ef4f62d7 --- /dev/null +++ b/servers/avfs/fscall.c @@ -0,0 +1,136 @@ +/* This file handles nested counter-request calls to VFS sent by file system + * (FS) servers in response to VFS requests. + * + * The entry points into this file are + * nested_fs_call perform a nested call from a file system server + * nested_dev_call perform a nested call from a device driver server + * + */ + +#include "fs.h" +#include "fproc.h" +#include +#include +#include +#include +#include + +/* maximum nested call stack depth */ +#define MAX_DEPTH 1 + +/* global variables stack */ +PRIVATE struct { + struct fproc *g_fp; /* pointer to caller process */ + message g_m_in; /* request message */ + message g_m_out; /* reply message */ + int g_who_e; /* endpoint of caller process */ + int g_who_p; /* slot number of caller process */ + int g_call_nr; /* call number */ + int g_super_user; /* is the caller root? */ + char g_user_fullpath[PATH_MAX+1]; /* path to look up */ +} globals[MAX_DEPTH]; + +PRIVATE int depth = 0; /* current globals stack level */ + +#if ENABLE_SYSCALL_STATS +EXTERN unsigned long calls_stats[NCALLS]; +#endif + +FORWARD _PROTOTYPE( int push_globals, (void) ); +FORWARD _PROTOTYPE( void pop_globals, (void) ); +FORWARD _PROTOTYPE( void set_globals, (message *m) ); + +/*===========================================================================* + * push_globals * + *===========================================================================*/ +PRIVATE int push_globals() +{ +/* Save the global variables of the current call onto the globals stack. + */ + + if (depth == MAX_DEPTH) + return(EPERM); + + globals[depth].g_fp = fp; + globals[depth].g_m_in = m_in; + globals[depth].g_m_out = m_out; + globals[depth].g_super_user = super_user; + + /* err_code is not used across blocking calls */ + depth++; + return(OK); +} + +/*===========================================================================* + * pop_globals * + *===========================================================================*/ +PRIVATE void pop_globals() +{ +/* Restore the global variables of a call from the globals stack. + */ + + if (depth == 0) + panic("Popping from empty globals stack!"); + + depth--; + + fp = globals[depth].g_fp; + m_in = globals[depth].g_m_in; + m_out = globals[depth].g_m_out; + +} + +/*===========================================================================* + * set_globals * + *===========================================================================*/ +PRIVATE void set_globals(m) +message *m; /* request message */ +{ +/* Initialize global variables based on a request message. + */ + int proc_p; + + m_in = *m; + + proc_p = _ENDPOINT_P(m_in.m_source); + fp = &fproc[proc_p]; + + /* the rest need not be initialized */ +} + +/*===========================================================================* + * nested_fs_call * + *===========================================================================*/ +PUBLIC void nested_fs_call(m) +message *m; /* request/reply message pointer */ +{ +/* Handle a nested call from a file system server. 
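+ * (Example, not in the original commit: an FS server that needs
+ * getsysinfo() while it is itself handling a VFS request sends that
+ * call here. The globals of the interrupted request are pushed, the
+ * nested call runs, and the globals are popped again before VFS
+ * resumes; only COMMON_GETSYSINFO is accepted, as checked below.)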
+ */ + int r; + + /* Save global variables of the current call */ + if ((r = push_globals()) != OK) { + printf("VFS: error saving global variables in call %d from FS %d\n", + m->m_type, m->m_source); + } else { + /* Initialize global variables for the nested call */ + set_globals(m); + + /* Perform the nested call - only getsysinfo() is allowed right now */ + if (call_nr == COMMON_GETSYSINFO) { + r = do_getsysinfo(); + } else { + printf("VFS: invalid nested call %d from FS %d\n", call_nr, + who_e); + + r = ENOSYS; + } + + /* Store the result, and restore original global variables */ + *m = m_out; + + pop_globals(); + } + + m->m_type = r; +} diff --git a/servers/avfs/gcov.c b/servers/avfs/gcov.c new file mode 100644 index 000000000..50bed6451 --- /dev/null +++ b/servers/avfs/gcov.c @@ -0,0 +1,66 @@ + +#include "fs.h" +#include "file.h" +#include "fproc.h" + +_PROTOTYPE( int gcov_flush, (cp_grant_id_t grantid, size_t size )); + +/*===========================================================================* + * do_gcov_flush * + *===========================================================================*/ +PUBLIC int do_gcov_flush() +{ +/* A userland tool has requested the gcov data from another + * process (possibly vfs itself). Grant the target process + * access to the supplied buffer, and perform the call that + * makes the target copy its buffer to the caller (incl vfs + * itself). + */ + struct fproc *rfp; + ssize_t size; + cp_grant_id_t grantid; + int r, n; + pid_t target; + message m; + + size = m_in.GCOV_BUFF_SZ; + target = m_in.GCOV_PID; + + /* If the wrong process is sent to, the system hangs; so make this root-only. + */ + + if (!super_user) return(EPERM); + + /* Find target gcov process. */ + for(n = 0; n < NR_PROCS; n++) { + if(fproc[n].fp_endpoint != NONE && fproc[n].fp_pid == target) + break; + } + if(n >= NR_PROCS) { + printf("VFS: gcov process %d not found\n", target); + return(ESRCH); + } + rfp = &fproc[n]; + + /* Grant target process to requestor's buffer. */ + if ((grantid = cpf_grant_magic(rfp->fp_endpoint, who_e, + (vir_bytes) m_in.GCOV_BUFF_P, size, + CPF_WRITE)) < 0) { + printf("VFS: gcov_flush: grant failed\n"); + return(ENOMEM); + } + + if(rfp->fp_endpoint == VFS_PROC_NR) { + /* Request is for VFS itself. */ + r = gcov_flush(grantid, size); + } else { + /* Perform generic GCOV request. */ + m.GCOV_GRANT = grantid; + m.GCOV_BUFF_SZ = size; + r = _taskcall(rfp->fp_endpoint, COMMON_REQ_GCOV_DATA, &m); + } + + cpf_revoke(grantid); + + return(r); +} diff --git a/servers/avfs/glo.h b/servers/avfs/glo.h new file mode 100644 index 000000000..ed50c1b5a --- /dev/null +++ b/servers/avfs/glo.h @@ -0,0 +1,57 @@ +#ifndef __VFS_GLO_H__ +#define __VFS_GLO_H__ + +/* EXTERN should be extern except for the table file */ +#ifdef _TABLE +#undef EXTERN +#define EXTERN +#endif + +/* File System global variables */ +EXTERN struct fproc *fp; /* pointer to caller's fproc struct */ +EXTERN int susp_count; /* number of procs suspended on pipe */ +EXTERN int nr_locks; /* number of locks currently in place */ +EXTERN int reviving; /* number of pipe processes to be revived */ +EXTERN int pending; +EXTERN int sending; + +EXTERN dev_t ROOT_DEV; /* device number of the root device */ +EXTERN int ROOT_FS_E; /* kernel endpoint of the root FS proc */ +EXTERN u32_t system_hz; /* system clock frequency. */ + +/* The parameters of the call are kept here. 
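+ * (Note added in editing: these globals describe the request currently
+ * being handled. They are exactly the state that fscall.c saves and
+ * restores (fp, m_in, m_out) when a nested call from an FS server
+ * interrupts the current request.)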
*/ +EXTERN message m_in; /* the input message itself */ +EXTERN message m_out; /* the output message used for reply */ +# define who_p ((int) (fp - fproc)) +# define isokslot(p) (p >= 0 && \ + p < (int)(sizeof(fproc) / sizeof(struct fproc))) +#if 0 +# define who_e (isokslot(who_p) ? fp->fp_endpoint : m_in.m_source) +#else +# define who_e (isokslot(who_p) && fp->fp_endpoint != NONE ? \ + fp->fp_endpoint : m_in.m_source) +#endif +# define call_nr (m_in.m_type) +# define super_user (fp->fp_effuid == SU_UID ? 1 : 0) +EXTERN struct worker_thread *self; +EXTERN endpoint_t receive_from;/* endpoint with pending reply */ +EXTERN int force_sync; /* toggle forced synchronous communication */ +EXTERN int verbose; +EXTERN int deadlock_resolving; +EXTERN mutex_t exec_lock; +EXTERN mutex_t bsf_lock;/* Global lock for access to block special files */ +EXTERN struct worker_thread workers[NR_WTHREADS]; +EXTERN struct worker_thread sys_worker; +EXTERN struct worker_thread dl_worker; +EXTERN char mount_label[LABEL_MAX]; /* label of file system to mount */ + +/* The following variables are used for returning results to the caller. */ +EXTERN int err_code; /* temporary storage for error number */ + +/* Data initialized elsewhere. */ +extern _PROTOTYPE (int (*call_vec[]), (void) ); /* sys call table */ +extern _PROTOTYPE (int (*pfs_call_vec[]), (void) ); /* pfs callback table */ +extern char dot1[2]; /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */ +extern char dot2[3]; /* meaning to search_dir: no access permission check. */ + +#endif diff --git a/servers/avfs/job.h b/servers/avfs/job.h new file mode 100644 index 000000000..bfcdc5b9e --- /dev/null +++ b/servers/avfs/job.h @@ -0,0 +1,11 @@ +#ifndef __VFS_WORK_H__ +#define __VFS_WORK_H__ + +struct job { + struct fproc *j_fp; + message j_m_in; + void *(*j_func)(void *arg); + struct job *j_next; +}; + +#endif diff --git a/servers/avfs/link.c b/servers/avfs/link.c new file mode 100644 index 000000000..90bf4a148 --- /dev/null +++ b/servers/avfs/link.c @@ -0,0 +1,455 @@ +/* This file handles the LINK and UNLINK system calls. It also deals with + * deallocating the storage used by a file when the last UNLINK is done to a + * file and the blocks must be returned to the free block pool. + * + * The entry points into this file are + * do_link: perform the LINK system call + * do_unlink: perform the UNLINK and RMDIR system calls + * do_rename: perform the RENAME system call + * do_truncate: perform the TRUNCATE system call + * do_ftruncate: perform the FTRUNCATE system call + * do_rdlink: perform the RDLNK system call + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "path.h" +#include "vnode.h" +#include "param.h" + +/*===========================================================================* + * do_link * + *===========================================================================*/ +PUBLIC int do_link() +{ +/* Perform the link(name1, name2) system call. */ + int r = OK; + struct vnode *vp = NULL, *dirp = NULL; + struct vmnt *vmp1 = NULL, *vmp2 = NULL; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp1, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* See if 'name1' (file to be linked to) exists. 
*/ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + + /* Does the final directory of 'name2' exist? */ + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp2, &dirp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + r = err_code; + else if ((dirp = last_dir(&resolve, fp)) == NULL) + r = err_code; + + if (r != OK) { + unlock_vnode(vp); + unlock_vmnt(vmp1); + put_vnode(vp); + return(r); + } + + /* Check for links across devices. */ + if (vp->v_fs_e != dirp->v_fs_e) + r = EXDEV; + else + r = forbidden(dirp, W_BIT | X_BIT); + + if (r == OK) + r = req_link(vp->v_fs_e, dirp->v_inode_nr, fullpath, + vp->v_inode_nr); + + unlock_vnode(vp); + unlock_vnode(dirp); + if (vmp2 != NULL) unlock_vmnt(vmp2); + unlock_vmnt(vmp1); + put_vnode(vp); + put_vnode(dirp); + return(r); +} + + +/*===========================================================================* + * do_unlink * + *===========================================================================*/ +PUBLIC int do_unlink() +{ +/* Perform the unlink(name) or rmdir(name) system call. The code for these two + * is almost the same. They differ only in some condition testing. Unlink() + * may be used by the superuser to do dangerous things; rmdir() may not. + */ + struct vnode *dirp, *vp; + struct vmnt *vmp, *vmp2; + int r; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &dirp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* Get the last directory in the path. */ + if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) + return(err_code); + + if ((dirp = last_dir(&resolve, fp)) == NULL) return(err_code); + + /* Make sure that the object is a directory */ + if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) { + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(ENOTDIR); + } + + /* The caller must have both search and execute permission */ + if ((r = forbidden(dirp, X_BIT | W_BIT)) != OK) { + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(r); + } + + /* Also, if the sticky bit is set, only the owner of the file or a privileged + user is allowed to unlink */ + if ((dirp->v_mode & S_ISVTX) == S_ISVTX) { + /* Look up inode of file to unlink to retrieve owner */ + resolve.l_flags = PATH_RET_SYMLINK; + resolve.l_vmp = &vmp2; /* Shouldn't actually get locked */ + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode = &vp; + resolve.l_vnode_lock = VNODE_READ; + vp = advance(dirp, &resolve, fp); + assert(vmp2 == NULL); + if (vp != NULL) { + if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) + r = EPERM; + unlock_vnode(vp); + put_vnode(vp); + } else + r = err_code; + if (r != OK) { + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(r); + } + } + + tll_upgrade(&vmp->m_lock); + + if(call_nr == UNLINK) + r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath); + else + r = req_rmdir(dirp->v_fs_e, dirp->v_inode_nr, fullpath); + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(r); +} + +/*===========================================================================* + * do_rename * + *===========================================================================*/ +PUBLIC int do_rename() +{ +/* Perform the rename(name1, name2) system call. 
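+ * (Note added in editing: the parent vmnt of name1 is locked with
+ * VMNT_WRITE rather than VMNT_EXCL and is upgraded with tll_upgrade()
+ * only right before req_rename(), once all checks have passed; as the
+ * comment below notes, taking the exclusive lock too early could
+ * deadlock while the second parent directory is being resolved.)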
*/ + int r = OK, r1; + struct vnode *old_dirp, *new_dirp = NULL, *vp; + struct vmnt *oldvmp, *newvmp, *vmp2; + char old_name[PATH_MAX+1]; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &oldvmp, &old_dirp); + /* Do not yet request exclusive lock on vmnt to prevent deadlocks later on */ + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* See if 'name1' (existing file) exists. Get dir and file inodes. */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((old_dirp = last_dir(&resolve, fp)) == NULL) + return(err_code); + + /* If the sticky bit is set, only the owner of the file or a privileged + user is allowed to rename */ + if ((old_dirp->v_mode & S_ISVTX) == S_ISVTX) { + /* Look up inode of file to unlink to retrieve owner */ + resolve.l_flags = PATH_RET_SYMLINK; + resolve.l_vmp = &vmp2; /* Shouldn't actually get locked */ + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode = &vp; + resolve.l_vnode_lock = VNODE_READ; + resolve.l_flags = PATH_RET_SYMLINK; + vp = advance(old_dirp, &resolve, fp); + assert(vmp2 == NULL); + if (vp != NULL) { + if(vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) + r = EPERM; + unlock_vnode(vp); + put_vnode(vp); + } else + r = err_code; + if (r != OK) { + unlock_vnode(old_dirp); + unlock_vmnt(oldvmp); + put_vnode(old_dirp); + return(r); + } + } + + /* Save the last component of the old name */ + if(strlen(fullpath) >= sizeof(old_name)) { + unlock_vnode(old_dirp); + unlock_vmnt(oldvmp); + put_vnode(old_dirp); + return(ENAMETOOLONG); + } + strcpy(old_name, fullpath); + + /* See if 'name2' (new name) exists. Get dir inode */ + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &newvmp, &new_dirp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + r = err_code; + else if ((new_dirp = last_dir(&resolve, fp)) == NULL) + r = err_code; + + if (r != OK) { + unlock_vnode(old_dirp); + unlock_vmnt(oldvmp); + put_vnode(old_dirp); + return(r); + } + + /* Both parent directories must be on the same device. */ + if (old_dirp->v_fs_e != new_dirp->v_fs_e) r = EXDEV; + + /* Parent dirs must be writable, searchable and on a writable device */ + if ((r1 = forbidden(old_dirp, W_BIT|X_BIT)) != OK || + (r1 = forbidden(new_dirp, W_BIT|X_BIT)) != OK) r = r1; + + if (r == OK) { + tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */ + r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, + new_dirp->v_inode_nr, fullpath); + } + unlock_vnode(old_dirp); + unlock_vnode(new_dirp); + unlock_vmnt(oldvmp); + if (newvmp) unlock_vmnt(newvmp); + + put_vnode(old_dirp); + put_vnode(new_dirp); + + return(r); +} + +/*===========================================================================* + * do_truncate * + *===========================================================================*/ +PUBLIC int do_truncate() +{ +/* truncate_vnode() does the actual work of do_truncate() and do_ftruncate(). + * do_truncate() and do_ftruncate() have to get hold of the inode, either + * by name or fd, do checks on it, and call truncate_inode() to do the + * work. 
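+ * (That is, truncate_vnode() below: it asks the FS server to truncate
+ * the file via req_ftrunc() and, on success, updates the vnode's
+ * cached v_size.)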
+ */ + struct vnode *vp; + struct vmnt *vmp; + int r; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_EXCL; + resolve.l_vnode_lock = VNODE_WRITE; + + if ((off_t) m_in.flength < 0) return(EINVAL); + + /* Temporarily open file */ + if (fetch_name(m_in.m2_p1, m_in.m2_i1, M1, fullpath) != OK) return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + + /* Ask FS to truncate the file */ + if ((r = forbidden(vp, W_BIT)) == OK) + r = truncate_vnode(vp, m_in.flength); + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_ftruncate * + *===========================================================================*/ +PUBLIC int do_ftruncate() +{ +/* As with do_truncate(), truncate_vnode() does the actual work. */ + struct filp *rfilp; + int r; + + if ((off_t) m_in.flength < 0) return(EINVAL); + + /* File is already opened; get a vnode pointer from filp */ + if ((rfilp = get_filp(m_in.m2_i1, VNODE_WRITE)) == NULL) return(err_code); + + if (!(rfilp->filp_mode & W_BIT)) + r = EBADF; + else + r = truncate_vnode(rfilp->filp_vno, m_in.flength); + + unlock_filp(rfilp); + return(r); +} + + +/*===========================================================================* + * truncate_vnode * + *===========================================================================*/ +PUBLIC int truncate_vnode(vp, newsize) +struct vnode *vp; +off_t newsize; +{ +/* Truncate a regular file or a pipe */ + int r, file_type; + + assert(tll_locked_by_me(&vp->v_lock)); + file_type = vp->v_mode & I_TYPE; + if (file_type != I_REGULAR && file_type != I_NAMED_PIPE) return(EINVAL); + if ((r = req_ftrunc(vp->v_fs_e, vp->v_inode_nr, newsize, 0)) == OK) + vp->v_size = newsize; + return(r); +} + + +/*===========================================================================* + * do_slink * + *===========================================================================*/ +PUBLIC int do_slink() +{ +/* Perform the symlink(name1, name2) system call. 
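+ * (Note: as with symlink(2), 'name1' is the string that becomes the
+ * contents of the new symlink and 'name2' is the path of the link
+ * itself. Hence only the parent directory of 'name2' is resolved
+ * below, while 'name1' is shipped to the FS server verbatim by
+ * req_slink().)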
*/ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + if (m_in.name1_length <= 1) return(ENOENT); + if (m_in.name1_length >= SYMLINK_MAX) return(ENAMETOOLONG); + + /* Get dir inode of 'name2' */ + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + return(err_code); + + if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code); + + if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) { + r = req_slink(vp->v_fs_e, vp->v_inode_nr, fullpath, who_e, + m_in.name1, m_in.name1_length - 1, fp->fp_effuid, + fp->fp_effgid); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + + return(r); +} + +/*===========================================================================* + * rdlink_direct * + *===========================================================================*/ +PUBLIC int rdlink_direct(orig_path, link_path, rfp) +char *orig_path; +char *link_path; /* should have length PATH_MAX+1 */ +struct fproc *rfp; +{ +/* Perform a readlink()-like call from within the VFS */ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Temporarily open the file containing the symbolic link */ + strncpy(fullpath, orig_path, PATH_MAX); + if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code); + + /* Make sure this is a symbolic link */ + if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK) + r = EINVAL; + else + r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, (endpoint_t) 0, + link_path, PATH_MAX+1, 1); + + if (r > 0) link_path[r] = '\0'; /* Terminate string when succesful */ + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + + return r; +} + +/*===========================================================================* + * do_rdlink * + *===========================================================================*/ +PUBLIC int do_rdlink() +{ +/* Perform the readlink(name, buf, bufsize) system call. */ + int r, copylen; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + copylen = m_in.nbytes; + if (copylen < 0) return(EINVAL); + + /* Temporarily open the file containing the symbolic link */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + + /* Make sure this is a symbolic link */ + if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK) + r = EINVAL; + else + r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, + copylen, 0); + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + + return(r); +} diff --git a/servers/avfs/lock.c b/servers/avfs/lock.c new file mode 100644 index 000000000..8b459e91f --- /dev/null +++ b/servers/avfs/lock.c @@ -0,0 +1,191 @@ +/* This file handles advisory file locking as required by POSIX. 
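+ * Locks are byte ranges: each held lock is kept in the file_lock table
+ * (see lock.h) as a [lock_first, lock_last] interval on a vnode.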
+ * + * The entry points into this file are + * lock_op: perform locking operations for FCNTL system call + * lock_revive: revive processes when a lock is released + */ + +#include "fs.h" +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "lock.h" +#include "vnode.h" +#include "param.h" + +/*===========================================================================* + * lock_op * + *===========================================================================*/ +PUBLIC int lock_op(f, req) +struct filp *f; +int req; /* either F_SETLK or F_SETLKW */ +{ +/* Perform the advisory locking required by POSIX. */ + + int r, ltype, i, conflict = 0, unlocking = 0; + mode_t mo; + off_t first, last; + struct flock flock; + vir_bytes user_flock; + struct file_lock *flp, *flp2, *empty; + + /* Fetch the flock structure from user space. */ + user_flock = (vir_bytes) m_in.name1; + r = sys_datacopy(who_e, (vir_bytes) user_flock, VFS_PROC_NR, + (vir_bytes) &flock, (phys_bytes) sizeof(flock)); + if (r != OK) return(EINVAL); + + /* Make some error checks. */ + ltype = flock.l_type; + mo = f->filp_mode; + if (ltype != F_UNLCK && ltype != F_RDLCK && ltype != F_WRLCK) return(EINVAL); + if (req == F_GETLK && ltype == F_UNLCK) return(EINVAL); + if ( (f->filp_vno->v_mode & I_TYPE) != I_REGULAR) return(EINVAL); + if (req != F_GETLK && ltype == F_RDLCK && (mo & R_BIT) == 0) return(EBADF); + if (req != F_GETLK && ltype == F_WRLCK && (mo & W_BIT) == 0) return(EBADF); + + /* Compute the first and last bytes in the lock region. */ + switch (flock.l_whence) { + case SEEK_SET: first = 0; break; + case SEEK_CUR: + if (ex64hi(f->filp_pos) != 0) + panic("lock_op: position in file too high"); + first = ex64lo(f->filp_pos); + break; + case SEEK_END: first = f->filp_vno->v_size; break; + default: return(EINVAL); + } + + /* Check for overflow. */ + if (((long) flock.l_start > 0) && ((first + flock.l_start) < first)) + return(EINVAL); + if (((long) flock.l_start < 0) && ((first + flock.l_start) > first)) + return(EINVAL); + first = first + flock.l_start; + last = first + flock.l_len - 1; + if (flock.l_len == 0) last = MAX_FILE_POS; + if (last < first) return(EINVAL); + + /* Check if this region conflicts with any existing lock. */ + empty = NULL; + for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) { + if (flp->lock_type == 0) { + if (empty == NULL) empty = flp; + continue; /* 0 means unused slot */ + } + if (flp->lock_vnode != f->filp_vno) continue; /* different file */ + if (last < flp->lock_first) continue; /* new one is in front */ + if (first > flp->lock_last) continue; /* new one is afterwards */ + if (ltype == F_RDLCK && flp->lock_type == F_RDLCK) continue; + if (ltype != F_UNLCK && flp->lock_pid == fp->fp_pid) continue; + + /* There might be a conflict. Process it. */ + conflict = 1; + if (req == F_GETLK) break; + + /* If we are trying to set a lock, it just failed. */ + if (ltype == F_RDLCK || ltype == F_WRLCK) { + if (req == F_SETLK) { + /* For F_SETLK, just report back failure. */ + return(EAGAIN); + } else { + /* For F_SETLKW, suspend the process. */ + suspend(FP_BLOCKED_ON_LOCK); + return(SUSPEND); + } + } + + /* We are clearing a lock and we found something that overlaps. */ + unlocking = 1; + if (first <= flp->lock_first && last >= flp->lock_last) { + flp->lock_type = 0; /* mark slot as unused */ + nr_locks--; /* number of locks is now 1 less */ + continue; + } + + /* Part of a locked region has been unlocked. 
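+ * (Worked example, not in the original commit: three cases follow.
+ * Unlocking [0..15] of a held lock [10..30] trims it to [16..30];
+ * unlocking [25..40] trims it to [10..24]; unlocking [15..20] splits
+ * it into [10..14] and [21..30], which needs a spare table slot,
+ * hence the ENOLCK check below.)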
*/
+	if (first <= flp->lock_first) {
+		flp->lock_first = last + 1;
+		continue;
+	}
+
+	if (last >= flp->lock_last) {
+		flp->lock_last = first - 1;
+		continue;
+	}
+
+	/* Bad luck. A lock has been split in two by unlocking the middle. */
+	if (nr_locks == NR_LOCKS) return(ENOLCK);
+	for (i = 0; i < NR_LOCKS; i++)
+		if (file_lock[i].lock_type == 0) break;
+	flp2 = &file_lock[i];
+	flp2->lock_type = flp->lock_type;
+	flp2->lock_pid = flp->lock_pid;
+	flp2->lock_vnode = flp->lock_vnode;
+	flp2->lock_first = last + 1;
+	flp2->lock_last = flp->lock_last;
+	flp->lock_last = first - 1;
+	nr_locks++;
+  }
+  if (unlocking) lock_revive();
+
+  if (req == F_GETLK) {
+	if (conflict) {
+		/* GETLK and conflict. Report on the conflicting lock. */
+		flock.l_type = flp->lock_type;
+		flock.l_whence = SEEK_SET;
+		flock.l_start = flp->lock_first;
+		flock.l_len = flp->lock_last - flp->lock_first + 1;
+		flock.l_pid = flp->lock_pid;
+	} else {
+		/* It is GETLK and there is no conflict. */
+		flock.l_type = F_UNLCK;
+	}
+
+	/* Copy the flock structure back to the caller. */
+	r = sys_datacopy(VFS_PROC_NR, (vir_bytes) &flock,
+		who_e, (vir_bytes) user_flock, (phys_bytes) sizeof(flock));
+	return(r);
+  }
+
+  if (ltype == F_UNLCK) return(OK);	/* unlocked a region with no locks */
+
+  /* There is no conflict. If space exists, store new lock in the table. */
+  if (empty == NULL) return(ENOLCK);	/* table full */
+  empty->lock_type = ltype;
+  empty->lock_pid = fp->fp_pid;
+  empty->lock_vnode = f->filp_vno;
+  empty->lock_first = first;
+  empty->lock_last = last;
+  nr_locks++;
+  return(OK);
+}
+
+
+/*===========================================================================*
+ *				lock_revive				     *
+ *===========================================================================*/
+PUBLIC void lock_revive()
+{
+/* Go find all the processes that are waiting for any kind of lock and
+ * revive them all. The ones that are still blocked will block again when
+ * they run. The others will complete. This strategy is a space-time
+ * tradeoff. Figuring out exactly which ones to unblock now would take
+ * extra code, and the only thing it would win would be some performance in
+ * extremely rare circumstances (namely, that somebody actually used
+ * locking).
+ */
+
+  struct fproc *fptr;
+
+  for (fptr = &fproc[0]; fptr < &fproc[NR_PROCS]; fptr++){
+	if (fptr->fp_pid == PID_FREE) continue;
+	if (fptr->fp_blocked_on == FP_BLOCKED_ON_LOCK) {
+		revive(fptr->fp_endpoint, 0);
+	}
+  }
+}
diff --git a/servers/avfs/lock.h b/servers/avfs/lock.h
new file mode 100644
index 000000000..c2baa651e
--- /dev/null
+++ b/servers/avfs/lock.h
@@ -0,0 +1,15 @@
+#ifndef __VFS_LOCK_H__
+#define __VFS_LOCK_H__
+
+/* This is the file locking table. Like the filp table, it points to the
+ * inode table, however, in this case to achieve advisory locking.
+ */
+EXTERN struct file_lock {
+  short lock_type;		/* F_RDLCK or F_WRLCK; 0 means unused slot */
+  pid_t lock_pid;		/* pid of the process holding the lock */
+  struct vnode *lock_vnode;
+  off_t lock_first;		/* offset of first byte locked */
+  off_t lock_last;		/* offset of last byte locked */
+} file_lock[NR_LOCKS];
+
+#endif
diff --git a/servers/avfs/main.c b/servers/avfs/main.c
new file mode 100644
index 000000000..2e548b80b
--- /dev/null
+++ b/servers/avfs/main.c
@@ -0,0 +1,967 @@
+/* This file contains the main program of the Virtual File System.  It
+ * consists of a loop that gets messages requesting work, carries out the
+ * work, and sends replies.
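+ *
+ * Replies from FS servers are matched to the worker thread that issued the
+ * request by a transaction id encoded in the message type; the main loop
+ * below recovers it with TRNS_GET_ID()/TRNS_DEL_ID() before dispatching.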
+ * + * The entry points into this file are: + * main: main program of the Virtual File System + * reply: send a reply to a process after the requested work is done + * + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "dmap.h" +#include "fproc.h" +#include "vmnt.h" +#include "vnode.h" +#include "job.h" +#include "param.h" + +#if ENABLE_SYSCALL_STATS +EXTERN unsigned long calls_stats[NCALLS]; +#endif + +/* Thread related prototypes */ +FORWARD _PROTOTYPE( void thread_cleanup_f, (struct fproc *rfp, char *f, + int l) ); +#define thread_cleanup(x) thread_cleanup_f(x, __FILE__, __LINE__) +FORWARD _PROTOTYPE( void *do_async_dev_result, (void *arg) ); +FORWARD _PROTOTYPE( void *do_control_msgs, (void *arg) ); +FORWARD _PROTOTYPE( void *do_fs_reply, (struct job *job) ); +FORWARD _PROTOTYPE( void *do_work, (void *arg) ); +FORWARD _PROTOTYPE( void *do_pm, (void *arg) ); +FORWARD _PROTOTYPE( void *do_init_root, (void *arg) ); +FORWARD _PROTOTYPE( void handle_work, (void *(*func)(void *arg)) ); + +FORWARD _PROTOTYPE( void get_work, (void) ); +FORWARD _PROTOTYPE( void lock_pm, (void) ); +FORWARD _PROTOTYPE( void unlock_pm, (void) ); +FORWARD _PROTOTYPE( void service_pm, (void) ); +FORWARD _PROTOTYPE( void service_pm_postponed, (void) ); +FORWARD _PROTOTYPE( int unblock, (struct fproc *rfp) ); + +/* SEF functions and variables. */ +FORWARD _PROTOTYPE( void sef_local_startup, (void) ); +FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) ); +PRIVATE mutex_t pm_lock; + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(void) +{ +/* This is the main program of the file system. The main loop consists of + * three major activities: getting new work, processing the work, and sending + * the reply. This loop never terminates as long as the file system runs. + */ + int transid, req; + struct job *job; + + /* SEF local startup. */ + sef_local_startup(); + + printf("Started AVFS\n"); + verbose = 0; + + /* This is the main loop that gets work, processes it, and sends replies. */ + while (TRUE) { + yield_all(); /* let other threads run */ + send_work(); + get_work(); + + transid = TRNS_GET_ID(m_in.m_type); + req = TRNS_DEL_ID(m_in.m_type); + job = worker_getjob( (thread_t) transid - VFS_TRANSID); + + /* Transaction encoding changes original m_type value; restore. */ + if (job == NULL) + m_in.m_type = transid; + else + m_in.m_type = req; + + if (job != NULL) { + do_fs_reply(job); + continue; + } else if (who_e == PM_PROC_NR) { /* Calls from PM */ + /* Special control messages from PM */ + sys_worker_start(do_pm); + continue; + } else if (is_notify(call_nr)) { + /* A task notify()ed us */ + sys_worker_start(do_control_msgs); + continue; + } else if (who_p < 0) { /* i.e., message comes from a task */ + /* We're going to ignore this message. Tasks should + * send notify()s only. + */ + printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr); + continue; + } + + /* At this point we either have results from an asynchronous device + * or a new system call. In both cases a new worker thread has to be + * started and there might not be one available from the pool. 
This is + * not a problem (requests/replies are simply queued), except when + * they're from an FS endpoint, because these can cause a deadlock. + * handle_work() takes care of the details. */ + if (IS_DEV_RS(call_nr)) { + /* We've got results for a device request */ + handle_work(do_async_dev_result); + continue; + } else { + /* Normal syscall. */ + handle_work(do_work); + } + } + return(OK); /* shouldn't come here */ +} + +/*===========================================================================* + * handle_work * + *===========================================================================*/ +PRIVATE void handle_work(void *(*func)(void *arg)) +{ +/* Handle asynchronous device replies and new system calls. If the originating + * endpoint is an FS endpoint, take extra care not to get in deadlock. */ + struct vmnt *vmp; + + if ((vmp = find_vmnt(who_e)) != NULL) { + /* A back call or dev result from an FS endpoint */ + if (worker_available() == 0) { + /* No worker threads available to handle call */ + if (deadlock_resolving) { + /* Already trying to resolve a deadlock, can't + * handle more, sorry */ + + reply(who_e, EAGAIN); + return; + } + deadlock_resolving = 1; + vmp->m_flags |= VMNT_BACKCALL; + dl_worker_start(func); + return; + } + } + + worker_start(func); +} + +/*===========================================================================* + * do_async_dev_result * + *===========================================================================*/ +PRIVATE void *do_async_dev_result(void *arg) +{ + endpoint_t endpt; + struct job my_job; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + /* An asynchronous character driver has results for us */ + if (call_nr == DEV_REVIVE) { + endpt = m_in.REP_ENDPT; + if (endpt == VFS_PROC_NR) + endpt = find_suspended_ep(m_in.m_source, m_in.REP_IO_GRANT); + + if (endpt == NONE) { + printf("VFS: proc with grant %d from %d not found\n", + m_in.REP_IO_GRANT, m_in.m_source); + } else if (m_in.REP_STATUS == SUSPEND) { + printf("VFS: got SUSPEND on DEV_REVIVE: not reviving proc\n"); + } else + revive(endpt, m_in.REP_STATUS); + } + else if (call_nr == DEV_OPEN_REPL) open_reply(); + else if (call_nr == DEV_REOPEN_REPL) reopen_reply(); + else if (call_nr == DEV_CLOSE_REPL) close_reply(); + else if (call_nr == DEV_SEL_REPL1) + select_reply1(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS); + else if (call_nr == DEV_SEL_REPL2) + select_reply2(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS); + + if (deadlock_resolving) { + struct vmnt *vmp; + if ((vmp = find_vmnt(who_e)) != NULL) + vmp->m_flags &= ~VMNT_BACKCALL; + + if (fp != NULL && fp->fp_wtid == dl_worker.w_tid) + deadlock_resolving = 0; + } + + thread_cleanup(NULL); + return(NULL); +} + +/*===========================================================================* + * do_control_msgs * + *===========================================================================*/ +PRIVATE void *do_control_msgs(void *arg) +{ + struct job my_job; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + /* Check for special control messages. */ + if (who_e == CLOCK) { + /* Alarm timer expired. Used only for select(). Check it. */ + expire_timers(m_in.NOTIFY_TIMESTAMP); + } else if (who_e == DS_PROC_NR) { + /* DS notifies us of an event. */ + ds_event(); + } else { + /* Device notifies us of an event. 
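+		 * (a status notification from a character device driver;
+		 * dev_status() queries the driver for the details)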
*/ + dev_status(&m_in); + } + + thread_cleanup(NULL); + return(NULL); +} + +/*===========================================================================* + * do_fs_reply * + *===========================================================================*/ +PRIVATE void *do_fs_reply(struct job *job) +{ + struct vmnt *vmp; + struct fproc *rfp; + + if (verbose) printf("VFS: reply to request!\n"); + if ((vmp = find_vmnt(who_e)) == NULL) + panic("Couldn't find vmnt for endpoint %d", who_e); + + rfp = job->j_fp; + + if (rfp == NULL || rfp->fp_endpoint == NONE) { + printf("VFS: spurious reply from %d\n", who_e); + return(NULL); + } + + *rfp->fp_sendrec = m_in; + vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */ + + worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */ + return(NULL); +} + +/*===========================================================================* + * lock_pm * + *===========================================================================*/ +PRIVATE void lock_pm(void) +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + /* First try to get it right off the bat */ + if (mutex_trylock(&pm_lock) == 0) + return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + + if (mutex_lock(&pm_lock) != 0) + panic("Could not obtain lock on pm\n"); + + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_pm * + *===========================================================================*/ +PRIVATE void unlock_pm(void) +{ + if (mutex_unlock(&pm_lock) != 0) + panic("Could not release lock on pm"); +} + +/*===========================================================================* + * do_pm * + *===========================================================================*/ +PRIVATE void *do_pm(void *arg) +{ + struct job my_job; + struct fproc *rfp; + + my_job = *((struct job *) arg); + rfp = fp = my_job.j_fp; + m_in = my_job.j_m_in; + + lock_pm(); + service_pm(); + unlock_pm(); + + thread_cleanup(NULL); + return(NULL); +} + +/*===========================================================================* + * do_pending_pipe * + *===========================================================================*/ +PRIVATE void *do_pending_pipe(void *arg) +{ + int r, fd_nr; + struct filp *f; + struct job my_job; + tll_access_t locktype; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + lock_proc(fp, 1 /* force lock */); + + fd_nr = fp->fp_block_fd; + locktype = (call_nr == READ) ? VNODE_READ : VNODE_WRITE; + f = get_filp(fd_nr, locktype); + assert(f != NULL); + + r = rw_pipe((call_nr == READ) ? READING : WRITING, who_e, fd_nr, f, + fp->fp_buffer, fp->fp_nbytes); + + if (r != SUSPEND) /* Do we have results to report? 
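+		 * (a SUSPEND result means the pipe I/O blocked again; the
+		 * caller then simply remains suspended)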
*/ + reply(who_e, r); + + unlock_filp(f); + + thread_cleanup(fp); + return(NULL); +} + +/*===========================================================================* + * do_dummy * + *===========================================================================*/ +PUBLIC void *do_dummy(void *arg) +{ + struct job my_job; + int r; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + if ((r = mutex_trylock(&fp->fp_lock)) == 0) { + thread_cleanup(fp); + } else { + /* Proc is busy, let that worker thread carry out the work */ + thread_cleanup(NULL); + } + return(NULL); +} + +/*===========================================================================* + * do_work * + *===========================================================================*/ +PRIVATE void *do_work(void *arg) +{ + int error; + struct job my_job; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + lock_proc(fp, 0); /* This proc is busy */ + + if (call_nr == MAPDRIVER) { + error = do_mapdriver(); + } else if (call_nr == COMMON_GETSYSINFO) { + error = do_getsysinfo(); + } else if (IS_PFS_VFS_RQ(call_nr)) { + if (who_e != PFS_PROC_NR) { + printf("VFS: only PFS is allowed to make nested VFS calls\n"); + error = ENOSYS; + } else if (call_nr <= PFS_BASE || call_nr >= PFS_BASE + PFS_NREQS) { + error = ENOSYS; + } else { + call_nr -= PFS_BASE; + error = (*pfs_call_vec[call_nr])(); + } + } else { + /* We're dealing with a POSIX system call from a normal + * process. Call the internal function that does the work. + */ + if (call_nr < 0 || call_nr >= NCALLS) { + error = ENOSYS; + } else if (fp->fp_flags & FP_EXITING) { + error = SUSPEND; + } else if (fp->fp_pid == PID_FREE) { + /* Process vanished before we were able to handle request. + * Replying has no use. Just drop it. */ + error = SUSPEND; + } else { +#if ENABLE_SYSCALL_STATS + calls_stats[call_nr]++; +#endif + error = (*call_vec[call_nr])(); + } + } + + /* Copy the results back to the user and send reply. */ + if (error != SUSPEND) { + if (deadlock_resolving) { + struct vmnt *vmp; + if ((vmp = find_vmnt(who_e)) != NULL) + vmp->m_flags &= ~VMNT_BACKCALL; + + if (fp->fp_wtid == dl_worker.w_tid) + deadlock_resolving = 0; + } + reply(who_e, error ); + } + + thread_cleanup(fp); + return(NULL); +} + +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +PRIVATE void sef_local_startup() +{ + /* Register init callbacks. */ + sef_setcb_init_fresh(sef_cb_init_fresh); + sef_setcb_init_restart(sef_cb_init_fail); + + /* No live update support for now. */ + + /* Let SEF perform startup. */ + sef_startup(); +} + +/*===========================================================================* + * sef_cb_init_fresh * + *===========================================================================*/ +PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) +{ +/* Initialize the virtual file server. */ + int s, i; + struct fproc *rfp; + message mess; + struct rprocpub rprocpub[NR_BOOT_PROCS]; + + force_sync = 0; + + /* Initialize proc endpoints to NONE */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + rfp->fp_endpoint = NONE; + rfp->fp_pid = PID_FREE; + } + + /* Initialize the process table with help of the process manager messages. + * Expect one message for each system process with its slot number and pid. + * When no more processes follow, the magic process number NONE is sent. 
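+ * Schematically, the handshake implemented below (field names as used in
+ * the loop; this is a summary of the code, not an additional protocol):
+ *
+ *	PM -> VFS: m_type = PM_INIT, with PM_SLOT, PM_PID, PM_PROC  (repeated)
+ *	PM -> VFS: m_type = PM_INIT, with PM_PROC = NONE            (last one)
+ *	VFS -> PM: m_type = OK                                      (sync)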
+ * Then, stop and synchronize with the PM.
+ */
+  do {
+	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
+		panic("VFS: couldn't receive from PM: %d", s);
+
+	if (mess.m_type != PM_INIT)
+		panic("unexpected message from PM: %d", mess.m_type);
+
+	if (NONE == mess.PM_PROC) break;
+
+	rfp = &fproc[mess.PM_SLOT];
+	rfp->fp_flags = FP_NOFLAGS;
+	rfp->fp_pid = mess.PM_PID;
+	rfp->fp_endpoint = mess.PM_PROC;
+	rfp->fp_grant = GRANT_INVALID;
+	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+	rfp->fp_realuid = (uid_t) SYS_UID;
+	rfp->fp_effuid = (uid_t) SYS_UID;
+	rfp->fp_realgid = (gid_t) SYS_GID;
+	rfp->fp_effgid = (gid_t) SYS_GID;
+	rfp->fp_umask = ~0;
+  } while (TRUE);			/* continue until process NONE */
+  mess.m_type = OK;			/* tell PM that we succeeded */
+  s = send(PM_PROC_NR, &mess);		/* send synchronization message */
+
+  /* All process table entries have been set. Continue with initialization. */
+  fp = &fproc[_ENDPOINT_P(VFS_PROC_NR)];/* During init all communication with
+					 * FSes is on behalf of myself */
+  init_dmap();				/* Initialize device table. */
+  system_hz = sys_hz();
+
+  /* Map all the services in the boot image. */
+  if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
+			    (vir_bytes) rprocpub, sizeof(rprocpub), S)) != OK){
+	panic("sys_safecopyfrom failed: %d", s);
+  }
+  for (i = 0; i < NR_BOOT_PROCS; i++) {
+	if (rprocpub[i].in_use) {
+		if ((s = map_service(&rprocpub[i])) != OK) {
+			panic("VFS: unable to map service: %d", s);
+		}
+	}
+  }
+
+  /* Subscribe to driver events for VFS drivers. */
+  if ((s = ds_subscribe("drv\\.vfs\\..*", DSF_INITIAL | DSF_OVERWRITE)) != OK){
+	panic("VFS: can't subscribe to driver events (%d)", s);
+  }
+
+#if DO_SANITYCHECKS
+  FIXME("VFS: DO_SANITYCHECKS is on");
+#endif
+
+  /* Initialize worker threads */
+  for (i = 0; i < NR_WTHREADS; i++) {
+	worker_init(&workers[i]);
+  }
+  worker_init(&sys_worker);	/* exclusive system worker thread */
+  worker_init(&dl_worker);	/* exclusive worker thread to resolve deadlocks */
+
+  /* Initialize global locks */
+  if (mthread_mutex_init(&pm_lock, NULL) != 0)
+	panic("VFS: couldn't initialize pm lock mutex");
+  if (mthread_mutex_init(&exec_lock, NULL) != 0)
+	panic("VFS: couldn't initialize exec lock");
+  if (mthread_mutex_init(&bsf_lock, NULL) != 0)
+	panic("VFS: couldn't initialize block special file lock");
+
+  /* Initialize event resources for boot procs and locks for all procs */
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	assert(mutex_init(&rfp->fp_lock, NULL) == 0);
+#if LOCK_DEBUG
+	rfp->fp_vp_rdlocks = 0;
+	rfp->fp_vmnt_rdlocks = 0;
+#endif
+  }
+
+  init_vnodes();		/* init vnodes */
+  init_vmnts();			/* init vmnt structures */
+  init_select();		/* init select() structures */
+  init_filps();			/* Init filp structures */
+  mount_pfs();			/* mount Pipe File Server */
+  worker_start(do_init_root);	/* mount initial ramdisk as file system root */
+
+  return(OK);
+}
+
+/*===========================================================================*
+ *				do_init_root				     *
+ *===========================================================================*/
+PRIVATE void *do_init_root(void *arg)
+{
+  struct fproc *rfp;
+  struct job my_job;
+  int r;
+  char *mount_label = "fs_imgrd";	/* FIXME: obtain this from RS */
+
+  my_job = *((struct job *) arg);
+  fp = my_job.j_fp;
+
+  lock_proc(fp, 1 /* force lock */);	/* This proc is busy */
+  lock_pm();
+
+  /* Initialize process directories.
mount_fs will set them to the correct + * values */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + FD_ZERO(&(rfp->fp_filp_inuse)); + rfp->fp_rd = NULL; + rfp->fp_wd = NULL; + } + + if ((r = mount_fs(DEV_IMGRD, "/", MFS_PROC_NR, 0, mount_label)) != OK) + panic("Failed to initialize root"); + + unlock_pm(); + thread_cleanup(fp); + return(NULL); +} + +/*===========================================================================* + * lock_proc * + *===========================================================================*/ +PUBLIC void lock_proc(struct fproc *rfp, int force_lock) +{ + int r; + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + r = mutex_trylock(&rfp->fp_lock); + + /* Were we supposed to obtain this lock immediately? */ + if (force_lock) { + assert(r == 0); + return; + } + + if (r == 0) return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + assert(mutex_lock(&rfp->fp_lock) == 0); + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_proc * + *===========================================================================*/ +PUBLIC void unlock_proc(struct fproc *rfp) +{ + int r; + + if ((r = mutex_unlock(&rfp->fp_lock)) != 0) + panic("Failed to unlock: %d", r); +} + +/*===========================================================================* + * thread_cleanup * + *===========================================================================*/ +PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l) +{ +/* Clean up worker thread. Skip parts if this thread is not associated + * with a particular process (i.e., rfp is NULL) */ + + if (verbose) printf("AVFS: thread %d is cleaning up for fp=%p (%s:%d)\n", + mthread_self(), rfp, f, l); + + assert(mthread_self() != -1); + +#if LOCK_DEBUG + if (rfp != NULL) { + check_filp_locks_by_me(); + check_vnode_locks_by_me(rfp); + check_vmnt_locks_by_me(rfp); + } +#endif + + if (rfp != NULL && rfp->fp_flags & FP_PM_PENDING) { /* Postponed PM call */ + m_in = rfp->fp_job.j_m_in; + rfp->fp_flags &= ~FP_PM_PENDING; + service_pm_postponed(); + } + +#if LOCK_DEBUG + if (rfp != NULL) { + check_filp_locks_by_me(); + check_vnode_locks_by_me(rfp); + check_vmnt_locks_by_me(rfp); + } +#endif + + if (rfp != NULL) unlock_proc(rfp); + +#if 0 + mthread_exit(NULL); +#endif +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work() +{ + /* Normally wait for new input. However, if 'reviving' is + * nonzero, a suspended process must be awakened. + */ + int r, found_one, proc_p; + register struct fproc *rp; + + if (verbose) printf("VFS: get_work looking for work\n"); + + while (reviving != 0) { + found_one = FALSE; + + /* Find a suspended process. */ + for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) + if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) { + found_one = TRUE; /* Found a suspended process */ + if (unblock(rp)) + return; /* So main loop can process job */ + send_work(); + } + + if (!found_one) /* Consistency error */ + panic("VFS: get_work couldn't revive anyone"); + } + + for(;;) { + /* Normal case. No one to revive. Get a useful request. 
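+	 * sef_receive(ANY, ...) blocks until a message arrives from any
+	 * endpoint; an EDEADSRCDST reply signals a failed sendrec to a dead
+	 * endpoint and is bounced back to the main loop (see below).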
*/ + if ((r = sef_receive(ANY, &m_in)) != OK) { + panic("VFS: sef_receive error: %d", r); + } + + proc_p = _ENDPOINT_P(m_in.m_source); + if (proc_p < 0) fp = NULL; + else fp = &fproc[proc_p]; + + if (m_in.m_type == EDEADSRCDST) return; /* Failed 'sendrec' */ + + if (verbose) printf("AVFS: got work from %d (fp=%p)\n", m_in.m_source, + fp); + + /* Negative who_p is never used to access the fproc array. Negative + * numbers (kernel tasks) are treated in a special way. + */ + if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc))) + panic("receive process out of range: %d", who_p); + if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) { + printf("VFS: ignoring request from %d, endpointless slot %d (%d)\n", + m_in.m_source, who_p, m_in.m_type); + continue; + } + + /* Internal consistency check; our mental image of process numbers and + * endpoints must match with how the rest of the system thinks of them. + */ + if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) { + if (fproc[who_p].fp_endpoint == NONE) + printf("slot unknown even\n"); + + printf("VFS: receive endpoint inconsistent (source %d, who_p " + "%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p, + fproc[who_p].fp_endpoint, who_e); + panic("VFS: inconsistent endpoint "); + } + + return; + } +} + + +/*===========================================================================* + * reply * + *===========================================================================*/ +PUBLIC void reply(whom, result) +int whom; /* process to reply to */ +int result; /* result of the call (usually OK or error #) */ +{ +/* Send a reply to a user process. If the send fails, just ignore it. */ + int r; + + m_out.reply_type = result; + r = sendnb(whom, &m_out); + if (r != OK) { + printf("VFS: couldn't send reply %d to %d: %d\n", result, whom, r); + panic("Yikes %d", call_nr); + } +} + +/*===========================================================================* + * service_pm_postponed * + *===========================================================================*/ +PRIVATE void service_pm_postponed(void) +{ + int r; + vir_bytes pc; + +#if 0 + printf("executing postponed: "); + if (call_nr == PM_EXEC) printf("PM_EXEC"); + if (call_nr == PM_EXIT) printf("PM_EXIT"); + if (call_nr == PM_DUMPCORE) printf("PM_DUMPCORE"); + printf("\n"); +#endif + + switch(call_nr) { + case PM_EXEC: + r = pm_exec(m_in.PM_PROC, m_in.PM_PATH, m_in.PM_PATH_LEN, + m_in.PM_FRAME, m_in.PM_FRAME_LEN, &pc); + + /* Reply status to PM */ + m_out.m_type = PM_EXEC_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + m_out.PM_PC = (void*)pc; + m_out.PM_STATUS = r; + + break; + + case PM_EXIT: + pm_exit(m_in.PM_PROC); + + /* Reply dummy status to PM for synchronization */ + m_out.m_type = PM_EXIT_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_DUMPCORE: + r = pm_dumpcore(m_in.PM_PROC, + NULL /* (struct mem_map *) m_in.PM_SEGPTR */); + + /* Reply status to PM */ + m_out.m_type = PM_CORE_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + m_out.PM_STATUS = r; + + break; + + default: + panic("Unhandled postponed PM call %d", m_in.m_type); + } + + r = send(PM_PROC_NR, &m_out); + if (r != OK) + panic("service_pm_postponed: send failed: %d", r); +} + +/*===========================================================================* + * service_pm * + *===========================================================================*/ +PRIVATE void service_pm() +{ + int r, slot; + + if (verbose) printf("service_pm: %d (%d)\n", call_nr, mthread_self()); + switch (call_nr) { + case PM_SETUID: + 
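+	/* Update the target process' real and effective uid, then break to
+	 * the common acknowledgement send() at the end of this function; the
+	 * simple PM requests below all follow this same pattern. */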
pm_setuid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID); + + m_out.m_type = PM_SETUID_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_SETGID: + pm_setgid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID); + + m_out.m_type = PM_SETGID_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_SETSID: + pm_setsid(m_in.PM_PROC); + + m_out.m_type = PM_SETSID_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_EXEC: + case PM_EXIT: + case PM_DUMPCORE: + okendpt(m_in.PM_PROC, &slot); + fp = &fproc[slot]; + + assert(!(fp->fp_flags & FP_PENDING)); + fp->fp_job.j_m_in = m_in; + fp->fp_flags |= FP_PM_PENDING; + +#if 0 + printf("Postponing: "); + if (call_nr == PM_EXEC) printf("PM_EXEC"); + if (call_nr == PM_EXIT) printf("PM_EXIT"); + if (call_nr == PM_DUMPCORE) printf("PM_DUMPCORE"); + printf("\n"); +#endif + + /* PM requests on behalf of a proc are handled after the system call + * that might be in progress for that proc has finished. If the proc + * is not busy, we start a dummy call */ + if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) { + mutex_unlock(&fp->fp_lock); + worker_start(do_dummy); + yield(); + } + + return; + + case PM_FORK: + case PM_SRV_FORK: + pm_fork(m_in.PM_PPROC, m_in.PM_PROC, m_in.PM_CPID); + + m_out.m_type = (call_nr == PM_FORK) ? PM_FORK_REPLY : PM_SRV_FORK_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + case PM_SETGROUPS: + pm_setgroups(m_in.PM_PROC, m_in.PM_GROUP_NO, m_in.PM_GROUP_ADDR); + + m_out.m_type = PM_SETGROUPS_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_UNPAUSE: + unpause(m_in.PM_PROC); + + m_out.m_type = PM_UNPAUSE_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_REBOOT: + pm_reboot(); + + /* Reply dummy status to PM for synchronization */ + m_out.m_type = PM_REBOOT_REPLY; + + break; + + default: + printf("VFS: don't know how to handle PM request %d\n", call_nr); + + return; + } + + r = send(PM_PROC_NR, &m_out); + if (r != OK) + panic("service_pm: send failed: %d", r); + +} + + +/*===========================================================================* + * unblock * + *===========================================================================*/ +PRIVATE int unblock(rfp) +struct fproc *rfp; +{ + int blocked_on; + + fp = rfp; + blocked_on = rfp->fp_blocked_on; + m_in.m_type = rfp->fp_block_callnr; + m_in.fd = rfp->fp_block_fd; + m_in.buffer = rfp->fp_buffer; + m_in.nbytes = rfp->fp_nbytes; + + rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* no longer blocked */ + rfp->fp_flags &= ~FP_REVIVED; + reviving--; + assert(reviving >= 0); + + /* This should be a pipe I/O, not a device I/O. If it is, it'll 'leak' + * grants. + */ + assert(!GRANT_VALID(rfp->fp_grant)); + + /* Pending pipe reads/writes can be handled directly */ + if (blocked_on == FP_BLOCKED_ON_PIPE) { + worker_start(do_pending_pipe); + yield(); /* Give thread a chance to run */ + return(0); /* Retrieve more work */ + } + + return(1); /* We've unblocked a process */ +} diff --git a/servers/avfs/misc.c b/servers/avfs/misc.c new file mode 100644 index 000000000..0f0cba7cd --- /dev/null +++ b/servers/avfs/misc.c @@ -0,0 +1,617 @@ +/* This file contains a collection of miscellaneous procedures. Some of them + * perform simple system calls. Some others do a little part of system calls + * that are mostly performed by the Memory Manager. 
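+ * ("Memory Manager" is the historical name of what is now the process
+ * manager, PM.)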
+ * + * The entry points into this file are + * do_dup: perform the DUP system call + * do_fcntl: perform the FCNTL system call + * do_sync: perform the SYNC system call + * do_fsync: perform the FSYNC system call + * pm_reboot: sync disks and prepare for shutdown + * pm_fork: adjust the tables after PM has performed a FORK system call + * do_exec: handle files with FD_CLOEXEC on after PM has done an EXEC + * do_exit: a process has exited; note that in the tables + * do_set: set uid or gid for some process + * do_revive: revive a process that was waiting for something (e.g. TTY) + * do_svrctl: file system control + * do_getsysinfo: request copy of FS data structure + * pm_dumpcore: create a core dump + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "dmap.h" +#include +#include "vnode.h" +#include "vmnt.h" +#include "param.h" + +#define CORE_NAME "core" +#define CORE_MODE 0777 /* mode to use on core image files */ + +#if ENABLE_SYSCALL_STATS +PUBLIC unsigned long calls_stats[NCALLS]; +#endif + +FORWARD _PROTOTYPE( void free_proc, (struct fproc *freed, int flags) ); +/* +FORWARD _PROTOTYPE( int dumpcore, (int proc_e, struct mem_map *seg_ptr) ); +FORWARD _PROTOTYPE( int write_bytes, (struct inode *rip, off_t off, + char *buf, size_t bytes) ); +FORWARD _PROTOTYPE( int write_seg, (struct inode *rip, off_t off, int proc_e, + int seg, off_t seg_off, phys_bytes seg_bytes) ); +*/ + +/*===========================================================================* + * do_getsysinfo * + *===========================================================================*/ +PUBLIC int do_getsysinfo() +{ + vir_bytes src_addr, dst_addr; + size_t len; + + /* Only su may call do_getsysinfo. This call may leak information (and is not + * stable enough to be part of the API/ABI). + */ + + if (!super_user) return(EPERM); + + /* This call should no longer be used by user applications. In the future, + * requests from non-system processes should be denied. For now, just warn. + */ + if (call_nr == GETSYSINFO) { + printf("VFS: obsolete call of do_getsysinfo() by proc %d\n", + fp->fp_endpoint); + } + + switch(m_in.info_what) { + case SI_PROC_TAB: + src_addr = (vir_bytes) fproc; + len = sizeof(struct fproc) * NR_PROCS; + break; + case SI_DMAP_TAB: + src_addr = (vir_bytes) dmap; + len = sizeof(struct dmap) * NR_DEVICES; + break; +#if ENABLE_SYSCALL_STATS + case SI_CALL_STATS: + src_addr = (vir_bytes) calls_stats; + len = sizeof(calls_stats); + break; +#endif + default: + return(EINVAL); + } + + dst_addr = (vir_bytes) m_in.info_where; + return sys_datacopy(SELF, src_addr, who_e, dst_addr, len); +} + +/*===========================================================================* + * do_dup * + *===========================================================================*/ +PUBLIC int do_dup() +{ +/* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are + * obsolete. In fact, it is not even possible to invoke them using the + * current library because the library routines call fcntl(). They are + * provided to permit old binary programs to continue to run. + */ + + register int rfd; + register struct filp *f; + int r = OK; + + /* Is the file descriptor valid? */ + rfd = m_in.fd & ~DUP_MASK; /* kill off dup2 bit, if on */ + if ((f = get_filp(rfd, VNODE_READ)) == NULL) return(err_code); + + /* Distinguish between dup and dup2. 
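+   * dup(fd) arrives with the descriptor as-is, while dup2(fd, fd2) arrives
+   * with the DUP_MASK bit or'ed into fd, so m_in.fd == rfd holds only for
+   * a plain dup() call.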
*/ + if (m_in.fd == rfd) { /* bit not on */ + /* dup(fd) */ + r = get_fd(0, 0, &m_in.fd2, NULL); + } else { + /* dup2(old_fd, new_fd) */ + if (m_in.fd2 < 0 || m_in.fd2 >= OPEN_MAX) { + r = EBADF; + } else if (rfd == m_in.fd2) { /* ignore the call: dup2(x, x) */ + r = m_in.fd2; + } else { + /* All is fine, close new_fd if necessary */ + m_in.fd = m_in.fd2; /* prepare to close fd2 */ + unlock_filp(f); /* or it might deadlock on do_close */ + (void) do_close(); /* cannot fail */ + f = get_filp(rfd, VNODE_READ); /* lock old_fd again */ + } + } + + if (r == OK) { + /* Success. Set up new file descriptors. */ + f->filp_count++; + fp->fp_filp[m_in.fd2] = f; + FD_SET(m_in.fd2, &fp->fp_filp_inuse); + r = m_in.fd2; + } + + unlock_filp(f); + return(r); +} + +/*===========================================================================* + * do_fcntl * + *===========================================================================*/ +PUBLIC int do_fcntl() +{ +/* Perform the fcntl(fd, request, ...) system call. */ + + register struct filp *f; + int new_fd, fl, r = OK; + tll_access_t locktype; + + /* Is the file descriptor valid? */ + locktype = (m_in.request == F_FREESP) ? VNODE_WRITE : VNODE_READ; + if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code); + + switch (m_in.request) { + case F_DUPFD: + /* This replaces the old dup() system call. */ + if (m_in.addr < 0 || m_in.addr >= OPEN_MAX) r = EINVAL; + else if ((r = get_fd(m_in.addr, 0, &new_fd, NULL)) == OK) { + f->filp_count++; + fp->fp_filp[new_fd] = f; + r = new_fd; + } + break; + + case F_GETFD: + /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */ + r = FD_ISSET(m_in.fd, &fp->fp_cloexec_set) ? FD_CLOEXEC : 0; + break; + + case F_SETFD: + /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */ + if(m_in.addr & FD_CLOEXEC) + FD_SET(m_in.fd, &fp->fp_cloexec_set); + else + FD_CLR(m_in.fd, &fp->fp_cloexec_set); + break; + + case F_GETFL: + /* Get file status flags (O_NONBLOCK and O_APPEND). */ + fl = f->filp_flags & (O_NONBLOCK | O_APPEND | O_ACCMODE); + r = fl; + break; + + case F_SETFL: + /* Set file status flags (O_NONBLOCK and O_APPEND). */ + fl = O_NONBLOCK | O_APPEND | O_REOPEN; + f->filp_flags = (f->filp_flags & ~fl) | (m_in.addr & fl); + break; + + case F_GETLK: + case F_SETLK: + case F_SETLKW: + /* Set or clear a file lock. */ + r = lock_op(f, m_in.request); + break; + + case F_FREESP: + { + /* Free a section of a file. Preparation is done here, actual freeing + * in freesp_inode(). + */ + off_t start, end; + struct flock flock_arg; + signed long offset; + + /* Check if it's a regular file. */ + if ((f->filp_vno->v_mode & I_TYPE) != I_REGULAR) r = EINVAL; + else if (!(f->filp_mode & W_BIT)) r = EBADF; + else + /* Copy flock data from userspace. */ + r = sys_datacopy(who_e, (vir_bytes) m_in.name1, SELF, + (vir_bytes) &flock_arg, + (phys_bytes) sizeof(flock_arg)); + + if (r != OK) break; + + /* Convert starting offset to signed. */ + offset = (signed long) flock_arg.l_start; + + /* Figure out starting position base. */ + switch(flock_arg.l_whence) { + case SEEK_SET: start = 0; break; + case SEEK_CUR: + if (ex64hi(f->filp_pos) != 0) + panic("do_fcntl: position in file too high"); + start = ex64lo(f->filp_pos); + break; + case SEEK_END: start = f->filp_vno->v_size; break; + default: r = EINVAL; + } + if (r != OK) break; + + /* Check for overflow or underflow. 
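+	 * e.g. a positive l_start added to start can only yield a value
+	 * below start on signed overflow, and a negative l_start can only
+	 * yield a value above start on underflow; both are rejected.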
*/ + if (offset > 0 && start + offset < start) r = EINVAL; + else if (offset < 0 && start + offset > start) r = EINVAL; + else { + start += offset; + if (start < 0) r = EINVAL; + } + if (r != OK) break; + + if (flock_arg.l_len != 0) { + if (start >= f->filp_vno->v_size) r = EINVAL; + else if ((end = start + flock_arg.l_len) <= start) r = EINVAL; + else if (end > f->filp_vno->v_size) end = f->filp_vno->v_size; + } else { + end = 0; + } + if (r != OK) break; + + r = req_ftrunc(f->filp_vno->v_fs_e, f->filp_vno->v_inode_nr,start,end); + + if (r == OK && flock_arg.l_len == 0) + f->filp_vno->v_size = start; + + break; + } + + default: + r = EINVAL; + } + + unlock_filp(f); + return(r); +} + +/*===========================================================================* + * do_sync * + *===========================================================================*/ +PUBLIC int do_sync() +{ + struct vmnt *vmp; + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { + lock_vmnt(vmp, VMNT_EXCL); + if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE) + req_sync(vmp->m_fs_e); + unlock_vmnt(vmp); + } + + return(OK); +} + +/*===========================================================================* + * do_fsync * + *===========================================================================*/ +PUBLIC int do_fsync() +{ +/* Perform the fsync() system call. For now, don't be unnecessarily smart. */ + struct filp *rfilp; + struct vmnt *vmp; + dev_t dev; + + if ((rfilp = get_filp(m_in.m1_i1, VNODE_READ)) == NULL) return(err_code); + dev = rfilp->filp_vno->v_dev; + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { + lock_vmnt(vmp, VMNT_EXCL); + if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && vmp->m_fs_e != NONE) + req_sync(vmp->m_fs_e); + unlock_vmnt(vmp); + } + + unlock_filp(rfilp); + + return(OK); +} + +/*===========================================================================* + * pm_reboot * + *===========================================================================*/ +PUBLIC void pm_reboot() +{ + /* Perform the VFS side of the reboot call. */ + int i; + struct fproc *rfp; + + do_sync(); + + /* Do exit processing for all leftover processes and servers, + * but don't actually exit them (if they were really gone, PM + * will tell us about it). + */ + for (i = 0; i < NR_PROCS; i++) { + /* Don't just free the proc right away, but let it finish what it was + * doing first */ + rfp = &fproc[i]; + if (rfp->fp_endpoint != NONE) { + lock_proc(rfp, 0); + free_proc(rfp, 0); + unlock_proc(rfp); + } + } + + unmount_all(); +} + +/*===========================================================================* + * pm_fork * + *===========================================================================*/ +PUBLIC void pm_fork(pproc, cproc, cpid) +int pproc; /* Parent process */ +int cproc; /* Child process */ +int cpid; /* Child process id */ +{ +/* Perform those aspects of the fork() system call that relate to files. + * In particular, let the child inherit its parent's file descriptors. + * The parent and child parameters tell who forked off whom. The file + * system uses the same slot numbers as the kernel. Only PM makes this call. + */ + + register struct fproc *cp, *pp; + int i, parentno, childno; + mutex_t c_fp_lock; + + /* Check up-to-dateness of fproc. */ + okendpt(pproc, &parentno); + + /* PM gives child endpoint, which implies process slot information. + * Don't call isokendpt, because that will verify if the endpoint + * number is correct in fproc, which it won't be. 
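+ * (PM reports the fork before VFS has filled in the child's slot in fproc,
+ * so only the slot number implied by the endpoint itself is usable here.)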
+ */ + childno = _ENDPOINT_P(cproc); + if (childno < 0 || childno >= NR_PROCS) + panic("VFS: bogus child for forking: %d", m_in.child_endpt); + if (fproc[childno].fp_pid != PID_FREE) + panic("VFS: forking on top of in-use child: %d", childno); + + /* Copy the parent's fproc struct to the child. */ + /* However, the mutex variables belong to a slot and must stay the same. */ + c_fp_lock = fproc[childno].fp_lock; + fproc[childno] = fproc[parentno]; + fproc[childno].fp_lock = c_fp_lock; + + /* Increase the counters in the 'filp' table. */ + cp = &fproc[childno]; + pp = &fproc[parentno]; + + for (i = 0; i < OPEN_MAX; i++) + if (cp->fp_filp[i] != NULL) cp->fp_filp[i]->filp_count++; + + /* Fill in new process and endpoint id. */ + cp->fp_pid = cpid; + cp->fp_endpoint = cproc; + + /* A forking process never has an outstanding grant, as it isn't blocking on + * I/O. */ + if(GRANT_VALID(pp->fp_grant)) { + panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp->fp_endpoint, + pp->fp_grant); + } + if(GRANT_VALID(cp->fp_grant)) { + panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp->fp_endpoint, + cp->fp_grant); + } + + /* A child is not a process leader, not being revived, etc. */ + cp->fp_flags = FP_NOFLAGS; + + /* Record the fact that both root and working dir have another user. */ + if (cp->fp_rd) dup_vnode(cp->fp_rd); + if (cp->fp_wd) dup_vnode(cp->fp_wd); +} + +/*===========================================================================* + * free_proc * + *===========================================================================*/ +PRIVATE void free_proc(struct fproc *exiter, int flags) +{ + int i; + register struct fproc *rfp; + register struct filp *rfilp; + register struct vnode *vp; + dev_t dev; + + if (exiter->fp_endpoint == NONE) + panic("free_proc: already free"); + + if (fp_is_blocked(exiter)) + unpause(exiter->fp_endpoint); + + /* Loop on file descriptors, closing any that are open. */ + for (i = 0; i < OPEN_MAX; i++) { + (void) close_fd(exiter, i); + } + + /* Check if any process is SUSPENDed on this driver. + * If a driver exits, unmap its entries in the dmap table. + * (unmapping has to be done after the first step, because the + * dmap table is used in the first step.) + */ + unsuspend_by_endpt(exiter->fp_endpoint); + + /* Release root and working directories. */ + if (exiter->fp_rd) { put_vnode(exiter->fp_rd); exiter->fp_rd = NULL; } + if (exiter->fp_wd) { put_vnode(exiter->fp_wd); exiter->fp_wd = NULL; } + + /* The rest of these actions is only done when processes actually exit. */ + if (!(flags & FP_EXITING)) return; + + /* Invalidate endpoint number for error and sanity checks. */ + exiter->fp_endpoint = NONE; + exiter->fp_flags |= FP_EXITING; + + /* If a session leader exits and it has a controlling tty, then revoke + * access to its controlling tty from all other processes using it. + */ + if ((exiter->fp_flags & FP_SESLDR) && exiter->fp_tty != 0) { + dev = exiter->fp_tty; + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if(rfp->fp_pid == PID_FREE) continue; + if (rfp->fp_tty == dev) rfp->fp_tty = 0; + + for (i = 0; i < OPEN_MAX; i++) { + if ((rfilp = rfp->fp_filp[i]) == NULL) continue; + if (rfilp->filp_mode == FILP_CLOSED) continue; + vp = rfilp->filp_vno; + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + if ((dev_t) vp->v_sdev != dev) continue; + lock_filp(rfilp, VNODE_READ); + (void) dev_close(dev, rfilp-filp); /* Ignore any errors, even + * SUSPEND. */ + + rfilp->filp_mode = FILP_CLOSED; + unlock_filp(rfilp); + } + } + } + + /* Exit done. 
Mark slot as free. */ + exiter->fp_pid = PID_FREE; + if (exiter->fp_flags & FP_PENDING) + pending--; /* No longer pending job, not going to do it */ + exiter->fp_flags = FP_NOFLAGS; +} + +/*===========================================================================* + * pm_exit * + *===========================================================================*/ +PUBLIC void pm_exit(proc) +int proc; +{ +/* Perform the file system portion of the exit(status) system call. */ + int exitee_p; + + /* Nevertheless, pretend that the call came from the user. */ + okendpt(proc, &exitee_p); + fp = &fproc[exitee_p]; + free_proc(fp, FP_EXITING); +} + +/*===========================================================================* + * pm_setgid * + *===========================================================================*/ +PUBLIC void pm_setgid(proc_e, egid, rgid) +int proc_e; +int egid; +int rgid; +{ + register struct fproc *tfp; + int slot; + + okendpt(proc_e, &slot); + tfp = &fproc[slot]; + + tfp->fp_effgid = egid; + tfp->fp_realgid = rgid; +} + + +/*===========================================================================* + * pm_setgroups * + *===========================================================================*/ +PUBLIC void pm_setgroups(proc_e, ngroups, groups) +int proc_e; +int ngroups; +gid_t *groups; +{ + struct fproc *rfp; + int slot; + + okendpt(proc_e, &slot); + rfp = &fproc[slot]; + if (ngroups * sizeof(gid_t) > sizeof(rfp->fp_sgroups)) + panic("VFS: pm_setgroups: too much data to copy"); + if (sys_datacopy(who_e, (vir_bytes) groups, SELF, (vir_bytes) rfp->fp_sgroups, + ngroups * sizeof(gid_t)) == OK) { + rfp->fp_ngroups = ngroups; + } else + panic("VFS: pm_setgroups: datacopy failed"); +} + + +/*===========================================================================* + * pm_setuid * + *===========================================================================*/ +PUBLIC void pm_setuid(proc_e, euid, ruid) +int proc_e; +int euid; +int ruid; +{ + struct fproc *tfp; + int slot; + + okendpt(proc_e, &slot); + tfp = &fproc[slot]; + + tfp->fp_effuid = euid; + tfp->fp_realuid = ruid; +} + +/*===========================================================================* + * do_svrctl * + *===========================================================================*/ +PUBLIC int do_svrctl() +{ + switch (m_in.svrctl_req) { + /* No control request implemented yet. */ + default: + return(EINVAL); + } +} + +/*===========================================================================* + * pm_dumpcore * + *===========================================================================*/ +PUBLIC int pm_dumpcore(proc_e, seg_ptr) +int proc_e; +struct mem_map *seg_ptr; +{ + int slot; + + okendpt(proc_e, &slot); + free_proc(&fproc[slot], FP_EXITING); + return(OK); +} + +/*===========================================================================* + * ds_event * + *===========================================================================*/ +PUBLIC void ds_event() +{ + char key[DS_MAX_KEYLEN]; + char *drv_prefix = "drv.vfs."; + u32_t value; + int type, r; + endpoint_t owner_endpoint; + + /* Get the event and the owner from DS. */ + if ((r = ds_check(key, &type, &owner_endpoint)) != OK) { + if(r != ENOENT) printf("VFS: ds_event: ds_check failed: %d\n", r); + return; + } + if ((r = ds_retrieve_u32(key, &value)) != OK) { + printf("VFS: ds_event: ds_retrieve_u32 failed\n"); + return; + } + + /* Only check for VFS driver up events. 
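+   * i.e. DS keys matching "drv.vfs.*" (the driver label follows the
+   * prefix) whose value is DS_DRIVER_UP; any other event is ignored.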
*/
+  if (strncmp(key, drv_prefix, strlen(drv_prefix)) || value != DS_DRIVER_UP)
+	return;
+
+  /* Perform up. */
+  dmap_endpt_up(owner_endpoint);
+}
diff --git a/servers/avfs/mount.c b/servers/avfs/mount.c
new file mode 100644
index 000000000..00f8c8258
--- /dev/null
+++ b/servers/avfs/mount.c
@@ -0,0 +1,605 @@
+/* This file performs the MOUNT and UMOUNT system calls.
+ *
+ * The entry points into this file are
+ *   do_fsready:	perform the FS_READY system call
+ *   do_mount:		perform the MOUNT system call
+ *   do_umount:		perform the UMOUNT system call
+ *   unmount:		unmount a file system
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include
+#include "vnode.h"
+#include "vmnt.h"
+#include "path.h"
+#include "param.h"
+
+/* Allow the root to be replaced before the first 'real' mount. */
+PRIVATE int have_root = 0;
+
+/* Bitmap of in-use "none" pseudo devices. */
+PRIVATE bitchunk_t nonedev[BITMAP_CHUNKS(NR_NONEDEVS)] = { 0 };
+
+#define alloc_nonedev(dev) SET_BIT(nonedev, minor(dev) - 1)
+#define free_nonedev(dev) UNSET_BIT(nonedev, minor(dev) - 1)
+
+FORWARD _PROTOTYPE( dev_t name_to_dev, (int allow_mountpt,
+					char path[PATH_MAX+1]) );
+FORWARD _PROTOTYPE( int is_nonedev, (dev_t dev) );
+FORWARD _PROTOTYPE( dev_t find_free_nonedev, (void) );
+FORWARD _PROTOTYPE( void update_bspec, (dev_t dev, endpoint_t fs_e,
+					int send_drv_e) );
+
+/*===========================================================================*
+ *				update_bspec				     *
+ *===========================================================================*/
+PRIVATE void update_bspec(dev_t dev, endpoint_t fs_e, int send_drv_e)
+{
+/* Update all block special files for a certain device, to use a new FS endpt
+ * to route raw block I/O requests through.
+ */
+  struct vnode *vp;
+  struct dmap *dp;
+  int r, major;
+
+  for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
+	if (vp->v_ref_count > 0 && S_ISBLK(vp->v_mode) && vp->v_sdev == dev) {
+		vp->v_bfs_e = fs_e;
+		if (send_drv_e) {
+			major = major(dev);
+			if (major < 0 || major >= NR_DEVICES) {
+				/* Can't update driver endpoint for out of
+				 * range major */
+				continue;
+			}
+			dp = &dmap[major(dev)];
+			if (dp->dmap_driver == NONE) {
+				/* Can't send new driver endpoint for
+				 * vanished driver */
+				printf("VFS: can't send new driver endpt\n");
+				continue;
+			}
+
+			if ((r = req_newdriver(fs_e, vp->v_sdev,
+					       dp->dmap_driver)) != OK) {
+				printf("VFS: Failed to send new driver endpoint"
+				       " for moved block special file\n");
+			}
+		}
+	}
+}
+
+/*===========================================================================*
+ *				do_fsready				     *
+ *===========================================================================*/
+PUBLIC int do_fsready()
+{
+  /* deprecated */
+  return(SUSPEND);
+}
+
+/*===========================================================================*
+ *				do_mount				     *
+ *===========================================================================*/
+PUBLIC int do_mount()
+{
+/* Perform the mount(name, mfile, mount_flags) system call. */
+  endpoint_t fs_e;
+  int r, slot, rdonly, nodev;
+  char fullpath[PATH_MAX+1];
+  char mount_label[LABEL_MAX];
+  dev_t dev;
+
+  /* Only the super-user may do MOUNT. */
+  if (!super_user) return(EPERM);
+
+  /* FS process' endpoint number */
+  if (m_in.mount_flags & MS_LABEL16) {
+	/* Get the label from the caller, and ask DS for the endpoint.
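+	 * (ds_retrieve_label_endpt() maps a label registered with DS to the
+	 * endpoint of the process that carries that label)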
*/ + r = sys_datacopy(who_e, (vir_bytes) m_in.fs_label, SELF, + (vir_bytes) mount_label, (phys_bytes) sizeof(mount_label)); + if (r != OK) return(r); + + mount_label[sizeof(mount_label)-1] = 0; + + r = ds_retrieve_label_endpt(mount_label, &fs_e); + if (r != OK) return(r); + } else { + /* Legacy support: get the endpoint from the request itself. */ + fs_e = (endpoint_t) m_in.fs_label; + mount_label[0] = 0; + } + + /* Sanity check on process number. */ + if (isokendpt(fs_e, &slot) != OK) return(EINVAL); + + /* Should the file system be mounted read-only? */ + rdonly = (m_in.mount_flags & MS_RDONLY); + + /* A null string for block special device means don't use a device at all. */ + nodev = (m_in.name1_length == 0); + if (!nodev) { + /* If 'name' is not for a block special file, return error. */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((dev = name_to_dev(FALSE /*allow_mountpt*/, fullpath)) == NO_DEV) + return(err_code); + } else { + /* Find a free pseudo-device as substitute for an actual device. */ + if ((dev = find_free_nonedev()) == NO_DEV) + return(err_code); + } + + /* Fetch the name of the mountpoint */ + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + return(err_code); + + /* Do the actual job */ + return mount_fs(dev, fullpath, fs_e, rdonly, mount_label); +} + + +/*===========================================================================* + * mount_fs * + *===========================================================================*/ +PUBLIC int mount_fs( +dev_t dev, +char mountpoint[PATH_MAX+1], +endpoint_t fs_e, +int rdonly, +char mount_label[LABEL_MAX] ) +{ + int rdir, mdir; /* TRUE iff {root|mount} file is dir */ + int i, r = OK, found, isroot, mount_root; + struct fproc *tfp; + struct dmap *dp; + struct vnode *root_node, *vp = NULL, *bspec; + struct vmnt *new_vmp, *parent_vmp; + char *label; + struct node_details res; + struct lookup resolve; + + /* Look up block device driver label when dev is not a pseudo-device */ + label = ""; + if (!is_nonedev(dev)) { + /* Get driver process' endpoint */ + dp = &dmap[major(dev)]; + if (dp->dmap_driver == NONE) { + printf("VFS: no driver for dev %d\n", dev); + return(EINVAL); + } + + label = dp->dmap_label; + assert(strlen(label) > 0); + } + + lock_bsf(); + + /* Check whether there is a block special file open which uses the + * same device (partition) */ + for (bspec = &vnode[0]; bspec < &vnode[NR_VNODES]; ++bspec) { + if (bspec->v_ref_count > 0 && bspec->v_sdev == dev) { + /* Found, flush and invalidate any blocks for this device. */ + req_flush(bspec->v_fs_e, dev); + break; + } + } + + /* Scan vmnt table to see if dev already mounted. 
If not, find a free slot.*/ + found = FALSE; + for (i = 0; i < NR_MNTS; ++i) { + if (vmnt[i].m_dev == dev) found = TRUE; + } + if (found) { + unlock_bsf(); + return(EBUSY); + } else if ((new_vmp = get_free_vmnt()) == NULL) { + unlock_bsf(); + return(ENOMEM); + } + + lock_vmnt(new_vmp, VMNT_EXCL); + + isroot = (strcmp(mountpoint, "/") == 0); + mount_root = (isroot && have_root < 2); /* Root can be mounted twice: + * 1: ramdisk + * 2: boot disk (e.g., harddisk) + */ + + if (!mount_root) { + /* Get vnode of mountpoint */ + lookup_init(&resolve, mountpoint, PATH_NOFLAGS, &parent_vmp, &vp); + resolve.l_vmnt_lock = VMNT_EXCL; + resolve.l_vnode_lock = VNODE_WRITE; + if ((vp = eat_path(&resolve, fp)) == NULL) + r = err_code; + else if (vp->v_ref_count == 1) { + /*Tell FS on which vnode it is mounted (glue into mount tree)*/ + r = req_mountpoint(vp->v_fs_e, vp->v_inode_nr); + } else + r = EBUSY; + + if (r != OK) { + if (vp != NULL) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + put_vnode(vp); + } + unlock_vmnt(new_vmp); + unlock_bsf(); + return(r); + } + } + +/* XXX: move this upwards before lookup after proper locking. */ + /* We'll need a vnode for the root inode */ + if ((root_node = get_free_vnode()) == NULL || dev == 266) { + if (vp != NULL) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + put_vnode(vp); + } + unlock_vmnt(new_vmp); + unlock_bsf(); + return(err_code); + } + + lock_vnode(root_node, VNODE_OPCL); + + /* Store some essential vmnt data first */ + new_vmp->m_fs_e = fs_e; + new_vmp->m_dev = dev; + if (rdonly) new_vmp->m_flags |= VMNT_READONLY; + else new_vmp->m_flags &= ~VMNT_READONLY; + + /* Tell FS which device to mount */ + if ((r = req_readsuper(fs_e, label, dev, rdonly, isroot, &res)) != OK) { + if (vp != NULL) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + put_vnode(vp); + } + new_vmp->m_fs_e = NONE; + new_vmp->m_dev = NO_DEV; + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + unlock_bsf(); + return(r); + } + + /* Fill in root node's fields */ + root_node->v_fs_e = res.fs_e; + root_node->v_inode_nr = res.inode_nr; + root_node->v_mode = res.fmode; + root_node->v_uid = res.uid; + root_node->v_gid = res.gid; + root_node->v_size = res.fsize; + root_node->v_sdev = NO_DEV; + root_node->v_fs_count = 1; + root_node->v_ref_count = 1; + + /* Root node is indeed on the partition */ + root_node->v_vmnt = new_vmp; + root_node->v_dev = new_vmp->m_dev; + + if(mount_root) { + /* Superblock and root node already read. + * Nothing else can go wrong. Perform the mount. */ + new_vmp->m_root_node = root_node; + new_vmp->m_mounted_on = NULL; + strcpy(new_vmp->m_label, mount_label); + if (is_nonedev(dev)) alloc_nonedev(dev); + update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */); + + ROOT_DEV = dev; + ROOT_FS_E = fs_e; + + /* Replace all root and working directories */ + for (i = 0, tfp = fproc; i < NR_PROCS; i++, tfp++) { + if (tfp->fp_pid == PID_FREE) + continue; + +#define MAKEROOT(what) { \ + if (what) put_vnode(what); \ + dup_vnode(root_node); \ + what = root_node; \ + } + + MAKEROOT(tfp->fp_rd); + MAKEROOT(tfp->fp_wd); + } + + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + have_root++; /* We have a (new) root */ + unlock_bsf(); + return(OK); + } + + /* File types may not conflict. */ + mdir = ((vp->v_mode & I_TYPE) == I_DIRECTORY); /*TRUE iff dir*/ + rdir = ((root_node->v_mode & I_TYPE) == I_DIRECTORY); + if (!mdir && rdir) r = EISDIR; + + /* If error, return the super block and both inodes; release the vmnt. 
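+   * The bsf lock, taken first in this function, is released last, so the
+   * unlock order below is the reverse of the acquisition order.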
*/ + if (r != OK) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + put_vnode(vp); + put_vnode(root_node); + new_vmp->m_dev = NO_DEV; + unlock_bsf(); + return(r); + } + + /* Nothing else can go wrong. Perform the mount. */ + new_vmp->m_mounted_on = vp; + new_vmp->m_root_node = root_node; + strcpy(new_vmp->m_label, mount_label); + + /* Allocate the pseudo device that was found, if not using a real device. */ + if (is_nonedev(dev)) alloc_nonedev(dev); + + /* The new FS will handle block I/O requests for its device now. */ + update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */); + + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + unlock_bsf(); + + return(r); +} + + +/*===========================================================================* + * mount_pfs * + *===========================================================================*/ +PUBLIC void mount_pfs(void) +{ +/* Mount the Pipe File Server. It's not really mounted onto the file system, + but it's necessary it has a vmnt entry to make locking easier */ + + dev_t dev; + struct vmnt *vmp; + + if ((dev = find_free_nonedev()) == NO_DEV) + panic("VFS: no nonedev to initialize PFS"); + + if ((vmp = get_free_vmnt()) == NULL) + panic("VFS: no vmnt to initialize PFS"); + + alloc_nonedev(dev); + + vmp->m_dev = dev; + vmp->m_fs_e = PFS_PROC_NR; + strcpy(vmp->m_label, "pfs"); +} + +/*===========================================================================* + * do_umount * + *===========================================================================*/ +PUBLIC int do_umount(void) +{ +/* Perform the umount(name) system call. */ + char label[LABEL_MAX]; + dev_t dev; + int r; + char fullpath[PATH_MAX+1]; + + /* Only the super-user may do umount. */ + if (!super_user) return(EPERM); + + /* If 'name' is not for a block special file or mountpoint, return error. */ + if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) + return(err_code); + if ((dev = name_to_dev(TRUE /*allow_mountpt*/, fullpath)) == NO_DEV) + return(err_code); + + if ((r = unmount(dev, label)) != OK) return(r); + + /* Return the label of the mounted file system, so that the caller + * can shut down the corresponding server process. + */ + if (strlen(label) >= M3_LONG_STRING) /* should never evaluate to true */ + label[M3_LONG_STRING-1] = 0; + strcpy(m_out.umount_label, label); + return(OK); +} + + +/*===========================================================================* + * unmount * + *===========================================================================*/ +PUBLIC int unmount( + dev_t dev, /* block-special device */ + char *label /* buffer to retrieve label, or NULL */ +) +{ + struct vnode *vp; + struct vmnt *vmp_i = NULL, *vmp = NULL; + int count, locks, r; + + /* Find vmnt that is to be unmounted */ + for (vmp_i = &vmnt[0]; vmp_i < &vmnt[NR_MNTS]; ++vmp_i) { + if (vmp_i->m_dev == dev) { + if(vmp) panic("device mounted more than once: %d", dev); + vmp = vmp_i; + } + } + + /* Did we find the vmnt (i.e., was dev a mounted device)? */ + if(!vmp) return(EINVAL); + + lock_bsf(); + + assert(lock_vmnt(vmp, VMNT_EXCL) == OK); + + /* See if the mounted device is busy. Only 1 vnode using it should be + * open -- the root vnode -- and that inode only 1 time. 
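+ * (Sketch of the accounting below: 'count' sums v_ref_count over all
+ * vnodes on this device and 'locks' counts how many of them are locked;
+ * with only the root vnode in use both stay at or below 1, and anything
+ * higher means open files or in-flight requests, hence EBUSY.)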
*/ + locks = count = 0; + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) + if (vp->v_ref_count > 0 && vp->v_dev == dev) { + count += vp->v_ref_count; + if (is_vnode_locked(vp)) locks++; + } + + if (count > 1 || locks > 1) { + unlock_vmnt(vmp); + unlock_bsf(); + return(EBUSY); /* can't umount a busy file system */ + } + + /* Tell FS to drop all inode references for root inode except 1. */ + vnode_clean_refs(vmp->m_root_node); + + if (vmp->m_mounted_on) { + put_vnode(vmp->m_mounted_on); + vmp->m_mounted_on = NULL; + } + + vmp->m_comm.c_max_reqs = 1; /* Force max concurrent reqs to just one, so + * we won't send any messages after the + * unmount request */ + + /* Tell FS to unmount */ + if ((r = req_unmount(vmp->m_fs_e)) != OK) /* Not recoverable. */ + printf("VFS: ignoring failed umount attempt FS endpoint: %d (%d)\n", + vmp->m_fs_e, r); + + if (is_nonedev(vmp->m_dev)) free_nonedev(vmp->m_dev); + + if (label != NULL) strcpy(label, vmp->m_label); + + if (vmp->m_root_node) { /* PFS lacks a root node */ + vmp->m_root_node->v_ref_count = 0; + vmp->m_root_node->v_fs_count = 0; + vmp->m_root_node->v_sdev = NO_DEV; + vmp->m_root_node = NULL; + } + vmp->m_dev = NO_DEV; + vmp->m_fs_e = NONE; + + /* The root FS will handle block I/O requests for this device now. */ + update_bspec(dev, ROOT_FS_E, 1 /* send new driver endpoint */); + + unlock_vmnt(vmp); + unlock_bsf(); + return(OK); +} + + +/*===========================================================================* + * unmount_all * + *===========================================================================*/ +PUBLIC void unmount_all(void) +{ +/* Unmount all filesystems. File systems are mounted on other file systems, + * so you have to pull off the loose bits repeatedly to get it all undone. + */ + + int i; + struct vmnt *vmp; + + /* Now unmount the rest */ + for (i = 0; i < NR_MNTS; i++) { + /* Unmount at least one. */ + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) { + if (vmp->m_dev != NO_DEV) + unmount(vmp->m_dev, NULL); + } + } + check_vnode_locks(); + check_vmnt_locks(); + check_filp_locks(); + check_bsf_lock(); +} + +/*===========================================================================* + * name_to_dev * + *===========================================================================*/ +PRIVATE dev_t name_to_dev(int allow_mountpt, char path[PATH_MAX+1]) +{ +/* Convert the block special file in 'user_fullpath' to a device number. + * If the given path is not a block special file, but 'allow_mountpt' is set + * and the path is the root node of a mounted file system, return that device + * number. In all other cases, return NO_DEV and an error code in 'err_code'. + */ + dev_t dev; + struct vnode *vp; + struct vmnt *vmp; + struct lookup resolve; + + lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Request lookup */ + if ((vp = eat_path(&resolve, fp)) == NULL) return(NO_DEV); + + if ((vp->v_mode & I_TYPE) == I_BLOCK_SPECIAL) { + dev = vp->v_sdev; + } else if (allow_mountpt && vp->v_vmnt->m_root_node == vp) { + dev = vp->v_dev; + } else { + err_code = ENOTBLK; + dev = NO_DEV; + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(dev); +} + + +/*===========================================================================* + * is_nonedev * + *===========================================================================*/ +PRIVATE int is_nonedev(dev_t dev) +{ +/* Return whether the given device is a "none" pseudo device. 
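+ * (Illustrative checks, using the NONE_MAJOR and NR_NONEDEVS constants:
+ * is_nonedev(makedev(NONE_MAJOR, 1)) is TRUE,
+ * is_nonedev(makedev(NONE_MAJOR, 0)) is FALSE since minor 0 is unused,
+ * and any device with a real driver major is FALSE as well.)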
+ */ + + return (major(dev) == NONE_MAJOR && + minor(dev) > 0 && minor(dev) <= NR_NONEDEVS); +} + + +/*===========================================================================* + * find_free_nonedev * + *===========================================================================*/ +PRIVATE dev_t find_free_nonedev(void) +{ +/* Find a free "none" pseudo device. Do not allocate it yet. + */ + int i; + + for (i = 0; i < NR_NONEDEVS; i++) + if (!GET_BIT(nonedev, i)) + return makedev(NONE_MAJOR, i + 1); + + err_code = EMFILE; + return NO_DEV; +} diff --git a/servers/avfs/open.c b/servers/avfs/open.c new file mode 100644 index 000000000..54b69a43b --- /dev/null +++ b/servers/avfs/open.c @@ -0,0 +1,734 @@ +/* This file contains the procedures for creating, opening, closing, and + * seeking on files. + * + * The entry points into this file are + * do_creat: perform the CREAT system call + * do_open: perform the OPEN system call + * do_mknod: perform the MKNOD system call + * do_mkdir: perform the MKDIR system call + * do_close: perform the CLOSE system call + * do_lseek: perform the LSEEK system call + * do_llseek: perform the LLSEEK system call + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "dmap.h" +#include "lock.h" +#include "param.h" +#include +#include +#include +#include "vnode.h" +#include "vmnt.h" +#include "path.h" + +PRIVATE char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0}; + +FORWARD _PROTOTYPE( int common_open, (char path[PATH_MAX+1], int oflags, + mode_t omode) ); +FORWARD _PROTOTYPE( struct vnode *new_node, (struct lookup *resolve, + int oflags, mode_t bits) ); +FORWARD _PROTOTYPE( int pipe_open, (struct vnode *vp, mode_t bits, + int oflags) ); + + +/*===========================================================================* + * do_creat * + *===========================================================================*/ +PUBLIC int do_creat() +{ +/* Perform the creat(name, mode) system call. */ + int r; + char fullpath[PATH_MAX+1]; + + if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) + return(err_code); + r = common_open(fullpath, O_WRONLY | O_CREAT | O_TRUNC, (mode_t) m_in.mode); + return(r); +} + + +/*===========================================================================* + * do_open * + *===========================================================================*/ +PUBLIC int do_open() +{ +/* Perform the open(name, flags,...) system call. */ + int create_mode = 0; /* is really mode_t but this gives problems */ + int r; + char fullpath[PATH_MAX+1]; + + /* If O_CREAT is set, open has three parameters, otherwise two. */ + if (m_in.mode & O_CREAT) { + create_mode = m_in.c_mode; + r = fetch_name(m_in.c_name, m_in.name1_length, M1, fullpath); + } else { + r = fetch_name(m_in.name, m_in.name_length, M3, fullpath); + } + + if (r != OK) return(err_code); /* name was bad */ + r = common_open(fullpath, m_in.mode, create_mode); + return(r); +} + + +/*===========================================================================* + * common_open * + *===========================================================================*/ +PRIVATE int common_open(char path[PATH_MAX+1], int oflags, mode_t omode) +{ +/* Common code from do_creat and do_open. */ + int b, r, exist = TRUE, major_dev; + dev_t dev; + mode_t bits; + struct filp *filp, *filp2; + struct vnode *vp; + struct vmnt *vmp; + struct dmap *dp; + struct lookup resolve; + + /* Remap the bottom two bits of oflags. 
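+ * (mode_map[] above encodes: O_RDONLY -> R_BIT, O_WRONLY -> W_BIT,
+ * O_RDWR -> R_BIT|W_BIT, and the unused fourth access mode -> 0, which
+ * is rejected immediately below with EINVAL.)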
*/ + bits = (mode_t) mode_map[oflags & O_ACCMODE]; + if (!bits) return(EINVAL); + + /* See if file descriptor and filp slots are available. */ + if ((r = get_fd(0, bits, &m_in.fd, &filp)) != OK) return(r); + + lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp); + + /* If O_CREATE is set, try to make the file. */ + if (oflags & O_CREAT) { + omode = I_REGULAR | (omode & ALL_MODES & fp->fp_umask); + vp = new_node(&resolve, oflags, omode); + r = err_code; + if (r == OK) exist = FALSE; /* We just created the file */ + else if (r != EEXIST) { /* other error */ + if (vp) unlock_vnode(vp); + unlock_filp(filp); + return(r); + } + else exist = !(oflags & O_EXCL);/* file exists, if the O_EXCL + flag is set this is an error */ + } else { + /* Scan path name */ + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_OPCL; + if ((vp = eat_path(&resolve, fp)) == NULL) { + unlock_filp(filp); + return(err_code); + } + + if (vmp != NULL) unlock_vmnt(vmp); + } + + /* Claim the file descriptor and filp slot and fill them in. */ + fp->fp_filp[m_in.fd] = filp; + FD_SET(m_in.fd, &fp->fp_filp_inuse); + filp->filp_count = 1; + filp->filp_vno = vp; + filp->filp_flags = oflags; + + /* Only do the normal open code if we didn't just create the file. */ + if (exist) { + /* Check protections. */ + if ((r = forbidden(vp, bits)) == OK) { + /* Opening reg. files, directories, and special files differ */ + switch (vp->v_mode & I_TYPE) { + case I_REGULAR: + /* Truncate regular file if O_TRUNC. */ + if (oflags & O_TRUNC) { + if ((r = forbidden(vp, W_BIT)) != OK) + break; + truncate_vnode(vp, 0); + } + break; + case I_DIRECTORY: + /* Directories may be read but not written. */ + r = (bits & W_BIT ? EISDIR : OK); + break; + case I_CHAR_SPECIAL: + /* Invoke the driver for special processing. */ + dev = (dev_t) vp->v_sdev; + r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE)); + if (r == SUSPEND) suspend(FP_BLOCKED_ON_DOPEN); + else vp = filp->filp_vno; /* Might be updated by + * dev_open/clone_opcl */ + break; + case I_BLOCK_SPECIAL: + + lock_bsf(); + + /* Invoke the driver for special processing. */ + dev = (dev_t) vp->v_sdev; + r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE)); + if (r != OK) { + unlock_bsf(); + break; + } + + /* Check whether the device is mounted or not. If so, + * then that FS is responsible for this device. Else + * we default to ROOT_FS. */ + vp->v_bfs_e = ROOT_FS_E; /* By default */ + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) + if (vmp->m_dev == vp->v_sdev) + vp->v_bfs_e = vmp->m_fs_e; + + /* Get the driver endpoint of the block spec device */ + major_dev = major(vp->v_sdev); + if (major_dev < 0 || major_dev >= NR_DEVICES) + r = ENXIO; + else + dp = &dmap[major_dev]; + if (r != OK || dp->dmap_driver == NONE) { + printf("VFS: driver not found for device %d\n", + vp->v_sdev); + r = ENXIO; + unlock_bsf(); + break; + } + + /* Send the driver endpoint (even when known already)*/ + if (vp->v_bfs_e != ROOT_FS_E) { + /* but only when it's the ROOT_FS */ + unlock_bsf(); + break; + } + if ((r = req_newdriver(vp->v_bfs_e, vp->v_sdev, + dp->dmap_driver)) != OK) { + printf("VFS: error sending driver endpoint\n"); + r = ENXIO; + } + unlock_bsf(); + break; + + case I_NAMED_PIPE: + /* Create a mapped inode on PFS which handles reads + and writes to this named pipe. 
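+ * (Once map_vnode(vp, PFS_PROC_NR) succeeds, data I/O on this FIFO is
+ * routed via vp->v_mapfs_e and v_mapinode_nr to PFS, while the inode's
+ * permanent attributes stay with vp->v_fs_e on the original FS.)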
*/ + tll_upgrade(&vp->v_lock); + r = map_vnode(vp, PFS_PROC_NR); + if (r == OK) { + vp->v_pipe = I_PIPE; + if (vp->v_ref_count == 1) { + vp->v_pipe_rd_pos = 0; + vp->v_pipe_wr_pos = 0; + if (vp->v_size != 0) + r = truncate_vnode(vp, 0); + } + oflags |= O_APPEND; /* force append mode */ + filp->filp_flags = oflags; + } + if (r == OK) { + r = pipe_open(vp, bits, oflags); + } + if (r != ENXIO) { + /* See if someone else is doing a rd or wt on + * the FIFO. If so, use its filp entry so the + * file position will be automatically shared. + */ + b = (bits & R_BIT ? R_BIT : W_BIT); + filp->filp_count = 0; /* don't find self */ + if ((filp2 = find_filp(vp, b)) != NULL) { + /* Co-reader or writer found. Use it.*/ + fp->fp_filp[m_in.fd] = filp2; + filp2->filp_count++; + filp2->filp_vno = vp; + filp2->filp_flags = oflags; + + /* v_count was incremented after the + * vnode has been found. i_count was + * incremented incorrectly in FS, not + * knowing that we were going to use an + * existing filp entry. Correct this + * error. + */ + unlock_vnode(vp); + put_vnode(vp); + } else { + /* Nobody else found. Restore filp. */ + filp->filp_count = 1; + } + } + break; + } + } + } + + unlock_filp(filp); + + /* If error, release inode. */ + if (r != OK) { + if (r != SUSPEND) { + fp->fp_filp[m_in.fd] = NULL; + FD_CLR(m_in.fd, &fp->fp_filp_inuse); + filp->filp_count = 0; + filp->filp_vno = NULL; + put_vnode(vp); + } + } else { + r = m_in.fd; + } + + return(r); +} + + +/*===========================================================================* + * new_node * + *===========================================================================*/ +PRIVATE struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) +{ +/* Try to create a new inode and return a pointer to it. If the inode already + exists, return a pointer to it as well, but set err_code accordingly. + NULL is returned if the path cannot be resolved up to the last + directory, or when the inode cannot be created due to permissions or + otherwise. */ + struct vnode *dirp, *vp; + struct vmnt *dir_vmp, *vp_vmp; + int r; + struct node_details res; + struct lookup findnode; + char *path; + + path = resolve->l_path; /* For easy access */ + + lookup_init(&findnode, path, resolve->l_flags, &dir_vmp, &dirp); + findnode.l_vmnt_lock = VMNT_WRITE; + findnode.l_vnode_lock = VNODE_WRITE; /* dir node */ + + /* When O_CREAT and O_EXCL flags are set, the path may not be named by a + * symbolic link. */ + if (oflags & O_EXCL) findnode.l_flags |= PATH_RET_SYMLINK; + + /* See if the path can be opened down to the last directory. */ + if ((dirp = last_dir(&findnode, fp)) == NULL) return(NULL); + + /* The final directory is accessible. Get final component of the path. */ + findnode.l_vmp = &vp_vmp; + findnode.l_vnode = &vp; + findnode.l_vnode_lock = (oflags & O_TRUNC) ? VNODE_WRITE : VNODE_OPCL; + vp = advance(dirp, &findnode, fp); + assert(vp_vmp == NULL); /* Lookup to last dir should have yielded lock + * on vmp or final component does not exist. */ + + /* The combination of a symlink with absolute path followed by a danglink + * symlink results in a new path that needs to be re-resolved entirely. */ + if (path[0] == '/') { +printf("XXX: dangling symlink needs re-resolving\n"); + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + if (vp != NULL) { + unlock_vnode(vp); + put_vnode(vp); + } + return new_node(resolve, oflags, bits); + } + + if (vp == NULL && err_code == ENOENT) { + /* Last path component does not exist. Make a new directory entry. 
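+ * (Worked example with a hypothetical path: for open("/usr/tmp/new",
+ * O_CREAT, ...), last_dir() left dirp at "/usr/tmp" and 'path' now holds
+ * "new"; req_create() below asks dirp's FS to allocate the inode and
+ * enter it in the directory under that name.)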
*/ + if ((vp = get_free_vnode()) == NULL) { + /* Can't create new entry: out of vnodes. */ + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + return(NULL); + } + + lock_vnode(vp, VNODE_OPCL); + + if ((r = forbidden(dirp, W_BIT|X_BIT)) != OK || + (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid, + fp->fp_effgid, path, &res)) != OK ) { + /* Can't create inode either due to permissions or some other + * problem. In case r is EEXIST, we might be dealing with a + * dangling symlink.*/ + if (r == EEXIST) { + struct vnode *slp, *old_wd; + + /* Resolve path up to symlink */ + findnode.l_flags = PATH_RET_SYMLINK; + findnode.l_vnode_lock = VNODE_READ; + findnode.l_vnode = &slp; + slp = advance(dirp, &findnode, fp); + if (slp != NULL) { + if (S_ISLNK(slp->v_mode)) { + /* Get contents of link */ + + r = req_rdlink(slp->v_fs_e, + slp->v_inode_nr, + VFS_PROC_NR, + path, + PATH_MAX, 0); + if (r < 0) { + /* Failed to read link */ + unlock_vnode(slp); + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(slp); + put_vnode(dirp); + err_code = r; + return(NULL); + } + path[r] = '\0'; /* Terminate path */ + } + unlock_vnode(slp); + put_vnode(slp); + } + + /* Try to create the inode the dangling symlink was + * pointing to. We have to use dirp as starting point + * as there might be multiple successive symlinks + * crossing multiple mountpoints. */ + old_wd = fp->fp_wd; /* Save orig. working dirp */ + fp->fp_wd = dirp; + vp = new_node(resolve, oflags, bits); + fp->fp_wd = old_wd; /* Restore */ + + if (vp != NULL) { + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + *(resolve->l_vnode) = vp; + return(vp); + } + r = err_code; + } + + if (r == EEXIST) + err_code = EIO; /* Impossible, we have verified that + * the last component doesn't exist and + * is not a dangling symlink. */ + else + err_code = r; + + unlock_vnode(dirp); + unlock_vnode(vp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + return(NULL); + } + + /* Store results and mark vnode in use */ + + vp->v_fs_e = res.fs_e; + vp->v_inode_nr = res.inode_nr; + vp->v_mode = res.fmode; + vp->v_size = res.fsize; + vp->v_uid = res.uid; + vp->v_gid = res.gid; + vp->v_sdev = res.dev; + vp->v_vmnt = dirp->v_vmnt; + vp->v_dev = vp->v_vmnt->m_dev; + vp->v_fs_count = 1; + vp->v_ref_count = 1; + } else { + /* Either last component exists, or there is some other problem. */ + if (vp != NULL) { + r = EEXIST; /* File exists or a symlink names a file while + * O_EXCL is set. */ + } else + r = err_code; /* Other problem. */ + } + + err_code = r; + /* When dirp equals vp, we shouldn't release the lock as a vp is locked only + * once. Releasing the lock would cause the resulting vp not be locked and + * cause mayhem later on. */ + if (dirp != vp) { + unlock_vnode(dirp); + } + unlock_vmnt(dir_vmp); + put_vnode(dirp); + + *(resolve->l_vnode) = vp; + return(vp); +} + + +/*===========================================================================* + * pipe_open * + *===========================================================================*/ +PRIVATE int pipe_open(register struct vnode *vp, register mode_t bits, + register int oflags) +{ +/* This function is called from common_open. It checks if + * there is at least one reader/writer pair for the pipe, if not + * it suspends the caller, otherwise it revives all other blocked + * processes hanging on the pipe. 
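+ * (Both sides sketched, hedged: a blocking open("fifo", O_RDONLY) with no
+ * writer yet ends in suspend(FP_BLOCKED_ON_POPEN) and returns SUSPEND;
+ * a later open("fifo", O_WRONLY) finds that reader through find_filp()
+ * and wakes it with release().)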
+ */ + + vp->v_pipe = I_PIPE; + + if((bits & (R_BIT|W_BIT)) == (R_BIT|W_BIT)) return(ENXIO); + + /* Find the reader/writer at the other end of the pipe */ + if (find_filp(vp, bits & W_BIT ? R_BIT : W_BIT) == NULL) { + /* Not found */ + if (oflags & O_NONBLOCK) { + if (bits & W_BIT) return(ENXIO); + } else { + /* Let's wait for the other side to show up */ + suspend(FP_BLOCKED_ON_POPEN); /* suspend caller */ + return(SUSPEND); + } + } else if (susp_count > 0) { /* revive blocked processes */ + release(vp, OPEN, susp_count); + release(vp, CREAT, susp_count); + } + return(OK); +} + + +/*===========================================================================* + * do_mknod * + *===========================================================================*/ +PUBLIC int do_mknod() +{ +/* Perform the mknod(name, mode, addr) system call. */ + register mode_t bits, mode_bits; + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* Only the super_user may make nodes other than fifos. */ + mode_bits = (mode_t) m_in.mk_mode; /* mode of the inode */ + if (!super_user && (((mode_bits & I_TYPE) != I_NAMED_PIPE) && + ((mode_bits & I_TYPE) != I_UNIX_SOCKET))) { + return(EPERM); + } + bits = (mode_bits & I_TYPE) | (mode_bits & ALL_MODES & fp->fp_umask); + + /* Open directory that's going to hold the new node. */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code); + + /* Make sure that the object is a directory */ + if ((vp->v_mode & I_TYPE) != I_DIRECTORY) { + r = ENOTDIR; + } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) { + r = req_mknod(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid, + fp->fp_effgid, bits, m_in.mk_z0); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_mkdir * + *===========================================================================*/ +PUBLIC int do_mkdir() +{ +/* Perform the mkdir(name, mode) system call. */ + mode_t bits; /* mode bits for the new inode */ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + bits = I_DIRECTORY | (m_in.mode & RWX_MODES & fp->fp_umask); + if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code); + + /* Make sure that the object is a directory */ + if ((vp->v_mode & I_TYPE) != I_DIRECTORY) { + r = ENOTDIR; + } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) { + r = req_mkdir(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid, + fp->fp_effgid, bits); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_lseek * + *===========================================================================*/ +PUBLIC int do_lseek() +{ +/* Perform the lseek(ls_fd, offset, whence) system call. */ + register struct filp *rfilp; + int r = OK; + long offset; + u64_t pos, newpos; + + /* Check to see if the file descriptor is valid. 
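+ * (Usage sketch: lseek(fd, -4, SEEK_END) arrives with whence == SEEK_END
+ * and offset_lo == -4, so pos starts at the file size and newpos comes
+ * from sub64ul(); any result that does not fit in 32 bits is EINVAL.)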
*/
+ if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* No lseek on pipes. */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ unlock_filp(rfilp);
+ return(ESPIPE);
+ }
+
+ /* The value of 'whence' determines the start position to use. */
+ switch(m_in.whence) {
+ case SEEK_SET: pos = cvu64(0); break;
+ case SEEK_CUR: pos = rfilp->filp_pos; break;
+ case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size); break;
+ default: unlock_filp(rfilp); return(EINVAL);
+ }
+
+ offset = m_in.offset_lo;
+ if (offset >= 0)
+ newpos = add64ul(pos, offset);
+ else
+ newpos = sub64ul(pos, -offset);
+
+ /* Check for overflow. */
+ if (ex64hi(newpos) != 0)
+ r = EINVAL;
+ else {
+ /* insert the new position into the output message */
+ m_out.reply_l1 = ex64lo(newpos);
+
+ if (cmp64(newpos, rfilp->filp_pos) != 0) {
+ /* Inhibit read ahead request */
+ r = req_inhibread(rfilp->filp_vno->v_fs_e,
+ rfilp->filp_vno->v_inode_nr);
+ }
+
+ /* Commit the new position only after comparing it against the old
+ * one; updating first would make the comparison above always false. */
+ rfilp->filp_pos = newpos;
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_llseek *
+ *===========================================================================*/
+PUBLIC int do_llseek()
+{
+/* Perform the llseek(ls_fd, offset, whence) system call. */
+ register struct filp *rfilp;
+ u64_t pos, newpos;
+ int r = OK;
+
+ /* Check to see if the file descriptor is valid. */
+ if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* No lseek on pipes. */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ unlock_filp(rfilp);
+ return(ESPIPE);
+ }
+
+ /* The value of 'whence' determines the start position to use. */
+ switch(m_in.whence) {
+ case SEEK_SET: pos = cvu64(0); break;
+ case SEEK_CUR: pos = rfilp->filp_pos; break;
+ case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size); break;
+ default: unlock_filp(rfilp); return(EINVAL);
+ }
+
+ newpos = add64(pos, make64(m_in.offset_lo, m_in.offset_high));
+
+ /* Check for overflow. */
+ if (( (long) m_in.offset_high > 0) && cmp64(newpos, pos) < 0)
+ r = EINVAL;
+ else if (( (long) m_in.offset_high < 0) && cmp64(newpos, pos) > 0)
+ r = EINVAL;
+ else {
+ /* insert the new position into the output message */
+ m_out.reply_l1 = ex64lo(newpos);
+ m_out.reply_l2 = ex64hi(newpos);
+
+ if (cmp64(newpos, rfilp->filp_pos) != 0) {
+ /* Inhibit read ahead request */
+ r = req_inhibread(rfilp->filp_vno->v_fs_e,
+ rfilp->filp_vno->v_inode_nr);
+ }
+
+ /* As in do_lseek: compare against the old position before updating. */
+ rfilp->filp_pos = newpos;
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_close *
+ *===========================================================================*/
+PUBLIC int do_close()
+{
+/* Perform the close(fd) system call. */
+
+ return close_fd(fp, m_in.fd);
+}
+
+
+/*===========================================================================*
+ * close_fd *
+ *===========================================================================*/
+PUBLIC int close_fd(rfp, fd_nr)
+struct fproc *rfp;
+int fd_nr;
+{
+/* Close file descriptor 'fd_nr' on behalf of process 'rfp'. */
+ register struct filp *rfilp;
+ register struct vnode *vp;
+ struct file_lock *flp;
+ int lock_count;
+
+ /* First locate the vnode that belongs to the file descriptor. */
+ if ( (rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL)) == NULL) return(err_code);
+ vp = rfilp->filp_vno;
+
+ close_filp(rfilp);
+ rfp->fp_filp[fd_nr] = NULL;
+ FD_CLR(fd_nr, &rfp->fp_cloexec_set);
+ FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+
+ /* Check to see if the file is locked.
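+ * (Locks live in the global file_lock[] table; a slot is in use iff its
+ * lock_type != 0, and it names its owner in lock_pid and its file in
+ * lock_vnode, which is exactly what the scan below matches on.)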
If so, release all locks. */ + if (nr_locks > 0) { + lock_count = nr_locks; /* save count of locks */ + for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) { + if (flp->lock_type == 0) continue; /* slot not in use */ + if (flp->lock_vnode == vp && flp->lock_pid == rfp->fp_pid) { + flp->lock_type = 0; + nr_locks--; + } + } + if (nr_locks < lock_count) + lock_revive(); /* one or more locks released */ + } + + return(OK); +} + +/*===========================================================================* + * close_reply * + *===========================================================================*/ +PUBLIC void close_reply() +{ + /* No need to do anything */ +} diff --git a/servers/avfs/param.h b/servers/avfs/param.h new file mode 100644 index 000000000..ad9107625 --- /dev/null +++ b/servers/avfs/param.h @@ -0,0 +1,63 @@ +#ifndef __VFS_PARAM_H__ +#define __VFS_PARAM_H__ + +/* The following names are synonyms for the variables in the input message. */ +#define addr m1_i3 +#define buffer m1_p1 +#define child_endpt m1_i2 +#define co_mode m1_i1 +#define fd m1_i1 +#define fd2 m1_i2 +#define group m1_i3 +#define ls_fd m2_i1 +#define mk_mode m1_i2 +#define mk_z0 m1_i3 +#define mode m3_i2 +#define c_mode m1_i3 +#define c_name m1_p1 +#define name m3_p1 +#define flength m2_l1 +#define name1 m1_p1 +#define name2 m1_p2 +#define name_length m3_i1 +#define name1_length m1_i1 +#define name2_length m1_i2 +#define nbytes m1_i2 +#define owner m1_i2 +#define pathname m3_ca1 +#define pid m1_i3 +#define ENDPT m1_i1 +#define offset_lo m2_l1 +#define offset_high m2_l2 +#define ctl_req m4_l1 +#define mount_flags m1_i3 +#define request m1_i2 +#define sig m1_i2 +#define endpt1 m1_i1 +#define fs_label m1_p3 +#define umount_label m3_ca1 +#define tp m2_l1 +#define utime_actime m2_l1 +#define utime_modtime m2_l2 +#define utime_file m2_p1 +#define utime_length m2_i1 +#define utime_strlen m2_i2 +#define whence m2_i2 +#define svrctl_req m2_i1 +#define svrctl_argp m2_p1 +#define info_what m1_i1 +#define info_where m1_p1 +#define md_label m2_p1 +#define md_label_len m2_l1 +#define md_major m2_i1 +#define md_style m2_i2 +#define md_flags m2_i3 + +/* The following names are synonyms for the variables in the output message. */ +#define reply_type m_type +#define reply_l1 m2_l1 +#define reply_l2 m2_l2 +#define reply_i1 m1_i1 +#define reply_i2 m1_i2 + +#endif diff --git a/servers/avfs/path.c b/servers/avfs/path.c new file mode 100644 index 000000000..1be54d9ef --- /dev/null +++ b/servers/avfs/path.c @@ -0,0 +1,687 @@ +/* lookup() is the main routine that controls the path name lookup. It + * handles mountpoints and symbolic links. The actual lookup requests + * are sent through the req_lookup wrapper function. + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "threads.h" +#include "vmnt.h" +#include "vnode.h" +#include "path.h" +#include "fproc.h" +#include "param.h" + +/* Set to following define to 1 if you really want to use the POSIX definition + * (IEEE Std 1003.1, 2004) of pathname resolution. POSIX requires pathnames + * with a traling slash (and that do not entirely consist of slash characters) + * to be treated as if a single dot is appended. This means that for example + * mkdir("dir/", ...) and rmdir("dir/") will fail because the call tries to + * create or remove the directory '.'. Historically, Unix systems just ignore + * trailing slashes. 
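+ * (Concretely: with the define at 1, mkdir("dir/", 0755) behaves like
+ * mkdir("dir/.") and fails, while at 0 the trailing slash is stripped in
+ * last_dir() below and the call creates "dir" as expected.)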
+ */ +#define DO_POSIX_PATHNAME_RES 0 + +FORWARD _PROTOTYPE( int lookup, (struct vnode *dirp, struct lookup *resolve, + node_details_t *node, struct fproc *rfp)); +FORWARD _PROTOTYPE( int check_perms, (endpoint_t ep, cp_grant_id_t io_gr, + size_t pathlen) ); + +/*===========================================================================* + * advance * + *===========================================================================*/ +PUBLIC struct vnode *advance(dirp, resolve, rfp) +struct vnode *dirp; +struct lookup *resolve; +struct fproc *rfp; +{ +/* Resolve a path name starting at dirp to a vnode. */ + int r; + int do_downgrade = 1; + struct vnode *new_vp, *vp; + struct vmnt *vmp; + struct node_details res = {0,0,0,0,0,0,0}; + tll_access_t initial_locktype; + + assert(dirp); + assert(resolve->l_vnode_lock != TLL_NONE); + assert(resolve->l_vmnt_lock != TLL_NONE); + + if (resolve->l_vnode_lock == VNODE_READ) + initial_locktype = VNODE_OPCL; + else + initial_locktype = resolve->l_vnode_lock; + + /* Get a free vnode and lock it */ + if ((new_vp = get_free_vnode()) == NULL) return(NULL); + lock_vnode(new_vp, initial_locktype); + + /* Lookup vnode belonging to the file. */ + if ((r = lookup(dirp, resolve, &res, rfp)) != OK) { + err_code = r; + unlock_vnode(new_vp); + return(NULL); + } + + /* Check whether we already have a vnode for that file */ + if ((vp = find_vnode(res.fs_e, res.inode_nr)) != NULL) { + unlock_vnode(new_vp); /* Don't need this anymore */ + do_downgrade = (lock_vnode(vp, initial_locktype) != EBUSY); + + /* Unfortunately, by the time we get the lock, another thread might've + * rid of the vnode (e.g., find_vnode found the vnode while a + * req_putnode was being processed). */ + if (vp->v_ref_count == 0) { /* vnode vanished! */ + /* As the lookup before increased the usage counters in the FS, + * we can simply set the usage counters to 1 and proceed as + * normal, because the putnode resulted in a use count of 1 in + * the FS. Other data is still valid, because the vnode was + * marked as pending lock, so get_free_vnode hasn't + * reinitialized the vnode yet. */ + vp->v_fs_count = 1; + if (vp->v_mapfs_e != NONE) vp->v_mapfs_count = 1; + } else { + vp->v_fs_count++; /* We got a reference from the FS */ + } + + } else { + /* Vnode not found, fill in the free vnode's fields */ + + new_vp->v_fs_e = res.fs_e; + new_vp->v_inode_nr = res.inode_nr; + new_vp->v_mode = res.fmode; + new_vp->v_size = res.fsize; + new_vp->v_uid = res.uid; + new_vp->v_gid = res.gid; + new_vp->v_sdev = res.dev; + + if( (vmp = find_vmnt(new_vp->v_fs_e)) == NULL) + panic("advance: vmnt not found"); + + new_vp->v_vmnt = vmp; + new_vp->v_dev = vmp->m_dev; + new_vp->v_fs_count = 1; + + vp = new_vp; + } + + dup_vnode(vp); + if (do_downgrade) { + /* Only downgrade a lock if we managed to lock it in the first place */ + *(resolve->l_vnode) = vp; + + if (initial_locktype != resolve->l_vnode_lock) + tll_downgrade(&vp->v_lock); + +#if LOCK_DEBUG + if (resolve->l_vnode_lock == VNODE_READ) + fp->fp_vp_rdlocks++; +#endif + } + + return(vp); +} + + +/*===========================================================================* + * eat_path * + *===========================================================================*/ +PUBLIC struct vnode *eat_path(resolve, rfp) +struct lookup *resolve; +struct fproc *rfp; +{ +/* Resolve path to a vnode. advance does the actual work. */ + struct vnode *start_dir; + + start_dir = (resolve->l_path[0] == '/' ? 
rfp->fp_rd : rfp->fp_wd); + return advance(start_dir, resolve, rfp); +} + + +/*===========================================================================* + * last_dir * + *===========================================================================*/ +PUBLIC struct vnode *last_dir(resolve, rfp) +struct lookup *resolve; +struct fproc *rfp; +{ +/* Parse a path, as far as the last directory, fetch the vnode + * for the last directory into the vnode table, and return a pointer to the + * vnode. In addition, return the final component of the path in 'string'. If + * the last directory can't be opened, return NULL and the reason for + * failure in 'err_code'. We can't parse component by component as that would + * be too expensive. Alternatively, we cut off the last component of the path, + * and parse the path up to the penultimate component. + */ + + size_t len; + char *cp; + char dir_entry[PATH_MAX+1]; + struct vnode *start_dir, *res; + + /* Is the path absolute or relative? Initialize 'start_dir' accordingly. */ + start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd : rfp->fp_wd); + + len = strlen(resolve->l_path); + + /* If path is empty, return ENOENT. */ + if (len == 0) { + err_code = ENOENT; + return(NULL); + } + +#if !DO_POSIX_PATHNAME_RES + /* Remove trailing slashes */ + while (len > 1 && resolve->l_path[len-1] == '/') { + len--; + resolve->l_path[len]= '\0'; + } +#endif + + cp = strrchr(resolve->l_path, '/'); + if (cp == NULL) { + /* Just one entry in the current working directory */ + struct vmnt *vmp; + + vmp = find_vmnt(start_dir->v_fs_e); + if (lock_vmnt(vmp, resolve->l_vmnt_lock) != EBUSY) + *resolve->l_vmp = vmp; + lock_vnode(start_dir, resolve->l_vnode_lock); + *resolve->l_vnode = start_dir; + dup_vnode(start_dir); + return(start_dir); + + } else if (cp[1] == '\0') { + /* Path ends in a slash. The directory entry is '.' */ + strcpy(dir_entry, "."); + } else { + /* A path name for the directory and a directory entry */ + strcpy(dir_entry, cp+1); + cp[1] = '\0'; + } + + /* Remove trailing slashes */ + while(cp > resolve->l_path && cp[0] == '/') { + cp[0]= '\0'; + cp--; + } + + resolve->l_flags = PATH_NOFLAGS; + res = advance(start_dir, resolve, rfp); + if (res == NULL) return(NULL); + + /* Copy the directory entry back to user_fullpath */ + strncpy(resolve->l_path, dir_entry, PATH_MAX); + + return(res); +} + +/*===========================================================================* + * lookup * + *===========================================================================*/ +PRIVATE int lookup(start_node, resolve, result_node, rfp) +struct vnode *start_node; +struct lookup *resolve; +node_details_t *result_node; +struct fproc *rfp; +{ +/* Resolve a path name relative to start_node. */ + + int r, symloop; + endpoint_t fs_e; + size_t path_off, path_left_len; + ino_t dir_ino, root_ino; + uid_t uid; + gid_t gid; + struct vnode *dir_vp; + struct vmnt *vmp, *vmpres; + struct lookup_res res; + + assert(resolve->l_vmp); + assert(resolve->l_vnode); + + *(resolve->l_vmp) = vmpres = NULL; /* No vmnt found nor locked yet */ + + /* Empty (start) path? */ + if (resolve->l_path[0] == '\0') { + result_node->inode_nr = 0; + return(ENOENT); + } + + if (!rfp->fp_rd || !rfp->fp_wd) { + printf("VFS: lookup %d: no rd/wd\n", rfp->fp_endpoint); + return(ENOENT); + } + + fs_e = start_node->v_fs_e; + dir_ino = start_node->v_inode_nr; + vmpres = find_vmnt(fs_e); + + /* Is the process' root directory on the same partition?, + * if so, set the chroot directory too. 
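+ * (A nonzero root_ino tells the FS which inode acts as the root for a
+ * chroot()ed process, so it can stop ".." traversal there; 0 means the
+ * root is on another partition and VFS handles that crossing itself.)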
*/
+ if (rfp->fp_rd->v_dev == rfp->fp_wd->v_dev)
+ root_ino = rfp->fp_rd->v_inode_nr;
+ else
+ root_ino = 0;
+
+ /* Set user and group ids according to the system call */
+ uid = (call_nr == ACCESS ? rfp->fp_realuid : rfp->fp_effuid);
+ gid = (call_nr == ACCESS ? rfp->fp_realgid : rfp->fp_effgid);
+
+ symloop = 0; /* Number of symlinks seen so far */
+
+ /* Lock vmnt */
+ if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
+ if (r == EBUSY) /* vmnt already locked */
+ vmpres = NULL;
+ }
+ *(resolve->l_vmp) = vmpres;
+
+ /* Issue the request */
+ r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);
+
+ if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(r); /* i.e., an error occurred */
+ }
+
+ /* While the reply indicates a mount-related event (entering or leaving a
+ * mounted partition, or a symlink with an absolute path), update the
+ * request and retry the lookup. */
+ while (r == EENTERMOUNT || r == ELEAVEMOUNT || r == ESYMLINK) {
+ /* Update the path to reflect what's left to be parsed. */
+ path_off = res.char_processed;
+ path_left_len = strlen(&resolve->l_path[path_off]);
+ memmove(resolve->l_path, &resolve->l_path[path_off], path_left_len);
+ resolve->l_path[path_left_len] = '\0'; /* terminate string */
+
+ /* Update the current value of the symloop counter */
+ symloop += res.symloop;
+ if (symloop > SYMLOOP_MAX) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(ELOOP);
+ }
+
+ /* Symlink encountered with absolute path */
+ if (r == ESYMLINK) {
+ dir_vp = rfp->fp_rd;
+ vmp = NULL;
+ } else if (r == EENTERMOUNT) {
+ /* Entering a new partition */
+ dir_vp = NULL;
+ /* Start node is now the mounted partition's root node */
+ for (vmp = &vmnt[0]; vmp != &vmnt[NR_MNTS]; ++vmp) {
+ if (vmp->m_dev != NO_DEV && vmp->m_mounted_on) {
+ if (vmp->m_mounted_on->v_inode_nr == res.inode_nr &&
+ vmp->m_mounted_on->v_fs_e == res.fs_e) {
+ dir_vp = vmp->m_root_node;
+ break;
+ }
+ }
+ }
+ assert(dir_vp);
+ } else {
+ /* Climbing up the mount tree. Find the vmnt that represents
+ * the partition on which we "climb up". */
+ if ((vmp = find_vmnt(res.fs_e)) == NULL) {
+ panic("VFS lookup: can't find parent vmnt");
+ }
+
+ /* Make sure that the child FS does not feed a bogus path
+ * to the parent FS. That is, when we climb up the tree, we
+ * must've encountered ".." in the path, and that is exactly
+ * what we're going to feed to the parent */
+ if(strncmp(resolve->l_path, "..", 2) != 0 ||
+ (resolve->l_path[2] != '\0' && resolve->l_path[2] != '/')) {
+ printf("VFS: bogus path: %s\n", resolve->l_path);
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(ENOENT);
+ }
+
+ /* Start node is the vnode on which the partition is
+ * mounted */
+ dir_vp = vmp->m_mounted_on;
+ }
+
+ /* Set the starting directory's inode number and FS endpoint */
+ fs_e = dir_vp->v_fs_e;
+ dir_ino = dir_vp->v_inode_nr;
+
+ /* Is the process' root directory on the same partition? If so,
+ * set the chroot directory too.
*/ + if (dir_vp->v_dev == rfp->fp_rd->v_dev) + root_ino = rfp->fp_rd->v_inode_nr; + else + root_ino = 0; + + /* Unlock a previously locked vmnt if locked and lock new vmnt */ + if (vmpres) unlock_vmnt(vmpres); + vmpres = find_vmnt(fs_e); + if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) { + if (r == EBUSY) + vmpres = NULL; /* Already locked */ + } + *(resolve->l_vmp) = vmpres; + + r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp); + + if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) { + if (vmpres) unlock_vmnt(vmpres); + *(resolve->l_vmp) = NULL; + return(r); + } + } + + /* Fill in response fields */ + result_node->inode_nr = res.inode_nr; + result_node->fmode = res.fmode; + result_node->fsize = res.fsize; + result_node->dev = res.dev; + result_node->fs_e = res.fs_e; + result_node->uid = res.uid; + result_node->gid = res.gid; + + return(r); +} + +/*===========================================================================* + * lookup_init * + *===========================================================================*/ +PUBLIC void lookup_init(resolve, path, flags, vmp, vp) +struct lookup *resolve; +char *path; +int flags; +struct vmnt **vmp; +struct vnode **vp; +{ + assert(vmp != NULL); + assert(vp != NULL); + + resolve->l_path = path; + resolve->l_flags = flags; + resolve->l_vmp = vmp; + resolve->l_vnode = vp; + resolve->l_vmnt_lock = TLL_NONE; + resolve->l_vnode_lock = TLL_NONE; + *vmp = NULL; /* Initialize lookup result to NULL */ + *vp = NULL; +} + +/*===========================================================================* + * get_name * + *===========================================================================*/ +PUBLIC int get_name(dirp, entry, ename) +struct vnode *dirp; +struct vnode *entry; +char ename[NAME_MAX + 1]; +{ + u64_t pos, new_pos; + int r, consumed, totalbytes; + char buf[(sizeof(struct dirent) + NAME_MAX) * 8]; + struct dirent *cur; + + pos = make64(0, 0); + + if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) { + return(EBADF); + } + + do { + r = req_getdents(dirp->v_fs_e, dirp->v_inode_nr, pos, buf, sizeof(buf), + &new_pos, 1); + + if (r == 0) { + return(ENOENT); /* end of entries -- matching inode !found */ + } else if (r < 0) { + return(r); /* error */ + } + + consumed = 0; /* bytes consumed */ + totalbytes = r; /* number of bytes to consume */ + + do { + cur = (struct dirent *) (buf + consumed); + if (entry->v_inode_nr == cur->d_ino) { + /* found the entry we were looking for */ + strncpy(ename, cur->d_name, NAME_MAX); + ename[NAME_MAX] = '\0'; + return(OK); + } + + /* not a match -- move on to the next dirent */ + consumed += cur->d_reclen; + } while (consumed < totalbytes); + + pos = new_pos; + } while (1); +} + +/*===========================================================================* + * canonical_path * + *===========================================================================*/ +PUBLIC int canonical_path(orig_path, canon_path, rfp) +char *orig_path; +char canon_path[PATH_MAX+1]; /* should have length PATH_MAX+1 */ +struct fproc *rfp; +{ + int len = 0; + int r, symloop = 0; + struct vnode *dir_vp, *parent_dir; + struct vmnt *dir_vmp, *parent_vmp; + char component[NAME_MAX+1]; + char link_path[PATH_MAX+1]; + char temp_path[PATH_MAX+1]; + struct lookup resolve; + + dir_vp = NULL; + strncpy(temp_path, orig_path, PATH_MAX); + + do { + if (dir_vp) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + } + + /* Resolve to the last directory holding the file */ + lookup_init(&resolve, 
temp_path, PATH_NOFLAGS, &dir_vmp, &dir_vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + if ((dir_vp = last_dir(&resolve, rfp)) == NULL) return(err_code); + + /* dir_vp points to dir and resolve path now contains only the + * filename. + */ + strcpy(canon_path, resolve.l_path); /* Store file name */ + + /* check if the file is a symlink, if so resolve it */ + r = rdlink_direct(canon_path, link_path, rfp); + if (r <= 0) { + strcpy(temp_path, canon_path); + break; + } + + /* encountered a symlink -- loop again */ + strcpy(temp_path, link_path); + + symloop++; + } while (symloop < SYMLOOP_MAX); + + if (symloop >= SYMLOOP_MAX) { + if (dir_vp) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + } + return(ELOOP); + } + + while(dir_vp != rfp->fp_rd) { + + strcpy(temp_path, ".."); + + /* check if we're at the root node of the file system */ + if (dir_vp->v_vmnt->m_root_node == dir_vp) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + dir_vp = dir_vp->v_vmnt->m_mounted_on; + dir_vmp = dir_vp->v_vmnt; + assert(lock_vmnt(dir_vmp, VMNT_READ) == OK); + assert(lock_vnode(dir_vp, VNODE_READ) == OK); + dup_vnode(dir_vp); + } + + lookup_init(&resolve, temp_path, PATH_NOFLAGS, &parent_vmp, + &parent_dir); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + if ((parent_dir = advance(dir_vp, &resolve, rfp)) == NULL) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + return(err_code); + } + + /* now we have to retrieve the name of the parent directory */ + if (get_name(parent_dir, dir_vp, component) != OK) { + unlock_vnode(parent_dir); + unlock_vmnt(parent_vmp); + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(parent_dir); + put_vnode(dir_vp); + return(ENOENT); + } + + len += strlen(component) + 1; + if (len > PATH_MAX) { + /* adding the component to canon_path would exceed PATH_MAX */ + unlock_vnode(parent_dir); + unlock_vmnt(parent_vmp); + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(parent_dir); + put_vnode(dir_vp); + return(ENOMEM); + } + + /* store result of component in canon_path */ + + /* first make space by moving the contents of canon_path to + * the right. Move strlen + 1 bytes to include the terminating '\0'. 
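+ * (Worked example: with canon_path == "tmp" and component == "usr", the
+ * memmove puts "tmp" at offset 4, the copy writes "usr" at offset 0, and
+ * the slash written at offset 3 completes "usr/tmp".)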
+ */ + memmove(canon_path+strlen(component)+1, canon_path, + strlen(canon_path) + 1); + + /* Copy component into canon_path */ + memmove(canon_path, component, strlen(component)); + + /* Put slash into place */ + canon_path[strlen(component)] = '/'; + + /* Store parent_dir result, and continue the loop once more */ + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + dir_vp = parent_dir; + } + + unlock_vnode(dir_vp); + unlock_vmnt(parent_vmp); + + put_vnode(dir_vp); + + /* add the leading slash */ + if (strlen(canon_path) >= PATH_MAX) return(ENAMETOOLONG); + memmove(canon_path+1, canon_path, strlen(canon_path)); + canon_path[0] = '/'; + + return(OK); +} + +/*===========================================================================* + * check_perms * + *===========================================================================*/ +PRIVATE int check_perms(ep, io_gr, pathlen) +endpoint_t ep; +cp_grant_id_t io_gr; +size_t pathlen; +{ + int r, slot; + struct vnode *vp; + struct vmnt *vmp; + struct fproc *rfp; + char orig_path[PATH_MAX+1]; + char canon_path[PATH_MAX+1]; + char temp_path[PATH_MAX+1]; + struct lookup resolve; + + if (isokendpt(ep, &slot) != OK) return(EINVAL); + if (pathlen < UNIX_PATH_MAX || pathlen > PATH_MAX) return(EINVAL); + + rfp = &(fproc[slot]); + memset(canon_path, '\0', PATH_MAX+1); + + r = sys_safecopyfrom(PFS_PROC_NR, io_gr, (vir_bytes) 0, + (vir_bytes) temp_path, pathlen, D); + if (r != OK) return(r); + + temp_path[pathlen] = '\0'; + + /* save path from pfs before permissions checking modifies it */ + memcpy(orig_path, temp_path, PATH_MAX+1); + + /* get the canonical path to the socket file */ + if ((r = canonical_path(orig_path, canon_path, rfp)) != OK) + return(r); + + if (strlen(canon_path) >= pathlen) return(ENAMETOOLONG); + + /* copy canon_path back to PFS */ + r = sys_safecopyto(PFS_PROC_NR, (cp_grant_id_t) io_gr, (vir_bytes) 0, + (vir_bytes) canon_path, strlen(canon_path)+1, + D); + if (r != OK) return(r); + + /* reload user_fullpath for permissions checking */ + memcpy(temp_path, orig_path, PATH_MAX+1); + lookup_init(&resolve, temp_path, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code); + + /* check permissions */ + r = forbidden(vp, (R_BIT | W_BIT)); + + unlock_vnode(vp); + unlock_vmnt(vmp); + + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_check_perms * + *===========================================================================*/ +PUBLIC int do_check_perms(void) +{ + return check_perms(m_in.USER_ENDPT, (cp_grant_id_t) m_in.IO_GRANT, + (size_t) m_in.COUNT); +} diff --git a/servers/avfs/path.h b/servers/avfs/path.h new file mode 100644 index 000000000..0ba68b349 --- /dev/null +++ b/servers/avfs/path.h @@ -0,0 +1,13 @@ +#ifndef __VFS_PATH_H__ +#define __VFS_PATH_H__ + +struct lookup { + char *l_path; /* Path to lookup */ + int l_flags; /* VFS/FS flags (see ) */ + tll_access_t l_vmnt_lock; /* Lock to obtain on vmnt */ + tll_access_t l_vnode_lock; /* Lock to obtain on vnode */ + struct vmnt **l_vmp; /* vmnt object that was locked */ + struct vnode **l_vnode; /* vnode object that was locked */ +}; + +#endif diff --git a/servers/avfs/pipe.c b/servers/avfs/pipe.c new file mode 100644 index 000000000..de2128069 --- /dev/null +++ b/servers/avfs/pipe.c @@ -0,0 +1,637 @@ +/* This file deals with the suspension and revival of processes. 
A process can + * be suspended because it wants to read or write from a pipe and can't, or + * because it wants to read or write from a special file and can't. When a + * process can't continue it is suspended, and revived later when it is able + * to continue. + * + * The entry points into this file are + * do_pipe: perform the PIPE system call + * pipe_check: check to see that a read or write on a pipe is feasible now + * suspend: suspend a process that cannot do a requested read or write + * release: check to see if a suspended process can be released and do + * it + * revive: mark a suspended process as able to run again + * unsuspend_by_endpt: revive all processes blocking on a given process + * do_unpause: a signal has been sent to a process; see if it suspended + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "dmap.h" +#include "param.h" +#include "select.h" +#include +#include "vnode.h" +#include "vmnt.h" + + +/*===========================================================================* + * do_pipe * + *===========================================================================*/ +PUBLIC int do_pipe() +{ +/* Perform the pipe(fil_des) system call. */ + + register struct fproc *rfp; + int r; + struct filp *fil_ptr0, *fil_ptr1; + int fil_des[2]; /* reply goes here */ + struct vnode *vp; + struct vmnt *vmp; + struct node_details res; + + /* See if a free vnode is available */ + if ((vp = get_free_vnode()) == NULL) return(err_code); + lock_vnode(vp, VNODE_OPCL); + + /* Get a lock on PFS */ + if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone"); + lock_vmnt(vmp, VMNT_WRITE); + + /* Acquire two file descriptors. */ + rfp = fp; + if ((r = get_fd(0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) { + unlock_vnode(vp); + unlock_vmnt(vmp); + return(r); + } + rfp->fp_filp[fil_des[0]] = fil_ptr0; + FD_SET(fil_des[0], &rfp->fp_filp_inuse); + fil_ptr0->filp_count = 1; /* mark filp in use */ + if ((r = get_fd(0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) { + rfp->fp_filp[fil_des[0]] = NULL; + FD_CLR(fil_des[0], &rfp->fp_filp_inuse); + fil_ptr0->filp_count = 0; /* mark filp free */ + unlock_filp(fil_ptr0); + unlock_vnode(vp); + unlock_vmnt(vmp); + return(r); + } + rfp->fp_filp[fil_des[1]] = fil_ptr1; + FD_SET(fil_des[1], &rfp->fp_filp_inuse); + fil_ptr1->filp_count = 1; + + /* Create a named pipe inode on PipeFS */ + r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE, + NO_DEV, &res); + + if (r != OK) { + rfp->fp_filp[fil_des[0]] = NULL; + FD_CLR(fil_des[0], &rfp->fp_filp_inuse); + fil_ptr0->filp_count = 0; + rfp->fp_filp[fil_des[1]] = NULL; + FD_CLR(fil_des[1], &rfp->fp_filp_inuse); + fil_ptr1->filp_count = 0; + unlock_filp(fil_ptr1); + unlock_filp(fil_ptr0); + unlock_vnode(vp); + unlock_vmnt(vmp); + return(r); + } + + /* Fill in vnode */ + vp->v_fs_e = res.fs_e; + vp->v_mapfs_e = res.fs_e; + vp->v_inode_nr = res.inode_nr; + vp->v_mapinode_nr = res.inode_nr; + vp->v_mode = res.fmode; + vp->v_pipe = I_PIPE; + vp->v_pipe_rd_pos= 0; + vp->v_pipe_wr_pos= 0; + vp->v_fs_count = 1; + vp->v_mapfs_count = 1; + vp->v_ref_count = 1; + vp->v_size = 0; + vp->v_vmnt = NULL; + vp->v_dev = NO_DEV; + + /* Fill in filp objects */ + fil_ptr0->filp_vno = vp; + dup_vnode(vp); + fil_ptr1->filp_vno = vp; + fil_ptr0->filp_flags = O_RDONLY; + fil_ptr1->filp_flags = O_WRONLY; + + m_out.reply_i1 = fil_des[0]; + m_out.reply_i2 = fil_des[1]; + + unlock_filps(fil_ptr0, fil_ptr1); + 
unlock_vmnt(vmp); + + return(OK); +} + + +/*===========================================================================* + * map_vnode * + *===========================================================================*/ +PUBLIC int map_vnode(vp, map_to_fs_e) +struct vnode *vp; +endpoint_t map_to_fs_e; +{ + int r; + struct vmnt *vmp; + struct node_details res; + + if(vp->v_mapfs_e != NONE) return(OK); /* Already mapped; nothing to do. */ + + if ((vmp = find_vmnt(map_to_fs_e)) == NULL) + panic("Can't map to unknown endpoint"); + if (lock_vmnt(vmp, VMNT_WRITE) == EBUSY) + vmp = NULL; /* Already locked, do not unlock */ + + /* Create a temporary mapping of this inode to another FS. Read and write + * operations on data will be handled by that FS. The rest by the 'original' + * FS that holds the inode. */ + if ((r = req_newnode(map_to_fs_e, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE, + vp->v_dev, &res)) == OK) { + vp->v_mapfs_e = res.fs_e; + vp->v_mapinode_nr = res.inode_nr; + vp->v_mapfs_count = 1; + } + + if (vmp) unlock_vmnt(vmp); + + return(r); +} + +/*===========================================================================* + * pipe_check * + *===========================================================================*/ +PUBLIC int pipe_check(vp, rw_flag, oflags, bytes, position, notouch) +register struct vnode *vp; /* the inode of the pipe */ +int rw_flag; /* READING or WRITING */ +int oflags; /* flags set by open or fcntl */ +register int bytes; /* bytes to be read or written (all chunks) */ +u64_t position; /* current file position */ +int notouch; /* check only */ +{ +/* Pipes are a little different. If a process reads from an empty pipe for + * which a writer still exists, suspend the reader. If the pipe is empty + * and there is no writer, return 0 bytes. If a process is writing to a + * pipe and no one is reading from it, give a broken pipe error. + */ + off_t pos; + int r = OK; + + if (ex64hi(position) != 0) + panic("pipe_check: position too large in pipe"); + pos = ex64lo(position); + + /* If reading, check for empty pipe. */ + if (rw_flag == READING) { + if (pos >= vp->v_size) { + /* Process is reading from an empty pipe. */ + if (find_filp(vp, W_BIT) != NULL) { + /* Writer exists */ + if (oflags & O_NONBLOCK) + r = EAGAIN; + else + r = SUSPEND; + + /* If need be, activate sleeping writers. */ + if (susp_count > 0) + release(vp, WRITE, susp_count); + } + return(r); + } + return(bytes); + } + + /* Process is writing to a pipe. */ + if (find_filp(vp, R_BIT) == NULL) { + /* Process is writing, but there is no reader. Tell kernel to generate + * a SIGPIPE signal. */ + if (!notouch) sys_kill(fp->fp_endpoint, SIGPIPE); + + return(EPIPE); + } + + /* Calculate how many bytes can be written. */ + if (pos + bytes > PIPE_BUF) { + if (oflags & O_NONBLOCK) { + if (bytes <= PIPE_BUF) { + /* Write has to be atomic */ + return(EAGAIN); + } + + /* Compute available space */ + bytes = PIPE_BUF - pos; + + if (bytes > 0) { + /* Do a partial write. Need to wakeup reader */ + if (!notouch) + release(vp, READ, susp_count); + return(bytes); + } else { + /* Pipe is full */ + return(EAGAIN); + } + } + + if (bytes > PIPE_BUF) { + /* Compute available space */ + bytes = PIPE_BUF - pos; + + if (bytes > 0) { + /* Do a partial write. Need to wakeup reader + * since we'll suspend ourself in read_write() + */ + if (!notouch) + release(vp, READ, susp_count); + return(bytes); + } + } + + /* Pipe is full */ + return(SUSPEND); + } + + /* Writing to an empty pipe. Search for suspended reader. 
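+ * (pos == 0 means the pipe has drained completely, so any reader that
+ * blocked in the meantime is woken via release(vp, READ, susp_count) and
+ * can consume the bytes this write is about to append.)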
*/ + if (pos == 0 && !notouch) + release(vp, READ, susp_count); + + /* Requested amount fits */ + return(bytes); +} + + +/*===========================================================================* + * suspend * + *===========================================================================*/ +PUBLIC void suspend(int why) +{ +/* Take measures to suspend the processing of the present system call. + * Store the parameters to be used upon resuming in the process table. + * (Actually they are not used when a process is waiting for an I/O device, + * but they are needed for pipes, and it is not worth making the distinction.) + * The SUSPEND pseudo error should be returned after calling suspend(). + */ + +#if DO_SANITYCHECKS + if (why == FP_BLOCKED_ON_PIPE) + panic("suspend: called for FP_BLOCKED_ON_PIPE"); + + if(fp_is_blocked(fp)) + panic("suspend: called for suspended process"); + + if(why == FP_BLOCKED_ON_NONE) + panic("suspend: called for FP_BLOCKED_ON_NONE"); +#endif + + if (why == FP_BLOCKED_ON_POPEN) + /* #procs susp'ed on pipe*/ + susp_count++; + + fp->fp_blocked_on = why; + assert(fp->fp_grant == GRANT_INVALID || !GRANT_VALID(fp->fp_grant)); + fp->fp_block_fd = m_in.fd; + fp->fp_block_callnr = call_nr; + fp->fp_flags &= ~FP_SUSP_REOPEN; /* Clear this flag. The caller + * can set it when needed. + */ + if (why == FP_BLOCKED_ON_LOCK) { + fp->fp_buffer = (char *) m_in.name1; /* third arg to fcntl() */ + fp->fp_nbytes = m_in.request; /* second arg to fcntl() */ + } else { + fp->fp_buffer = m_in.buffer; /* for reads and writes */ + fp->fp_nbytes = m_in.nbytes; + } +} + +/*===========================================================================* + * wait_for * + *===========================================================================*/ +PUBLIC void wait_for(endpoint_t who) +{ + if(who == NONE || who == ANY) + panic("suspend on NONE or ANY"); + suspend(FP_BLOCKED_ON_OTHER); + fp->fp_task = who; +} + + +/*===========================================================================* + * pipe_suspend * + *===========================================================================*/ +PUBLIC void pipe_suspend(rw_flag, fd_nr, buf, size) +int rw_flag; +int fd_nr; +char *buf; +size_t size; +{ +/* Take measures to suspend the processing of the present system call. + * Store the parameters to be used upon resuming in the process table. + * (Actually they are not used when a process is waiting for an I/O device, + * but they are needed for pipes, and it is not worth making the distinction.) + * The SUSPEND pseudo error should be returned after calling suspend(). + */ +#if DO_SANITYCHECKS + if(fp_is_blocked(fp)) + panic("pipe_suspend: called for suspended process"); +#endif + + susp_count++; /* #procs susp'ed on pipe*/ + fp->fp_blocked_on = FP_BLOCKED_ON_PIPE; + assert(!GRANT_VALID(fp->fp_grant)); + fp->fp_block_fd = fd_nr; + fp->fp_block_callnr = ((rw_flag == READING) ? READ : WRITE); + fp->fp_buffer = buf; + fp->fp_nbytes = size; +} + + +/*===========================================================================* + * unsuspend_by_endpt * + *===========================================================================*/ +PUBLIC void unsuspend_by_endpt(endpoint_t proc_e) +{ +/* Revive processes waiting for drivers (SUSPENDed) that have disappeared with + * return code EAGAIN. 
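+ * The EAGAIN forces such callers to retry instead of hanging forever on an
+ * endpoint that no longer exists.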
+ */ + struct fproc *rp; + + for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) { + if (rp->fp_pid == PID_FREE) continue; + if (rp->fp_blocked_on == FP_BLOCKED_ON_OTHER && rp->fp_task == proc_e) + revive(rp->fp_endpoint, EAGAIN); + } + + /* Revive processes waiting in drivers on select()s with EAGAIN too */ + select_unsuspend_by_endpt(proc_e); + + return; +} + + +/*===========================================================================* + * release * + *===========================================================================*/ +PUBLIC void release(vp, op, count) +register struct vnode *vp; /* inode of pipe */ +int op; /* READ, WRITE, OPEN or CREAT */ +int count; /* max number of processes to release */ +{ +/* Check to see if any process is hanging on the pipe whose inode is in 'ip'. + * If one is, and it was trying to perform the call indicated by 'call_nr', + * release it. + */ + + register struct fproc *rp; + struct filp *f; + int selop; + + /* Trying to perform the call also includes SELECTing on it with that + * operation. + */ + if (op == READ || op == WRITE) { + if (op == READ) + selop = SEL_RD; + else + selop = SEL_WR; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + if (f->filp_count < 1 || !(f->filp_pipe_select_ops & selop) || + f->filp_vno != vp) + continue; + select_callback(f, selop); + f->filp_pipe_select_ops &= ~selop; + } + } + + /* Search the proc table. */ + for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) { + if (rp->fp_pid != PID_FREE && fp_is_blocked(rp) && + !(rp->fp_flags & FP_REVIVED) && rp->fp_block_callnr == op && + rp->fp_filp[rp->fp_block_fd] != NULL && + rp->fp_filp[rp->fp_block_fd]->filp_vno == vp) { + revive(rp->fp_endpoint, 0); + susp_count--; /* keep track of who is suspended */ + if(susp_count < 0) + panic("susp_count now negative: %d", susp_count); + if (--count == 0) return; + } + } +} + + +/*===========================================================================* + * revive * + *===========================================================================*/ +PUBLIC void revive(proc_nr_e, returned) +int proc_nr_e; /* process to revive */ +int returned; /* if hanging on task, how many bytes read */ +{ +/* Revive a previously blocked process. When a process hangs on tty, this + * is the way it is eventually released. + */ + register struct fproc *rfp; + int blocked_on; + int fd_nr, slot; + struct filp *fil_ptr; + + if (proc_nr_e == NONE || isokendpt(proc_nr_e, &slot) != OK) return; + + rfp = &fproc[slot]; + if (!fp_is_blocked(rfp) || (rfp->fp_flags & FP_REVIVED)) return; + + /* The 'reviving' flag only applies to pipes. Processes waiting for TTY get + * a message right away. The revival process is different for TTY and pipes. + * For select and TTY revival, the work is already done, for pipes it is not: + * the proc must be restarted so it can try again. + */ + blocked_on = rfp->fp_blocked_on; + if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_LOCK) { + /* Revive a process suspended on a pipe or lock. 
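+ * The call is not completed here; the process is merely marked FP_REVIVED so
+ * that the suspended operation can be retried.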
*/ + rfp->fp_flags |= FP_REVIVED; + reviving++; /* process was waiting on pipe or lock */ + } else if (blocked_on == FP_BLOCKED_ON_DOPEN) { + rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; + fd_nr = rfp->fp_block_fd; + if (returned < 0) { + fil_ptr = rfp->fp_filp[fd_nr]; + lock_filp(fil_ptr, VNODE_OPCL); + rfp->fp_filp[fd_nr] = NULL; + FD_CLR(fd_nr, &rfp->fp_filp_inuse); + if (fil_ptr->filp_count != 1) { + panic("VFS: revive: bad count in filp: %d", + fil_ptr->filp_count); + } + fil_ptr->filp_count = 0; + unlock_filp(fil_ptr); + put_vnode(fil_ptr->filp_vno); + fil_ptr->filp_vno = NULL; + reply(proc_nr_e, returned); + } else { + reply(proc_nr_e, fd_nr); + } + } else { + rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; + if (blocked_on == FP_BLOCKED_ON_POPEN) { + /* process blocked in open or create */ + reply(proc_nr_e, rfp->fp_block_fd); + } else if (blocked_on == FP_BLOCKED_ON_SELECT) { + reply(proc_nr_e, returned); + } else { + /* Revive a process suspended on TTY or other device. + * Pretend it wants only what there is. + */ + rfp->fp_nbytes = returned; + /* If a grant has been issued by FS for this I/O, revoke + * it again now that I/O is done. + */ + if (GRANT_VALID(rfp->fp_grant)) { + if(cpf_revoke(rfp->fp_grant)) { + panic("VFS: revoke failed for grant: %d", + rfp->fp_grant); + } + rfp->fp_grant = GRANT_INVALID; + } + reply(proc_nr_e, returned); /* unblock the process */ + } + } +} + + +/*===========================================================================* + * unpause * + *===========================================================================*/ +PUBLIC void unpause(proc_nr_e) +int proc_nr_e; +{ +/* A signal has been sent to a user who is paused on the file system. + * Abort the system call with the EINTR error message. + */ + + register struct fproc *rfp, *org_fp; + int slot, blocked_on, fild, status = EINTR, major_dev, minor_dev; + struct filp *f; + dev_t dev; + message mess; + int wasreviving = 0; + + if (isokendpt(proc_nr_e, &slot) != OK) { + printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e); + return; + } + + rfp = &fproc[slot]; + if (!fp_is_blocked(rfp)) return; + blocked_on = rfp->fp_blocked_on; + + if (rfp->fp_flags & FP_REVIVED) { + rfp->fp_flags &= ~FP_REVIVED; + reviving--; + wasreviving = 1; + } + + switch (blocked_on) { + case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */ + break; + + case FP_BLOCKED_ON_LOCK:/* process trying to set a lock with FCNTL */ + break; + + case FP_BLOCKED_ON_SELECT:/* process blocking on select() */ + select_forget(proc_nr_e); + break; + + case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */ + break; + + case FP_BLOCKED_ON_DOPEN:/* process trying to open a device */ + /* Don't cancel OPEN. Just wait until the open completes. */ + return; + + case FP_BLOCKED_ON_OTHER:/* process trying to do device I/O (e.g. tty)*/ + if (rfp->fp_flags & FP_SUSP_REOPEN) { + /* Process is suspended while waiting for a reopen. + * Just reply EINTR. + */ + rfp->fp_flags &= ~FP_SUSP_REOPEN; + status = EINTR; + break; + } + + fild = rfp->fp_block_fd; + if (fild < 0 || fild >= OPEN_MAX) + panic("file descriptor out-of-range"); + f = rfp->fp_filp[fild]; + dev = (dev_t) f->filp_vno->v_sdev; /* device hung on */ + major_dev = major(dev); + minor_dev = minor(dev); + mess.TTY_LINE = minor_dev; + mess.USER_ENDPT = rfp->fp_ioproc; + mess.IO_GRANT = (char *) rfp->fp_grant; + + /* Tell kernel R or W. Mode is from current call, not open. */ + mess.COUNT = rfp->fp_block_callnr == READ ? 
R_BIT : W_BIT;
+	mess.m_type = CANCEL;
+
+	org_fp = fp;
+	fp = rfp;	/* hack - ctty_io uses fp */
+	(*dmap[major_dev].dmap_io)(rfp->fp_task, &mess);
+	fp = org_fp;
+	status = mess.REP_STATUS;
+	if (status == SUSPEND)
+		return;		/* Process will be revived at a
+				 * later time.
+				 */
+
+	if (status == EAGAIN) status = EINTR;
+	if (GRANT_VALID(rfp->fp_grant)) {
+		(void) cpf_revoke(rfp->fp_grant);
+		rfp->fp_grant = GRANT_INVALID;
+	}
+	break;
+    default:
+	panic("VFS: unknown block reason: %d", blocked_on);
+  }
+
+  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+
+  if ((blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_POPEN) &&
+	!wasreviving) {
+	susp_count--;
+  }
+
+  reply(proc_nr_e, status);	/* signal interrupted call */
+}
+
+#if DO_SANITYCHECKS
+/*===========================================================================*
+ *				check_pipe				     *
+ *===========================================================================*/
+PUBLIC int check_pipe(void)
+{
+/* Integrity check; verify that susp_count equals what the fproc table thinks
+ * is suspended on a pipe */
+  struct fproc *rfp;
+  int count = 0;
+
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	if (rfp->fp_pid == PID_FREE) continue;
+	if ( !(rfp->fp_flags & FP_REVIVED) &&
+			(rfp->fp_blocked_on == FP_BLOCKED_ON_PIPE ||
+			 rfp->fp_blocked_on == FP_BLOCKED_ON_POPEN)) {
+		count++;
+	}
+  }
+
+  if (count != susp_count) {
+	printf("check_pipe: count %d susp_count %d\n", count, susp_count);
+	return(0);
+  }
+
+  return(1);
+}
+#endif
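The susp_count bookkeeping that check_pipe() verifies above is spread over suspend(), pipe_suspend(), release() and unpause(), which makes it easy to break. The stand-alone model below (hypothetical names and types, not part of this change; select() wake-ups and the global 'reviving' counter are left out) captures just the invariant being checked: the counter must equal the number of pipe-blocked slots that have not yet been revived.

#include <assert.h>

#define NPROC 8

enum blocked { BL_NONE, BL_PIPE, BL_POPEN };

struct slot { enum blocked blocked_on; int revived; };

static struct slot tab[NPROC];
static int susp_count;

/* Mirror of what suspend()/pipe_suspend() do to the counter. */
static void sl_suspend(struct slot *s, enum blocked why)
{
	s->blocked_on = why;
	s->revived = 0;
	susp_count++;
}

/* Mirror of release(): the slot is revived but stays blocked until retried. */
static void sl_release(struct slot *s)
{
	s->revived = 1;
	susp_count--;
}

/* Mirror of unpause(): only decrement when the slot was not revived yet. */
static void sl_unpause(struct slot *s)
{
	if (!s->revived) susp_count--;
	s->blocked_on = BL_NONE;
	s->revived = 0;
}

/* The invariant check_pipe() enforces. */
static int sl_check(void)
{
	int i, count = 0;

	for (i = 0; i < NPROC; i++)
		if (tab[i].blocked_on != BL_NONE && !tab[i].revived)
			count++;
	return count == susp_count;
}

int main(void)
{
	sl_suspend(&tab[0], BL_PIPE);
	sl_suspend(&tab[1], BL_POPEN);
	assert(sl_check());
	sl_release(&tab[0]);	/* revived; no longer counted */
	assert(sl_check());
	sl_unpause(&tab[1]);	/* interrupted by a signal */
	assert(sl_check());
	return 0;
}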
diff --git a/servers/avfs/protect.c b/servers/avfs/protect.c
new file mode 100644
index 000000000..300a47d31
--- /dev/null
+++ b/servers/avfs/protect.c
@@ -0,0 +1,274 @@
+/* This file deals with protection in the file system. It contains the code
+ * for four system calls that relate to protection.
+ *
+ * The entry points into this file are
+ *   do_chmod:	perform the CHMOD and FCHMOD system calls
+ *   do_chown:	perform the CHOWN and FCHOWN system calls
+ *   do_umask:	perform the UMASK system call
+ *   do_access:	perform the ACCESS system call
+ */
+
+#include "fs.h"
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include
+#include "vnode.h"
+#include "vmnt.h"
+
+/*===========================================================================*
+ *				do_chmod				     *
+ *===========================================================================*/
+PUBLIC int do_chmod()
+{
+/* Perform the chmod(name, mode) and fchmod(fd, mode) system calls. */
+
+  struct filp *flp;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  int r;
+  mode_t new_mode;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  flp = NULL;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_WRITE;
+  resolve.l_vnode_lock = VNODE_WRITE;
+
+  if (call_nr == CHMOD) {
+	/* Temporarily open the file */
+	if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+		return(err_code);
+	if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  } else {	/* call_nr == FCHMOD */
+	/* File is already opened; get a pointer to vnode from filp. */
+	if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
+		return(err_code);
+	vp = flp->filp_vno;
+	dup_vnode(vp);
+  }
+
+  /* Only the owner or the super_user may change the mode of a file.
+   * No one may change the mode of a file on a read-only file system.
+   */
+  if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
+	r = EPERM;
+  else
+	r = read_only(vp);
+
+  if (r == OK) {
+	/* Now make the change. Clear setgid bit if file is not in caller's
+	 * group */
+	if (fp->fp_effuid != SU_UID && vp->v_gid != fp->fp_effgid)
+		m_in.mode &= ~I_SET_GID_BIT;
+
+	r = req_chmod(vp->v_fs_e, vp->v_inode_nr, m_in.mode, &new_mode);
+	if (r == OK)
+		vp->v_mode = new_mode;
+  }
+
+  if (call_nr == CHMOD) {
+	unlock_vnode(vp);
+	unlock_vmnt(vmp);
+  } else {	/* FCHMOD */
+	unlock_filp(flp);
+  }
+
+  put_vnode(vp);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				do_chown				     *
+ *===========================================================================*/
+PUBLIC int do_chown()
+{
+/* Perform the chown(path, owner, group) and fchown(fd, owner, group) system
+ * calls. */
+  struct filp *flp;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  int r;
+  uid_t uid;
+  gid_t gid;
+  mode_t new_mode;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  flp = NULL;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_WRITE;
+  resolve.l_vnode_lock = VNODE_WRITE;
+
+  if (call_nr == CHOWN) {
+	/* Temporarily open the file. */
+	if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+		return(err_code);
+	if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  } else {	/* call_nr == FCHOWN */
+	/* File is already opened; get a pointer to the vnode from filp. */
+	if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
+		return(err_code);
+	vp = flp->filp_vno;
+	dup_vnode(vp);
+  }
+
+  r = read_only(vp);
+  if (r == OK) {
+	/* FS is R/W. Whether call is allowed depends on ownership, etc. */
+	/* The super user can do anything, so check permissions only if we're
+	   a regular user. */
+	if (fp->fp_effuid != SU_UID) {
+		/* Regular users can only change groups of their own files. */
+		if (vp->v_uid != fp->fp_effuid) r = EPERM;
+		if (vp->v_uid != m_in.owner) r = EPERM;	/* no giving away */
+		if (fp->fp_effgid != m_in.group) r = EPERM;
+	}
+  }
+
+  if (r == OK) {
+	/* Do not change uid/gid if new uid/gid is -1. */
+	uid = (m_in.owner == (uid_t)-1 ? vp->v_uid : m_in.owner);
+	gid = (m_in.group == (gid_t)-1 ? vp->v_gid : m_in.group);
+	if ((r = req_chown(vp->v_fs_e, vp->v_inode_nr, uid, gid,
+			   &new_mode)) == OK) {
+		vp->v_uid = uid;
+		vp->v_gid = gid;
+		vp->v_mode = new_mode;
+	}
+  }
+
+  if (call_nr == CHOWN) {
+	unlock_vnode(vp);
+	unlock_vmnt(vmp);
+  } else {	/* FCHOWN */
+	unlock_filp(flp);
+  }
+
+  put_vnode(vp);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				do_umask				     *
+ *===========================================================================*/
+PUBLIC int do_umask()
+{
+/* Perform the umask(co_mode) system call. */
+  register mode_t r;
+
+  r = ~fp->fp_umask;		/* set 'r' to complement of old mask */
+  fp->fp_umask = ~(m_in.co_mode & RWX_MODES);
+  return(r);			/* return complement of old mask */
+}
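do_access() below delegates the actual decision to forbidden(), whose core is a 3-bit shift into the mode word. Here is a minimal sketch of that selection; rwx_for() is our name for the example, the super-user and supplementary-group cases that forbidden() also handles are omitted, and the bit values match the usual MINIX definitions.

#include <sys/types.h>

#define R_BIT 4
#define W_BIT 2
#define X_BIT 1

/* Pick the rwx triplet of 'mode' that applies to caller (uid, gid),
 * given the file's owner and group. */
static mode_t rwx_for(mode_t mode, uid_t uid, gid_t gid,
	uid_t file_uid, gid_t file_gid)
{
	int shift;

	if (uid == file_uid) shift = 6;		/* owner bits */
	else if (gid == file_gid) shift = 3;	/* group bits */
	else shift = 0;				/* "other" bits */
	return (mode >> shift) & (R_BIT | W_BIT | X_BIT);
}

For mode 0640, for example, the owner gets R_BIT|W_BIT, a group member gets R_BIT, and everyone else gets nothing; access is then refused unless the requested bits are a subset of that triplet.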
+/*===========================================================================*
+ *				do_access				     *
+ *===========================================================================*/
+PUBLIC int do_access()
+{
+/* Perform the access(name, mode) system call. */
+  int r;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_READ;
+  resolve.l_vnode_lock = VNODE_READ;
+
+  /* First check to see if the mode is correct. */
+  if ( (m_in.mode & ~(R_OK | W_OK | X_OK)) != 0 && m_in.mode != F_OK)
+	return(EINVAL);
+
+  /* Temporarily open the file. */
+  if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+	return(err_code);
+  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+  r = forbidden(vp, m_in.mode);
+
+  unlock_vnode(vp);
+  unlock_vmnt(vmp);
+
+  put_vnode(vp);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				forbidden				     *
+ *===========================================================================*/
+PUBLIC int forbidden(struct vnode *vp, mode_t access_desired)
+{
+/* Given a pointer to a vnode, 'vp', and the access desired, determine
+ * if the access is allowed, and if not why not. The routine looks up the
+ * caller's uid in the 'fproc' table. If access is allowed, OK is returned;
+ * if it is forbidden, EACCES is returned.
+ */
+
+  register mode_t bits, perm_bits;
+  uid_t uid;
+  gid_t gid;
+  int r, shift;
+
+  if (vp->v_uid == (uid_t) -1 || vp->v_gid == (gid_t) -1) return(EACCES);
+
+  /* Isolate the relevant rwx bits from the mode. */
+  bits = vp->v_mode;
+  uid = (call_nr == ACCESS ? fp->fp_realuid : fp->fp_effuid);
+  gid = (call_nr == ACCESS ? fp->fp_realgid : fp->fp_effgid);
+
+  if (uid == SU_UID) {
+	/* Grant read and write permission. Grant search permission for
+	 * directories. Grant execute permission (for non-directories) if
+	 * and only if one of the 'X' bits is set.
+	 */
+	if ( (bits & I_TYPE) == I_DIRECTORY ||
+	     bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
+		perm_bits = R_BIT | W_BIT | X_BIT;
+	else
+		perm_bits = R_BIT | W_BIT;
+  } else {
+	if (uid == vp->v_uid) shift = 6;		/* owner */
+	else if (gid == vp->v_gid) shift = 3;		/* group */
+	else if (in_group(fp, vp->v_gid) == OK) shift = 3; /* suppl. groups */
+	else shift = 0;					/* other */
+	perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
+  }
+
+  /* If access desired is not a subset of what is allowed, it is refused. */
+  r = OK;
+  if ((perm_bits | access_desired) != perm_bits) r = EACCES;
+
+  /* Check to see if someone is trying to write on a file system that is
+   * mounted read-only.
+   */
+  if (r == OK)
+	if (access_desired & W_BIT)
+		r = read_only(vp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				read_only				     *
+ *===========================================================================*/
+PUBLIC int read_only(vp)
+struct vnode *vp;		/* ptr to inode whose file sys is to be cked */
+{
+/* Check to see if the file system on which the inode 'vp' resides is mounted
+ * read only. If so, return EROFS, else return OK.
+ */
+  return((vp->v_vmnt->m_flags & VMNT_READONLY) ? EROFS : OK);
+}
diff --git a/servers/avfs/proto.h b/servers/avfs/proto.h
new file mode 100644
index 000000000..31f7ca2b9
--- /dev/null
+++ b/servers/avfs/proto.h
@@ -0,0 +1,372 @@
+#ifndef __VFS_PROTO_H__
+#define __VFS_PROTO_H__
+
+/* Function prototypes. */
+
+#include "timers.h"
+#include "request.h"
+#include "tll.h"
+#include "threads.h"
+#include
+
+/* Structs used in prototypes must be declared as such first.
*/ +struct filp; +struct fproc; +struct vmnt; +struct vnode; +struct lookup; +struct worker_thread; +struct job; + +typedef struct filp * filp_id_t; + +/* comm.c */ +_PROTOTYPE(int fs_sendrec, (endpoint_t fs_e, message *reqm) ); +_PROTOTYPE(void fs_sendmore, (struct vmnt *vmp) ); +_PROTOTYPE(void send_work, (void) ); + +/* device.c */ +_PROTOTYPE( int dev_open, (dev_t dev, endpoint_t proc_e, int flags) ); +_PROTOTYPE( int dev_reopen, (dev_t dev, int filp_no, int flags) ); +_PROTOTYPE( int dev_close, (dev_t dev, int filp_no) ); +_PROTOTYPE( int dev_io, (int op, dev_t dev, endpoint_t proc_e, void *buf, + u64_t pos, size_t bytes, int flags, int suspend_reopen) ); +_PROTOTYPE( int gen_opcl, (int op, dev_t dev, endpoint_t task_nr, int flags)); +_PROTOTYPE( int gen_io, (int task_nr, message *mess_ptr) ); +_PROTOTYPE( int asyn_io, (int task_nr, message *mess_ptr) ); +_PROTOTYPE( int no_dev, (int op, dev_t dev, int proc, int flags) ); +_PROTOTYPE( int no_dev_io, (int, message *) ); +_PROTOTYPE( int tty_opcl, (int op, dev_t dev, endpoint_t proc, int flags)); +_PROTOTYPE( int ctty_opcl, (int op, dev_t dev, endpoint_t proc, int flags)); +_PROTOTYPE( int clone_opcl, (int op, dev_t dev, int proc, int flags) ); +_PROTOTYPE( int ctty_io, (int task_nr, message *mess_ptr) ); +_PROTOTYPE( int do_ioctl, (void) ); +_PROTOTYPE( void pm_setsid, (int proc_e) ); +_PROTOTYPE( void dev_status, (message *) ); +_PROTOTYPE( void dev_up, (int major) ); +_PROTOTYPE( endpoint_t find_suspended_ep, (endpoint_t driver, + cp_grant_id_t g) ); +_PROTOTYPE( void reopen_reply, (void) ); +_PROTOTYPE( void open_reply, (void) ); + +/* dmap.c */ +_PROTOTYPE( int do_mapdriver, (void) ); +_PROTOTYPE( void init_dmap, (void) ); +_PROTOTYPE( int dmap_driver_match, (endpoint_t proc, int major) ); +_PROTOTYPE( void dmap_endpt_up, (int proc_nr) ); +_PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr) ); +_PROTOTYPE( struct dmap *get_dmap, (endpoint_t proc_e) ); +_PROTOTYPE( int do_mapdriver, (void) ); +_PROTOTYPE( int map_service, (struct rprocpub *rpub) ); +_PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr) ); +_PROTOTYPE( struct dmap *get_dmap, (endpoint_t proc_e) ); +_PROTOTYPE( int map_driver, (const char *label, int major, endpoint_t proc_nr, + int dev_style, int flags) ); +_PROTOTYPE( int map_service, (struct rprocpub *rpub) ); + +/* exec.c */ +_PROTOTYPE( int pm_exec, (int proc_e, char *path, vir_bytes path_len, + char *frame, vir_bytes frame_len, vir_bytes *pc)); +#define check_bsf_lock() do { \ + assert(mutex_trylock(&bsf_lock) == 0); \ + unlock_bsf(); \ + } while(0) + +/* filedes.c */ +_PROTOTYPE( void check_filp_locks, (void) ); +_PROTOTYPE( void check_filp_locks_by_me, (void) ); +_PROTOTYPE( void init_filps, (void) ); +_PROTOTYPE( struct filp *find_filp, (struct vnode *vp, mode_t bits) ); +_PROTOTYPE( int get_fd, (int start, mode_t bits, int *k, + struct filp **fpt) ); +_PROTOTYPE( struct filp *get_filp, (int fild, tll_access_t locktype) ); +_PROTOTYPE( struct filp *get_filp2, (struct fproc *rfp, int fild, + tll_access_t locktype) ); +_PROTOTYPE( void lock_filp, (struct filp *filp, tll_access_t locktype) ); +_PROTOTYPE( void unlock_filp, (struct filp *filp) ); +_PROTOTYPE( void unlock_filps, (struct filp *filp1, struct filp *filp2) ); +_PROTOTYPE( int invalidate, (struct filp *) ); +_PROTOTYPE( int do_verify_fd, (void) ); +_PROTOTYPE( int set_filp, (filp_id_t sfilp) ); +_PROTOTYPE( int do_set_filp, (void) ); +_PROTOTYPE( int copy_filp, (endpoint_t to_ep, filp_id_t cfilp) ); +_PROTOTYPE( int do_copy_filp, (void) ); +_PROTOTYPE( 
int put_filp, (filp_id_t pfilp) ); +_PROTOTYPE( int do_put_filp, (void) ); +_PROTOTYPE( int cancel_fd, (endpoint_t ep, int fd) ); +_PROTOTYPE( int do_cancel_fd, (void) ); +_PROTOTYPE( void close_filp, (struct filp *fp) ); + +/* fscall.c */ +_PROTOTYPE( void nested_fs_call, (message *m) ); + +/* link.c */ +_PROTOTYPE( int do_link, (void) ); +_PROTOTYPE( int do_unlink, (void) ); +_PROTOTYPE( int do_rename, (void) ); +_PROTOTYPE( int do_truncate, (void) ); +_PROTOTYPE( int do_ftruncate, (void) ); +_PROTOTYPE( int truncate_vnode, (struct vnode *vp, off_t newsize) ); +_PROTOTYPE( int rdlink_direct, (char *orig_path, char *link_path, + struct fproc *rfp) ); + +/* lock.c */ +_PROTOTYPE( int lock_op, (struct filp *f, int req) ); +_PROTOTYPE( void lock_revive, (void) ); + +/* main.c */ +_PROTOTYPE( int main, (void) ); +_PROTOTYPE( void reply, (int whom, int result) ); +_PROTOTYPE( void lock_proc, (struct fproc *rfp, int force_lock) ); +_PROTOTYPE( void unlock_proc, (struct fproc *rfp) ); +_PROTOTYPE( void *do_dummy, (void *arg) ); + +/* misc.c */ +_PROTOTYPE( int do_dup, (void) ); +_PROTOTYPE( void pm_exit, (int proc) ); +_PROTOTYPE( int do_fcntl, (void) ); +_PROTOTYPE( void pm_fork, (int pproc, int cproc, int cpid) ); +_PROTOTYPE( void pm_setgid, (int proc_e, int egid, int rgid) ); +_PROTOTYPE( void pm_setuid, (int proc_e, int euid, int ruid) ); +_PROTOTYPE( void pm_setgroups, (int proc_e, int ngroups, gid_t *addr) ); +_PROTOTYPE( int do_sync, (void) ); +_PROTOTYPE( int do_fsync, (void) ); +_PROTOTYPE( void pm_reboot, (void) ); +_PROTOTYPE( int do_svrctl, (void) ); +_PROTOTYPE( int do_getsysinfo, (void) ); +_PROTOTYPE( int pm_dumpcore, (int proc_e, struct mem_map *seg_ptr) ); +_PROTOTYPE( void ds_event, (void) ); + +/* mount.c */ +_PROTOTYPE( int do_fsready, (void) ); +_PROTOTYPE( int do_mount, (void) ); +_PROTOTYPE( int do_umount, (void) ); +_PROTOTYPE( void mount_pfs, (void) ); +_PROTOTYPE( int mount_fs, (dev_t dev, char fullpath[PATH_MAX+1], + endpoint_t fs_e, int rdonly, + char mount_label[LABEL_MAX]) ); +_PROTOTYPE( int unmount, (dev_t dev, char *label) ); +_PROTOTYPE( void unmount_all, (void) ); + +/* open.c */ +_PROTOTYPE( int do_close, (void) ); +_PROTOTYPE( int close_fd, (struct fproc *rfp, int fd_nr) ); +_PROTOTYPE( void close_reply, (void) ); +_PROTOTYPE( int do_creat, (void) ); +_PROTOTYPE( int do_lseek, (void) ); +_PROTOTYPE( int do_llseek, (void) ); +_PROTOTYPE( int do_mknod, (void) ); +_PROTOTYPE( int do_mkdir, (void) ); +_PROTOTYPE( int do_open, (void) ); +_PROTOTYPE( int do_slink, (void) ); +_PROTOTYPE( int do_vm_open, (void) ); +_PROTOTYPE( int do_vm_close, (void) ); + +/* path.c */ +_PROTOTYPE( struct vnode *advance, (struct vnode *dirp, struct lookup *resolve, + struct fproc *rfp) ); +_PROTOTYPE( struct vnode *eat_path, (struct lookup *resolve, + struct fproc *rfp) ); +_PROTOTYPE( struct vnode *last_dir, (struct lookup *resolve, + struct fproc *rfp) ); +_PROTOTYPE( void lookup_init, (struct lookup *resolve, char *path, int flags, + struct vmnt **vmp, struct vnode **vp) ); +_PROTOTYPE( int get_name, (struct vnode *dirp, struct vnode *entry, + char *_name) ); +_PROTOTYPE( int canonical_path, (char *orig_path, char *canon_path, + struct fproc *rfp) ); +_PROTOTYPE( int do_check_perms, (void) ); + +/* pipe.c */ +_PROTOTYPE( int do_pipe, (void) ); +_PROTOTYPE( int map_vnode, (struct vnode *vp, endpoint_t fs_e) ); +_PROTOTYPE( void unpause, (int proc_nr_e) ); +_PROTOTYPE( int pipe_check, (struct vnode *vp, int rw_flag, + int oflags, int bytes, u64_t position, int notouch) ); 
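+/* Every declaration in this header is wrapped in _PROTOTYPE() so the file
+ * can be compiled both by ANSI and by old-style (K&R) compilers: under ANSI
+ * rules _PROTOTYPE( int do_pipe, (void) ) expands to int do_pipe(void),
+ * while a K&R build degrades it to int do_pipe().
+ */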
+_PROTOTYPE( void release, (struct vnode *vp, int call_nr, int count) ); +_PROTOTYPE( void revive, (int proc_nr, int bytes) ); +_PROTOTYPE( void suspend, (int task) ); +_PROTOTYPE( void pipe_suspend, (int rw_flag, int fd_nr, char *buf, + size_t size) ); +_PROTOTYPE( void unsuspend_by_endpt, (endpoint_t) ); +_PROTOTYPE( void wait_for, (endpoint_t) ); +#if DO_SANITYCHECKS +_PROTOTYPE( int check_pipe, (void) ); +#endif + +/* protect.c */ +_PROTOTYPE( int do_access, (void) ); +_PROTOTYPE( int do_chmod, (void) ); +_PROTOTYPE( int do_chown, (void) ); +_PROTOTYPE( int do_umask, (void) ); +_PROTOTYPE( int forbidden, (struct vnode *vp, mode_t access_desired) ); +_PROTOTYPE( int read_only, (struct vnode *vp) ); + +/* read.c */ +_PROTOTYPE( int do_read, (void) ); +_PROTOTYPE( int do_getdents, (void) ); +_PROTOTYPE( void lock_bsf, (void) ); +_PROTOTYPE( void unlock_bsf, (void) ); +_PROTOTYPE( int read_write, (int rw_flag) ); +_PROTOTYPE( int rw_pipe, (int rw_flag, endpoint_t usr, + int fd_nr, struct filp *f, char *buf, size_t req_size) ); + +/* request.c */ +_PROTOTYPE( int req_breadwrite, (endpoint_t fs_e, endpoint_t user_e, + dev_t dev, u64_t pos, unsigned int num_of_bytes, + char *user_addr, int rw_flag, + u64_t *new_posp, unsigned int *cum_iop) ); +_PROTOTYPE( int req_chmod, (int fs_e, ino_t inode_nr, mode_t rmode, + mode_t *new_modep) ); +_PROTOTYPE( int req_chown, (endpoint_t fs_e, ino_t inode_nr, + uid_t newuid, gid_t newgid, mode_t *new_modep) ); +_PROTOTYPE( int req_create, (int fs_e, ino_t inode_nr, int omode, + uid_t uid, gid_t gid, char *path, node_details_t *res) ); +_PROTOTYPE( int req_flush, (endpoint_t fs_e, dev_t dev) ); +_PROTOTYPE( int req_fstatfs, (int fs_e, int who_e, char *buf) ); +_PROTOTYPE( int req_statvfs, (int fs_e, int who_e, char *buf) ); +_PROTOTYPE( int req_ftrunc, (endpoint_t fs_e, ino_t inode_nr, + off_t start, off_t end) ); +_PROTOTYPE( int req_getdents, (endpoint_t fs_e, ino_t inode_nr, + u64_t pos, char *buf, size_t size, + u64_t *new_pos, int direct) ); +_PROTOTYPE( int req_inhibread, (endpoint_t fs_e, ino_t inode_nr) ); +_PROTOTYPE( int req_link, (endpoint_t fs_e, ino_t link_parent, + char *lastc, ino_t linked_file) ); +_PROTOTYPE( int req_lookup, (endpoint_t fs_e, ino_t dir_ino, ino_t root_ino, + uid_t uid, gid_t gid, struct lookup *resolve, + lookup_res_t *res, struct fproc *rfp) ); +_PROTOTYPE( int req_mkdir, (endpoint_t fs_e, ino_t inode_nr, + char *lastc, uid_t uid, gid_t gid, mode_t dmode) ); +_PROTOTYPE( int req_mknod, (endpoint_t fs_e, ino_t inode_nr, + char *lastc, uid_t uid, gid_t gid, + mode_t dmode, dev_t dev) ); +_PROTOTYPE( int req_mountpoint, (endpoint_t fs_e, ino_t inode_nr) ); +_PROTOTYPE( int req_newnode, (endpoint_t fs_e, uid_t uid, + gid_t gid, mode_t dmode, + dev_t dev, struct node_details *res) ); +_PROTOTYPE( int req_putnode, (int fs_e, ino_t inode_nr, int count) ); +_PROTOTYPE( int req_rdlink, (endpoint_t fs_e, ino_t inode_nr, + endpoint_t who_e, char *buf, size_t len, + int direct) ); +_PROTOTYPE( int req_readsuper, (endpoint_t fs_e, char *driver_name, + dev_t dev, int readonly, int isroot, + struct node_details *res_nodep) ); +_PROTOTYPE( int req_readwrite, (endpoint_t fs_e, ino_t inode_nr, + u64_t pos, int rw_flag, + endpoint_t user_e, char *user_addr, + unsigned int num_of_bytes, u64_t *new_posp, + unsigned int *cum_iop) ); +_PROTOTYPE( int req_rename, (endpoint_t fs_e, ino_t old_dir, + char *old_name, ino_t new_dir, char *new_name) ); +_PROTOTYPE( int req_rmdir, (endpoint_t fs_e, ino_t inode_nr, + char *lastc) ); +_PROTOTYPE(int 
req_slink, (endpoint_t fs_e, ino_t inode_nr, char *lastc, + endpoint_t who_e, char *path_addr, + unsigned short path_length, uid_t uid, gid_t gid) ); +_PROTOTYPE( int req_stat, (int fs_e, ino_t inode_nr, int who_e, + char *buf, int pos, int stat_version) ); +_PROTOTYPE( int req_sync, (endpoint_t fs_e) ); +_PROTOTYPE( int req_unlink, (endpoint_t fs_e, ino_t inode_nr, + char *lastc) ); +_PROTOTYPE( int req_unmount, (endpoint_t fs_e) ); +_PROTOTYPE( int req_utime, (endpoint_t fs_e, ino_t inode_nr, + time_t actime, time_t modtime) ); +_PROTOTYPE( int req_newdriver, (endpoint_t fs_e, dev_t dev, + endpoint_t driver_e) ); + +/* stadir.c */ +_PROTOTYPE( int do_chdir, (void) ); +_PROTOTYPE( int do_fchdir, (void) ); +_PROTOTYPE( int do_chroot, (void) ); +_PROTOTYPE( int do_fstat, (void) ); +_PROTOTYPE( int do_stat, (void) ); +_PROTOTYPE( int do_fstatfs, (void) ); +_PROTOTYPE( int do_statvfs, (void) ); +_PROTOTYPE( int do_fstatvfs, (void) ); +_PROTOTYPE( int do_rdlink, (void) ); +_PROTOTYPE( int do_lstat, (void) ); + +/* time.c */ +_PROTOTYPE( int do_utime, (void) ); + +/* tll.c */ +_PROTOTYPE( void tll_downgrade, (tll_t *tllp) ); +_PROTOTYPE( int tll_haspendinglock, (tll_t *tllp) ); +_PROTOTYPE( void tll_init, (tll_t *tllp) ); +_PROTOTYPE( int tll_islocked, (tll_t *tllp) ); +_PROTOTYPE( int tll_lock, (tll_t *tllp, tll_access_t locktype) ); +_PROTOTYPE( int tll_locked_by_me, (tll_t *tllp) ); +_PROTOTYPE( void tll_lockstat, (tll_t *tllp) ); +_PROTOTYPE( int tll_unlock, (tll_t *tllp) ); +_PROTOTYPE( void tll_upgrade, (tll_t *tllp) ); + +/* utility.c */ +_PROTOTYPE( time_t clock_time, (void) ); +_PROTOTYPE( unsigned conv2, (int norm, int w) ); +_PROTOTYPE( long conv4, (int norm, long x) ); +_PROTOTYPE( int fetch_name, (char *path, int len, int flag, char *dest) ); +_PROTOTYPE( int no_sys, (void) ); +_PROTOTYPE( int isokendpt_f, (char *f, int l, endpoint_t e, int *p, int ft)); +_PROTOTYPE( int in_group, (struct fproc *rfp, gid_t grp) ); + +#define okendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 1) +#define isokendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 0) + +/* vmnt.c */ +_PROTOTYPE( void check_vmnt_locks, (void) ); +_PROTOTYPE( void check_vmnt_locks_by_me, (struct fproc *rfp) ); +_PROTOTYPE( struct vmnt *get_free_vmnt, (void) ); +_PROTOTYPE( struct vmnt *find_vmnt, (endpoint_t fs_e) ); +_PROTOTYPE( struct vmnt *get_locked_vmnt, (struct fproc *rfp) ); +_PROTOTYPE( void init_vmnts, (void) ); +_PROTOTYPE( int lock_vmnt, (struct vmnt *vp, tll_access_t locktype) ); +_PROTOTYPE( void unlock_vmnt, (struct vmnt *vp) ); + +/* vnode.c */ +_PROTOTYPE( void check_vnode_locks, (void) ); +_PROTOTYPE( void check_vnode_locks_by_me, (struct fproc *rfp) ); +_PROTOTYPE( struct vnode *get_free_vnode, (void) ); +_PROTOTYPE( struct vnode *find_vnode, (int fs_e, int numb) ); +_PROTOTYPE( void init_vnodes, (void) ); +_PROTOTYPE( int is_vnode_locked, (struct vnode *vp) ); +_PROTOTYPE( int lock_vnode, (struct vnode *vp, tll_access_t locktype) ); +_PROTOTYPE( void unlock_vnode, (struct vnode *vp) ); +_PROTOTYPE( void dup_vnode, (struct vnode *vp) ); +_PROTOTYPE( void put_vnode, (struct vnode *vp) ); +_PROTOTYPE( void vnode_clean_refs, (struct vnode *vp) ); +#if DO_SANITYCHECKS +_PROTOTYPE( int check_vrefs, (void) ); +#endif + +/* write.c */ +_PROTOTYPE( int do_write, (void) ); + +/* gcov.c */ +_PROTOTYPE( int do_gcov_flush, (void) ); +#if ! 
USE_COVERAGE +#define do_gcov_flush no_sys +#endif + +/* select.c */ +_PROTOTYPE( int do_select, (void) ); +_PROTOTYPE( void init_select, (void) ); +_PROTOTYPE( void select_callback, (struct filp *, int ops) ); +_PROTOTYPE( void select_forget, (endpoint_t proc_e) ); +_PROTOTYPE( void select_reply1, (endpoint_t driver_e, int minor, int status)); +_PROTOTYPE( void select_reply2, (endpoint_t driver_e, int minor, int status)); +_PROTOTYPE( void select_timeout_check, (timer_t *) ); +_PROTOTYPE( void select_unsuspend_by_endpt, (endpoint_t proc) ); + +/* worker.c */ +_PROTOTYPE( int worker_available, (void) ); +_PROTOTYPE( struct worker_thread *worker_get, (thread_t worker_tid) ); +_PROTOTYPE( struct job *worker_getjob, (thread_t worker_tid) ); +_PROTOTYPE( void worker_init, (struct worker_thread *worker) ); +_PROTOTYPE( struct worker_thread *worker_self, (void) ); +_PROTOTYPE( void worker_start, (void *(*func)(void *arg)) ); +_PROTOTYPE( void worker_signal, (struct worker_thread *worker) ); +_PROTOTYPE( void worker_wait, (void) ); +_PROTOTYPE( void sys_worker_start, (void *(*func)(void *arg)) ); +_PROTOTYPE( void dl_worker_start, (void *(*func)(void *arg)) ); +#endif diff --git a/servers/avfs/read.c b/servers/avfs/read.c new file mode 100644 index 000000000..ffde1f9db --- /dev/null +++ b/servers/avfs/read.c @@ -0,0 +1,326 @@ +/* This file contains the heart of the mechanism used to read (and write) + * files. Read and write requests are split up into chunks that do not cross + * block boundaries. Each chunk is then processed in turn. Reads on special + * files are also detected and handled. + * + * The entry points into this file are + * do_read: perform the READ system call by calling read_write + * do_getdents: read entries from a directory (GETDENTS) + * read_write: actually do the work of READ and WRITE + * + */ + +#include "fs.h" +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "param.h" +#include +#include +#include +#include "vnode.h" +#include "vmnt.h" + + +/*===========================================================================* + * do_read * + *===========================================================================*/ +PUBLIC int do_read() +{ + return(read_write(READING)); +} + + +/*===========================================================================* + * lock_bsf * + *===========================================================================*/ +PUBLIC void lock_bsf(void) +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + if (mutex_trylock(&bsf_lock) == 0) + return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + assert(mutex_lock(&bsf_lock) == 0); + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_bsf * + *===========================================================================*/ +PUBLIC void unlock_bsf(void) +{ + assert(mutex_unlock(&bsf_lock) == 0); +} + +/*===========================================================================* + * read_write * + *===========================================================================*/ +PUBLIC int read_write(rw_flag) +int rw_flag; /* READING or WRITING */ +{ +/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. 
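+ * The request is dispatched on the vnode type: pipes go through rw_pipe(),
+ * character and block specials through the device layer, and regular files
+ * to the file system that holds the inode.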
*/ + register struct filp *f; + register struct vnode *vp; + u64_t position, res_pos, new_pos; + unsigned int cum_io, cum_io_incr, res_cum_io; + int op, oflags, r, block_spec, char_spec, regular; + tll_access_t locktype; + mode_t mode_word; + + /* If the file descriptor is valid, get the vnode, size and mode. */ + if (m_in.nbytes < 0) return(EINVAL); + locktype = (rw_flag == READING) ? VNODE_READ : VNODE_WRITE; + if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code); + if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) { + unlock_filp(f); + return(f->filp_mode == FILP_CLOSED ? EIO : EBADF); + } + if (m_in.nbytes == 0) { + unlock_filp(f); + return(0); /* so char special files need not check for 0*/ + } + + position = f->filp_pos; + oflags = f->filp_flags; + vp = f->filp_vno; + r = OK; + cum_io = 0; + + if (vp->v_pipe == I_PIPE) { + if (fp->fp_cum_io_partial != 0) { + panic("VFS: read_write: fp_cum_io_partial not clear"); + } + r = rw_pipe(rw_flag, who_e, m_in.fd, f, m_in.buffer, m_in.nbytes); + unlock_filp(f); + return(r); + } + + op = (rw_flag == READING ? VFS_DEV_READ : VFS_DEV_WRITE); + mode_word = vp->v_mode & I_TYPE; + regular = mode_word == I_REGULAR; + + if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) { + if (vp->v_sdev == NO_DEV) + panic("VFS: read_write tries to access char dev NO_DEV"); + } + + if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) { + if (vp->v_sdev == NO_DEV) + panic("VFS: read_write tries to access block dev NO_DEV"); + } + + if (char_spec) { /* Character special files. */ + dev_t dev; + int suspend_reopen; + + suspend_reopen = (f->filp_state != FS_NORMAL); + dev = (dev_t) vp->v_sdev; + + r = dev_io(op, dev, who_e, m_in.buffer, position, m_in.nbytes, oflags, + suspend_reopen); + if (r >= 0) { + cum_io = r; + position = add64ul(position, r); + r = OK; + } + } else if (block_spec) { /* Block special files. */ + lock_bsf(); + + r = req_breadwrite(vp->v_bfs_e, who_e, vp->v_sdev, position, + m_in.nbytes, m_in.buffer, rw_flag, &res_pos, &res_cum_io); + if (r == OK) { + position = res_pos; + cum_io += res_cum_io; + } + + unlock_bsf(); + } else { /* Regular files */ + if (rw_flag == WRITING && block_spec == 0) { + /* Check for O_APPEND flag. */ + if (oflags & O_APPEND) position = cvul64(vp->v_size); + } + + /* Issue request */ + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position, rw_flag, who_e, + m_in.buffer, m_in.nbytes, &new_pos, &cum_io_incr); + + if (r >= 0) { + if (ex64hi(new_pos)) + panic("read_write: bad new pos"); + + position = new_pos; + cum_io += cum_io_incr; + } + } + + /* On write, update file size and access time. */ + if (rw_flag == WRITING) { + if (regular || mode_word == I_DIRECTORY) { + if (cmp64ul(position, vp->v_size) > 0) { + if (ex64hi(position) != 0) { + panic("read_write: file size too big "); + } + vp->v_size = ex64lo(position); + } + } + } + + f->filp_pos = position; + unlock_filp(f); + + if (r == OK) return(cum_io); + return(r); +} + + +/*===========================================================================* + * do_getdents * + *===========================================================================*/ +PUBLIC int do_getdents() +{ +/* Perform the getdents(fd, buf, size) system call. */ + int r = OK; + u64_t new_pos; + register struct filp *rfilp; + + /* Is the file descriptor valid? 
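+ * The descriptor must be open for reading and must refer to a directory;
+ * anything else fails with EBADF below.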
*/ + if ( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code); + + if (!(rfilp->filp_mode & R_BIT)) + r = EBADF; + else if ((rfilp->filp_vno->v_mode & I_TYPE) != I_DIRECTORY) + r = EBADF; + + if (r == OK) { + if (ex64hi(rfilp->filp_pos) != 0) + panic("do_getdents: can't handle large offsets"); + + r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr, + rfilp->filp_pos, m_in.buffer, m_in.nbytes,&new_pos,0); + + if (r > 0) rfilp->filp_pos = new_pos; + } + + unlock_filp(rfilp); + return(r); +} + + +/*===========================================================================* + * rw_pipe * + *===========================================================================*/ +PUBLIC int rw_pipe(rw_flag, usr_e, fd_nr, f, buf, req_size) +int rw_flag; /* READING or WRITING */ +endpoint_t usr_e; +int fd_nr; +struct filp *f; +char *buf; +size_t req_size; +{ + int r, oflags, partial_pipe = 0; + size_t size, cum_io, cum_io_incr; + struct vnode *vp; + u64_t position, new_pos; + + /* Must make sure we're operating on locked filp and vnode */ + assert(tll_islocked(&f->filp_vno->v_lock)); + assert(mutex_trylock(&f->filp_lock) == -EDEADLK); + + oflags = f->filp_flags; + vp = f->filp_vno; + position = cvu64((rw_flag == READING) ? vp->v_pipe_rd_pos : + vp->v_pipe_wr_pos); + /* fp->fp_cum_io_partial is only nonzero when doing partial writes */ + cum_io = fp->fp_cum_io_partial; + + r = pipe_check(vp, rw_flag, oflags, req_size, position, 0); + if (r <= 0) { + if (r == SUSPEND) pipe_suspend(rw_flag, fd_nr, buf, req_size); + return(r); + } + + size = r; + if (size < req_size) partial_pipe = 1; + + /* Truncate read request at size. */ + if((rw_flag == READING) && + cmp64ul(add64ul(position, size), vp->v_size) > 0) { + /* Position always should fit in an off_t (LONG_MAX). */ + off_t pos32; + + assert(cmp64ul(position, LONG_MAX) <= 0); + pos32 = cv64ul(position); + assert(pos32 >= 0); + assert(pos32 <= LONG_MAX); + size = vp->v_size - pos32; + } + + if (vp->v_mapfs_e == 0) + panic("unmapped pipe"); + + r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e, + buf, size, &new_pos, &cum_io_incr); + + if (r >= 0) { + if (ex64hi(new_pos)) + panic("rw_pipe: bad new pos"); + + position = new_pos; + cum_io += cum_io_incr; + buf += cum_io_incr; + req_size -= cum_io_incr; + } + + /* On write, update file size and access time. 
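+ * On read, a fully drained pipe gets both offsets reset to zero, so the
+ * active window always stays within the PIPE_BUF-sized backing buffer.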
 */
+  if (rw_flag == WRITING) {
+	if (cmp64ul(position, vp->v_size) > 0) {
+		if (ex64hi(position) != 0) {
+			panic("rw_pipe: file size too big for v_size");
+		}
+		vp->v_size = ex64lo(position);
+	}
+  } else {
+	if (cmp64ul(position, vp->v_size) >= 0) {
+		/* Reset pipe pointers */
+		vp->v_size = 0;
+		vp->v_pipe_rd_pos = 0;
+		vp->v_pipe_wr_pos = 0;
+		position = cvu64(0);
+	}
+  }
+
+  if (rw_flag == READING)
+	vp->v_pipe_rd_pos = cv64ul(position);
+  else
+	vp->v_pipe_wr_pos = cv64ul(position);
+
+  if (r == OK) {
+	if (partial_pipe) {
+		/* Partial write on pipe with O_NONBLOCK; return write count. */
+		if (!(oflags & O_NONBLOCK)) {
+			/* partial write on pipe with req_size > PIPE_SIZE,
+			 * non-atomic
+			 */
+			fp->fp_cum_io_partial = cum_io;
+			pipe_suspend(rw_flag, fd_nr, buf, req_size);
+			return(SUSPEND);
+		}
+	}
+	fp->fp_cum_io_partial = 0;
+	return(cum_io);
+  }
+
+  return(r);
+}
diff --git a/servers/avfs/request.c b/servers/avfs/request.c
new file mode 100644
index 000000000..6f31777a0
--- /dev/null
+++ b/servers/avfs/request.c
@@ -0,0 +1,1093 @@
+/* This file contains the wrapper functions for issuing a request to and
+ * receiving a response from an FS process.
+ * Each function builds a request message according to its parameters,
+ * calls the low-level fs_sendrec() and copies back the response.
+ * The low-level fs_sendrec() handles the recovery mechanism from
+ * a dead driver and reissues the request.
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "fproc.h"
+#include "vmnt.h"
+#include "vnode.h"
+#include "path.h"
+#include "param.h"
+
+
+/*===========================================================================*
+ *				req_breadwrite				     *
+ *===========================================================================*/
+PUBLIC int req_breadwrite(
+  endpoint_t fs_e,
+  endpoint_t user_e,
+  dev_t dev,
+  u64_t pos,
+  unsigned int num_of_bytes,
+  char *user_addr,
+  int rw_flag,
+  u64_t *new_posp,
+  unsigned int *cum_iop
+)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  message m;
+
+  grant_id = cpf_grant_magic(fs_e, user_e, (vir_bytes) user_addr, num_of_bytes,
+			(rw_flag == READING ? CPF_WRITE : CPF_READ));
+  if (grant_id == -1)
+	panic("req_breadwrite: cpf_grant_magic failed");
+
+  /* Fill in request message */
+  m.m_type = rw_flag == READING ? REQ_BREAD : REQ_BWRITE;
+  m.REQ_DEV2 = dev;
+  m.REQ_GRANT = grant_id;
+  m.REQ_SEEK_POS_LO = ex64lo(pos);
+  m.REQ_SEEK_POS_HI = ex64hi(pos);
+  m.REQ_NBYTES = num_of_bytes;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+  if (r != OK) return(r);
+
+  /* Fill in response structure */
+  *new_posp = make64(m.RES_SEEK_POS_LO, m.RES_SEEK_POS_HI);
+  *cum_iop = m.RES_NBYTES;
+
+  return(OK);
+}
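Each wrapper in this file repeats the same five-step skeleton: create a grant, fill in the request message, rendezvous with the FS process, revoke the grant, unpack the reply. A condensed sketch of that skeleton follows; req_generic is a hypothetical name, the code is written against the same headers this file includes, and a real wrapper picks whichever REQ_*/RES_* fields its request needs.

static int req_generic(endpoint_t fs_e, int req_type, void *buf, size_t len,
	int fs_writes)
{
	message m;
	cp_grant_id_t grant_id;
	int r;

	/* 1. Grant the FS access to the local buffer, for this request only. */
	grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, len,
		fs_writes ? CPF_WRITE : CPF_READ);
	if (grant_id == -1)
		panic("req_generic: cpf_grant_direct failed");

	/* 2. Fill in the request message. */
	m.m_type = req_type;
	m.REQ_GRANT = grant_id;
	m.REQ_MEM_SIZE = len;

	/* 3. Rendezvous with the FS process. */
	r = fs_sendrec(fs_e, &m);

	/* 4. The grant must never outlive the request. */
	cpf_revoke(grant_id);

	/* 5. Unpack whatever the reply carries (here: a byte count). */
	if (r == OK) r = m.RES_NBYTES;
	return(r);
}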
+/*===========================================================================*
+ *				req_chmod				     *
+ *===========================================================================*/
+PUBLIC int req_chmod(
+  int fs_e,
+  ino_t inode_nr,
+  mode_t rmode,
+  mode_t *new_modep
+)
+{
+  message m;
+  int r;
+
+  /* Fill in request message */
+  m.m_type = REQ_CHMOD;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_MODE = rmode;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+
+  /* Copy back actual mode. */
+  *new_modep = m.RES_MODE;
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_chown				     *
+ *===========================================================================*/
+PUBLIC int req_chown(
+  endpoint_t fs_e,
+  ino_t inode_nr,
+  uid_t newuid,
+  gid_t newgid,
+  mode_t *new_modep
+)
+{
+  message m;
+  int r;
+
+  /* Fill in request message */
+  m.m_type = REQ_CHOWN;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_UID = newuid;
+  m.REQ_GID = newgid;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+
+  /* Return new mode to caller. */
+  *new_modep = m.RES_MODE;
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_create				     *
+ *===========================================================================*/
+PUBLIC int req_create(
+  int fs_e,
+  ino_t inode_nr,
+  int omode,
+  uid_t uid,
+  gid_t gid,
+  char *path,
+  node_details_t *res
+)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  size_t len;
+  message m;
+
+  if (path[0] == '/')
+	panic("req_create: filename starts with '/'");
+
+  len = strlen(path) + 1;
+  grant_id = cpf_grant_direct(fs_e, (vir_bytes) path, len, CPF_READ);
+  if (grant_id == -1)
+	panic("req_create: cpf_grant_direct failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_CREATE;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_MODE = omode;
+  m.REQ_UID = uid;
+  m.REQ_GID = gid;
+  m.REQ_GRANT = grant_id;
+  m.REQ_PATH_LEN = len;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+  if (r != OK) return(r);
+
+  /* Fill in response structure */
+  res->fs_e = m.m_source;
+  res->inode_nr = m.RES_INODE_NR;
+  res->fmode = m.RES_MODE;
+  res->fsize = m.RES_FILE_SIZE_LO;
+  res->uid = m.RES_UID;
+  res->gid = m.RES_GID;
+  res->dev = m.RES_DEV;
+
+  return(OK);
+}
+
+
+/*===========================================================================*
+ *				req_flush				     *
+ *===========================================================================*/
+PUBLIC int req_flush(endpoint_t fs_e, dev_t dev)
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_FLUSH;
+  m.REQ_DEV = dev;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ *				req_fstatfs				     *
+ *===========================================================================*/
+PUBLIC int req_fstatfs(int fs_e, int proc_e, char *buf)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  message m;
+
+  grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+	sizeof(struct statfs), CPF_WRITE);
+  if (grant_id == -1)
+	panic("req_fstatfs: cpf_grant_magic failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_FSTATFS;
+  m.REQ_GRANT = grant_id;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_statvfs				     *
+ *===========================================================================*/
+PUBLIC int req_statvfs(int fs_e, int proc_e, char *buf)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  message m;
+
+  grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+	sizeof(struct statvfs), CPF_WRITE);
+  if (grant_id == -1)
+	panic("req_statvfs: cpf_grant_magic failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_STATVFS;
+  m.REQ_GRANT = grant_id;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  return(r);
+}
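Offsets wider than 32 bits cross this message interface as lo/hi halves (REQ_SEEK_POS_LO/REQ_SEEK_POS_HI, REQ_TRC_START_LO/REQ_TRC_START_HI, and so on). A minimal sketch of the round-trip, using the u64_t helpers from <minix/u64.h> that this file already leans on:

#include <minix/u64.h>
#include <assert.h>

static void split_and_join(u64_t pos)
{
	u32_t lo = ex64lo(pos);		/* low half, as sent in a ..._LO field */
	u32_t hi = ex64hi(pos);		/* high half, as sent in a ..._HI field */
	u64_t back = make64(lo, hi);	/* reassembled by the receiver */

	assert(cmp64(back, pos) == 0);	/* the round-trip is loss-free */
}

Several wrappers below simply send 0 in the HI half and panic when ex64hi() of a position is nonzero, because the protocol does not use the upper word yet.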
+/*===========================================================================* + * req_ftrunc * + *===========================================================================*/ +PUBLIC int req_ftrunc(endpoint_t fs_e, ino_t inode_nr, off_t start, off_t end) +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_FTRUNC; + m.REQ_INODE_NR = inode_nr; + m.REQ_TRC_START_LO = start; + m.REQ_TRC_START_HI = 0; /* Not used for now, so clear it. */ + m.REQ_TRC_END_LO = end; + m.REQ_TRC_END_HI = 0; /* Not used for now, so clear it. */ + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_getdents * + *===========================================================================*/ +PUBLIC int req_getdents( + endpoint_t fs_e, + ino_t inode_nr, + u64_t pos, + char *buf, + size_t size, + u64_t *new_pos, + int direct +) +{ + int r; + message m; + cp_grant_id_t grant_id; + + if (direct) { + grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, size, + CPF_WRITE); + } else { + grant_id = cpf_grant_magic(fs_e, who_e, (vir_bytes) buf, size, + CPF_WRITE); + } + + if (grant_id < 0) + panic("req_getdents: cpf_grant_direct/cpf_grant_magic failed: %d", + grant_id); + + m.m_type = REQ_GETDENTS; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_MEM_SIZE = size; + m.REQ_SEEK_POS_LO = ex64lo(pos); + m.REQ_SEEK_POS_HI = 0; /* Not used for now, so clear it. */ + + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if (r == OK) { + *new_pos = cvul64(m.RES_SEEK_POS_LO); + r = m.RES_NBYTES; + } + + return(r); +} + +/*===========================================================================* + * req_inhibread * + *===========================================================================*/ +PUBLIC int req_inhibread(endpoint_t fs_e, ino_t inode_nr) +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_INHIBREAD; + m.REQ_INODE_NR = inode_nr; + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_link * + *===========================================================================*/ +PUBLIC int req_link( + endpoint_t fs_e, + ino_t link_parent, + char *lastc, + ino_t linked_file +) +{ + int r; + cp_grant_id_t grant_id; + const size_t len = strlen(lastc) + 1; + message m; + + grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_link: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_LINK; + m.REQ_INODE_NR = linked_file; + m.REQ_DIR_INO = link_parent; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_lookup * + *===========================================================================*/ +PUBLIC int req_lookup( + endpoint_t fs_e, + ino_t dir_ino, + ino_t root_ino, + uid_t uid, + gid_t gid, + struct lookup *resolve, + lookup_res_t *res, + struct fproc *rfp +) +{ + int r; + size_t len; + cp_grant_id_t grant_id=0, grant_id2=0; + message m; + vfs_ucred_t credentials; + int flags; + + grant_id = cpf_grant_direct(fs_e, (vir_bytes) resolve->l_path, PATH_MAX+1, + CPF_READ | CPF_WRITE); + if(grant_id == -1) + panic("req_lookup: cpf_grant_direct failed"); + + flags = resolve->l_flags; + len = strlen(resolve->l_path) + 1; + + m.m_type = 
REQ_LOOKUP; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + m.REQ_PATH_SIZE = PATH_MAX + 1; + m.REQ_DIR_INO = dir_ino; + m.REQ_ROOT_INO = root_ino; + + if(rfp->fp_ngroups > 0) { /* Is the process member of multiple groups? */ + /* In that case the FS has to copy the uid/gid credentials */ + int i; + + /* Set credentials */ + credentials.vu_uid = rfp->fp_effuid; + credentials.vu_gid = rfp->fp_effgid; + credentials.vu_ngroups = rfp->fp_ngroups; + for (i = 0; i < rfp->fp_ngroups; i++) + credentials.vu_sgroups[i] = rfp->fp_sgroups[i]; + + grant_id2 = cpf_grant_direct(fs_e, (vir_bytes) &credentials, + sizeof(credentials), CPF_READ); + if(grant_id2 == -1) + panic("req_lookup: cpf_grant_direct failed"); + + m.REQ_GRANT2 = grant_id2; + m.REQ_UCRED_SIZE= sizeof(credentials); + flags |= PATH_GET_UCRED; + } else { + /* When there's only one gid, we can send it directly */ + m.REQ_UID = uid; + m.REQ_GID = gid; + flags &= ~PATH_GET_UCRED; + } + + m.REQ_FLAGS = flags; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + if(rfp->fp_ngroups > 0) cpf_revoke(grant_id2); + + /* Fill in response according to the return value */ + res->fs_e = m.m_source; + + switch (r) { + case OK: + res->inode_nr = m.RES_INODE_NR; + res->fmode = m.RES_MODE; + res->fsize = m.RES_FILE_SIZE_LO; + res->dev = m.RES_DEV; + res->uid= m.RES_UID; + res->gid= m.RES_GID; + break; + case EENTERMOUNT: + res->inode_nr = m.RES_INODE_NR; + res->char_processed = m.RES_OFFSET; + res->symloop = m.RES_SYMLOOP; + break; + case ELEAVEMOUNT: + res->char_processed = m.RES_OFFSET; + res->symloop = m.RES_SYMLOOP; + break; + case ESYMLINK: + res->char_processed = m.RES_OFFSET; + res->symloop = m.RES_SYMLOOP; + break; + default: + break; + } + + return(r); +} + + +/*===========================================================================* + * req_mkdir * + *===========================================================================*/ +PUBLIC int req_mkdir( + endpoint_t fs_e, + ino_t inode_nr, + char *lastc, + uid_t uid, + gid_t gid, + mode_t dmode +) +{ + int r; + cp_grant_id_t grant_id; + size_t len; + message m; + + len = strlen(lastc) + 1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_mkdir: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_MKDIR; + m.REQ_INODE_NR = inode_nr; + m.REQ_MODE = dmode; + m.REQ_UID = uid; + m.REQ_GID = gid; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_mknod * + *===========================================================================*/ +PUBLIC int req_mknod( + endpoint_t fs_e, + ino_t inode_nr, + char *lastc, + uid_t uid, + gid_t gid, + mode_t dmode, + dev_t dev +) +{ + int r; + size_t len; + cp_grant_id_t grant_id; + message m; + + len = strlen(lastc) + 1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_mknod: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_MKNOD; + m.REQ_INODE_NR = inode_nr; + m.REQ_MODE = dmode; + m.REQ_DEV = dev; + m.REQ_UID = uid; + m.REQ_GID = gid; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_mountpoint * + 
*===========================================================================*/ +PUBLIC int req_mountpoint(endpoint_t fs_e, ino_t inode_nr) +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_MOUNTPOINT; + m.REQ_INODE_NR = inode_nr; + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_newnode * + *===========================================================================*/ +PUBLIC int req_newnode( + endpoint_t fs_e, + uid_t uid, + gid_t gid, + mode_t dmode, + dev_t dev, + struct node_details *res +) +{ + int r; + message m; + + /* Fill in request message */ + m.m_type = REQ_NEWNODE; + m.REQ_MODE = dmode; + m.REQ_DEV = dev; + m.REQ_UID = uid; + m.REQ_GID = gid; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + + res->fs_e = m.m_source; + res->inode_nr = m.RES_INODE_NR; + res->fmode = m.RES_MODE; + res->fsize = m.RES_FILE_SIZE_LO; + res->dev = m.RES_DEV; + res->uid = m.RES_UID; + res->gid = m.RES_GID; + + return(r); +} + + +/*===========================================================================* + * req_newdriver * + *===========================================================================*/ +PUBLIC int req_newdriver( + endpoint_t fs_e, + dev_t dev, + endpoint_t driver_e +) +{ +/* Note: this is the only request function that doesn't use the + * fs_sendrec internal routine, since we want to avoid the dead + * driver recovery mechanism here. This function is actually called + * during the recovery. + */ + message m; + int r; + + /* Fill in request message */ + m.m_type = REQ_NEW_DRIVER; + m.REQ_DEV = dev; + m.REQ_DRIVER_E = driver_e; + + /* Issue request */ + if((r = sendrec(fs_e, &m)) != OK) { + printf("%s:%d VFS req_newdriver: error sending message %d to %d\n", + __FILE__, __LINE__, r, fs_e); + util_stacktrace(); + return(r); + } + + return(OK); +} + + + +/*===========================================================================* + * req_putnode * + *===========================================================================*/ +PUBLIC int req_putnode(fs_e, inode_nr, count) +int fs_e; +ino_t inode_nr; +int count; +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_PUTNODE; + m.REQ_INODE_NR = inode_nr; + m.REQ_COUNT = count; + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_rdlink * + *===========================================================================*/ +PUBLIC int req_rdlink(fs_e, inode_nr, proc_e, buf, len, direct) +endpoint_t fs_e; +ino_t inode_nr; +endpoint_t proc_e; +char *buf; +size_t len; +int direct; /* set to 1 to use direct grants instead of magic grants */ +{ + message m; + int r; + cp_grant_id_t grant_id; + + if (direct) { + grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, len, CPF_WRITE); + } else { + grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf, len, + CPF_WRITE); + } + if(grant_id == -1) + panic("req_rdlink: cpf_grant_magic failed"); + + /* Fill in request message */ + m.m_type = REQ_RDLINK; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_MEM_SIZE = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if(r == OK) r = m.RES_NBYTES; + + return(r); +} + + +/*===========================================================================* + * req_readsuper * + *===========================================================================*/ +PUBLIC int req_readsuper( + 
endpoint_t fs_e, + char *label, + dev_t dev, + int readonly, + int isroot, + struct node_details *res_nodep +) +{ + int r; + cp_grant_id_t grant_id; + size_t len; + message m; + + len = strlen(label)+1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes) label, len, CPF_READ); + if (grant_id == -1) + panic("req_readsuper: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_READSUPER; + m.REQ_FLAGS = 0; + if(readonly) m.REQ_FLAGS |= REQ_RDONLY; + if(isroot) m.REQ_FLAGS |= REQ_ISROOT; + m.REQ_GRANT = grant_id; + m.REQ_DEV = dev; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if(r == OK) { + /* Fill in response structure */ + res_nodep->fs_e = m.m_source; + res_nodep->inode_nr = m.RES_INODE_NR; + res_nodep->fmode = m.RES_MODE; + res_nodep->fsize = m.RES_FILE_SIZE_LO; + res_nodep->uid = m.RES_UID; + res_nodep->gid = m.RES_GID; + } + + return(r); +} + + +/*===========================================================================* + * req_readwrite * + *===========================================================================*/ +PUBLIC int req_readwrite(fs_e, inode_nr, pos, rw_flag, user_e, + user_addr, num_of_bytes, new_posp, cum_iop) +endpoint_t fs_e; +ino_t inode_nr; +u64_t pos; +int rw_flag; +endpoint_t user_e; +char *user_addr; +unsigned int num_of_bytes; +u64_t *new_posp; +unsigned int *cum_iop; +{ + int r; + cp_grant_id_t grant_id; + message m; + + if (ex64hi(pos) != 0) + panic("req_readwrite: pos too large"); + + grant_id = cpf_grant_magic(fs_e, user_e, (vir_bytes) user_addr, num_of_bytes, + (rw_flag==READING ? CPF_WRITE:CPF_READ)); + if (grant_id == -1) + panic("req_readwrite: cpf_grant_magic failed"); + + /* Fill in request message */ + m.m_type = rw_flag == READING ? REQ_READ : REQ_WRITE; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_SEEK_POS_LO = ex64lo(pos); + m.REQ_SEEK_POS_HI = 0; /* Not used for now, so clear it. 
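+			 * A position above 32 bits would already
+			 * have tripped the ex64hi() panic above.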
*/ + m.REQ_NBYTES = num_of_bytes; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if (r == OK) { + /* Fill in response structure */ + *new_posp = cvul64(m.RES_SEEK_POS_LO); + *cum_iop = m.RES_NBYTES; + } + + return(r); +} + + +/*===========================================================================* + * req_rename * + *===========================================================================*/ +PUBLIC int req_rename(fs_e, old_dir, old_name, new_dir, new_name) +endpoint_t fs_e; +ino_t old_dir; +char *old_name; +ino_t new_dir; +char *new_name; +{ + int r; + cp_grant_id_t gid_old, gid_new; + size_t len_old, len_new; + message m; + + len_old = strlen(old_name) + 1; + gid_old = cpf_grant_direct(fs_e, (vir_bytes) old_name, len_old, CPF_READ); + if(gid_old == -1) + panic("req_rename: cpf_grant_direct failed"); + + len_new = strlen(new_name) + 1; + gid_new = cpf_grant_direct(fs_e, (vir_bytes) new_name, len_new, CPF_READ); + if(gid_new == -1) + panic("req_rename: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_RENAME; + m.REQ_REN_OLD_DIR = old_dir; + m.REQ_REN_NEW_DIR = new_dir; + m.REQ_REN_GRANT_OLD = gid_old; + m.REQ_REN_LEN_OLD = len_old; + m.REQ_REN_GRANT_NEW = gid_new; + m.REQ_REN_LEN_NEW = len_new; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(gid_old); + cpf_revoke(gid_new); + + return(r); +} + + +/*===========================================================================* + * req_rmdir * + *===========================================================================*/ +PUBLIC int req_rmdir(fs_e, inode_nr, lastc) +endpoint_t fs_e; +ino_t inode_nr; +char *lastc; +{ + int r; + cp_grant_id_t grant_id; + size_t len; + message m; + + len = strlen(lastc) + 1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_rmdir: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_RMDIR; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_slink * + *===========================================================================*/ +PUBLIC int req_slink( + endpoint_t fs_e, + ino_t inode_nr, + char *lastc, + endpoint_t proc_e, + char *path_addr, + unsigned short path_length, + uid_t uid, + gid_t gid +) +{ + int r; + size_t len; + cp_grant_id_t gid_name, gid_buf; + message m; + + len = strlen(lastc) + 1; + gid_name = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ); + if(gid_name == -1) + panic("req_slink: cpf_grant_direct failed"); + + gid_buf = cpf_grant_magic(fs_e, proc_e, (vir_bytes) path_addr, path_length, + CPF_READ); + if(gid_buf == -1) { + cpf_revoke(gid_name); + panic("req_slink: cpf_grant_magic failed"); + } + + /* Fill in request message */ + m.m_type = REQ_SLINK; + m.REQ_INODE_NR = inode_nr; + m.REQ_UID = uid; + m.REQ_GID = gid; + m.REQ_GRANT = gid_name; + m.REQ_PATH_LEN = len; + m.REQ_GRANT3 = gid_buf; + m.REQ_MEM_SIZE = path_length; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(gid_name); + cpf_revoke(gid_buf); + + return(r); +} + + +/*===========================================================================* + * req_stat * + *===========================================================================*/ +PUBLIC int req_stat(fs_e, inode_nr, proc_e, buf, pos, stat_version) +int fs_e; +ino_t 
inode_nr;
+int proc_e;
+char *buf;
+int pos;
+int stat_version;
+{
+  cp_grant_id_t grant_id;
+  int r;
+  message m;
+  struct stat sb;
+  struct minix_prev_stat old_sb;	/* for backward compatibility */
+
+  if (pos != 0 || stat_version != 0)
+	grant_id = cpf_grant_direct(fs_e, (vir_bytes) &sb,
+				sizeof(struct stat), CPF_WRITE);
+  else
+	grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+				sizeof(struct stat), CPF_WRITE);
+
+  if (grant_id < 0)
+	panic("req_stat: cpf_grant_* failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_STAT;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_GRANT = grant_id;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  if (r != OK || (pos == 0 && stat_version == 0))
+	return(r);
+
+  if (pos != 0)
+	sb.st_size -= pos;
+  if (stat_version == 0) {
+	r = sys_vircopy(SELF, D, (vir_bytes) &sb, proc_e, D, (vir_bytes) buf,
+			sizeof(struct stat));
+	return(r);
+  }
+
+  /* The user needs the old struct stat.
+   * There is just one previous version at the moment. */
+  assert(stat_version == 1);
+
+/* XXX as long as the st_Xtime macros are defined, we have to undefine them
+ * here, because minix_prev_stat declares st_atime etc. as plain fields.
+ */
+#undef st_atime
+#undef st_ctime
+#undef st_mtime
+
+/* Copy field by field because of the st_gid type mismatch and the
+ * difference in field order after st_atime.
+ */
+  old_sb.st_dev = sb.st_dev;
+  old_sb.st_ino = sb.st_ino;
+  old_sb.st_mode = sb.st_mode;
+  old_sb.st_nlink = sb.st_nlink;
+  old_sb.st_uid = sb.st_uid;
+  old_sb.st_gid = sb.st_gid;
+  old_sb.st_rdev = sb.st_rdev;
+  old_sb.st_size = sb.st_size;
+#if defined(_NETBSD_SOURCE)
+  old_sb.st_atime = sb.st_atimespec.tv_sec;
+  old_sb.st_mtime = sb.st_mtimespec.tv_sec;
+  old_sb.st_ctime = sb.st_ctimespec.tv_sec;
+#else
+  old_sb.st_atime = sb.st_atime;
+  old_sb.st_mtime = sb.st_mtime;
+  old_sb.st_ctime = sb.st_ctime;
+#endif
+
+  r = sys_vircopy(SELF, D, (vir_bytes) &old_sb, proc_e, D, (vir_bytes) buf,
+		sizeof(struct minix_prev_stat));
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_sync				     *
+ *===========================================================================*/
+PUBLIC int req_sync(fs_e)
+endpoint_t fs_e;
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_SYNC;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ *				req_unlink				     *
+ *===========================================================================*/
+PUBLIC int req_unlink(fs_e, inode_nr, lastc)
+endpoint_t fs_e;
+ino_t inode_nr;
+char *lastc;
+{
+  cp_grant_id_t grant_id;
+  size_t len;
+  int r;
+  message m;
+
+  len = strlen(lastc) + 1;
+  grant_id = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ);
+  if(grant_id == -1)
+	panic("req_unlink: cpf_grant_direct failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_UNLINK;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_GRANT = grant_id;
+  m.REQ_PATH_LEN = len;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_unmount				     *
+ *===========================================================================*/
+PUBLIC int req_unmount(fs_e)
+endpoint_t fs_e;
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_UNMOUNT;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ *				req_utime				     *
*===========================================================================*/
+PUBLIC int req_utime(fs_e, inode_nr, actime, modtime)
+endpoint_t fs_e;
+ino_t inode_nr;
+time_t actime;
+time_t modtime;
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_UTIME;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_ACTIME = actime;
+  m.REQ_MODTIME = modtime;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
diff --git a/servers/avfs/request.h b/servers/avfs/request.h
new file mode 100644
index 000000000..74f612bf9
--- /dev/null
+++ b/servers/avfs/request.h
@@ -0,0 +1,41 @@
+#ifndef __VFS_REQUEST_H__
+#define __VFS_REQUEST_H__
+
+/* Low-level request messages are built and sent by wrapper functions.
+ * This file contains the request and response structures for accessing
+ * those wrapper functions.
+ */
+
+#include
+
+/* Structure for a response that contains inode details */
+typedef struct node_details {
+  endpoint_t fs_e;
+  ino_t inode_nr;
+  mode_t fmode;
+  off_t fsize;
+  uid_t uid;
+  gid_t gid;
+
+  /* For char/block special files */
+  dev_t dev;
+} node_details_t;
+
+/* Structure for a lookup response */
+typedef struct lookup_res {
+  endpoint_t fs_e;
+  ino_t inode_nr;
+  mode_t fmode;
+  off_t fsize;
+  uid_t uid;
+  gid_t gid;
+  /* For char/block special files */
+  dev_t dev;
+
+  /* Fields used for handling mount points and symbolic links */
+  int char_processed;
+  unsigned char symloop;
+} lookup_res_t;
+
+
+#endif
diff --git a/servers/avfs/select.c b/servers/avfs/select.c
new file mode 100644
index 000000000..76780ac5d
--- /dev/null
+++ b/servers/avfs/select.c
@@ -0,0 +1,1058 @@
+/* Implements the entry point for the select(2) system call.
+ *
+ * The entry points into this file are
+ *   do_select:	       perform the SELECT system call
+ *   select_callback:  notify the select system of a possible fd operation
+ *   select_unsuspend_by_endpt: cancel a blocking select on an exiting driver
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "select.h"
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include "vnode.h"
+
+/* Maximum number of simultaneously pending select() calls */
+#define MAXSELECTS 25
+#define FROM_PROC 0
+#define TO_PROC   1
+
+PRIVATE struct selectentry {
+  struct fproc *requestor;	/* slot is free iff this is NULL */
+  endpoint_t req_endpt;
+  fd_set readfds, writefds, errorfds;
+  fd_set ready_readfds, ready_writefds, ready_errorfds;
+  fd_set *vir_readfds, *vir_writefds, *vir_errorfds;
+  struct filp *filps[OPEN_MAX];
+  int type[OPEN_MAX];
+  int nfds, nreadyfds;
+  int error;
+  char block;
+  clock_t expiry;
+  timer_t timer;	/* if expiry > 0 */
+} selecttab[MAXSELECTS];
+
+FORWARD _PROTOTYPE(int copy_fdsets, (struct selectentry *se, int nfds,
+	int direction));
+FORWARD _PROTOTYPE(int do_select_request, (struct selectentry *se, int fd,
+	int *ops));
+FORWARD _PROTOTYPE(void filp_status, (struct filp *fp, int status));
+FORWARD _PROTOTYPE(int is_deferred, (struct selectentry *se));
+FORWARD _PROTOTYPE(void restart_proc, (struct selectentry *se));
+FORWARD _PROTOTYPE(void ops2tab, (int ops, int fd, struct selectentry *e));
+FORWARD _PROTOTYPE(int is_regular_file, (struct filp *f));
+FORWARD _PROTOTYPE(int is_pipe, (struct filp *f));
+FORWARD _PROTOTYPE(int is_supported_major, (struct filp *f));
+FORWARD _PROTOTYPE(void select_lock_filp, (struct filp *f, int ops));
+FORWARD _PROTOTYPE(int select_request_async, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_file, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_major, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_pipe, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_sync, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(void select_cancel_all, (struct selectentry *e));
+FORWARD _PROTOTYPE(void select_cancel_filp, (struct filp *f));
+FORWARD _PROTOTYPE(void select_return, (struct selectentry *));
+FORWARD _PROTOTYPE(void select_restart_filps, (void));
+FORWARD _PROTOTYPE(int tab2ops, (int fd, struct selectentry *e));
+FORWARD _PROTOTYPE(void wipe_select, (struct selectentry *s));
+
+PRIVATE struct fdtype {
+  int (*select_request)(struct filp *, int *ops, int block);
+  int (*type_match)(struct filp *f);
+} fdtypes[] = {
+  { select_request_major, is_supported_major },
+  { select_request_file, is_regular_file },
+  { select_request_pipe, is_pipe },
+};
+#define SEL_FDS		(sizeof(fdtypes) / sizeof(fdtypes[0]))
+PRIVATE int select_majors[] = { /* List of majors that support selecting on */
+  TTY_MAJOR,
+  INET_MAJOR,
+  UDS_MAJOR,
+  LOG_MAJOR,
+};
+#define SEL_MAJORS	(sizeof(select_majors) / sizeof(select_majors[0]))
+
+/*===========================================================================*
+ *				do_select				     *
+ *===========================================================================*/
+PUBLIC int do_select(void)
+{
+/* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
+ * call. First we copy the arguments and verify their sanity. Then we check
+ * whether there are file descriptors that satisfy the select call right off
+ * the bat. If so, or if there are no ready file descriptors but the process
+ * requested to return immediately, we return the result. Otherwise we set a
+ * timeout and wait for either the file descriptors to become ready or the
+ * timer to go off. If no timeout value was provided, we wait indefinitely.
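+ *
+ * Caller-visible semantics, in short: a NULL timeout pointer blocks until
+ * a descriptor becomes ready, a zeroed struct timeval polls exactly once,
+ * and any other value bounds the wait. A user-level sketch (illustrative
+ * only, not part of this change):
+ *
+ *	struct timeval tv = { 5, 0 };
+ *	r = select(nfds, &rfds, NULL, NULL, &tv);  (blocks at most 5 sec)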
*/ + + int r, nfds, do_timeout = 0, fd, s; + struct timeval timeout; + struct selectentry *se; + + nfds = m_in.SEL_NFDS; + + /* Sane amount of file descriptors? */ + if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL); + + /* Find a slot to store this select request */ + for (s = 0; s < MAXSELECTS; s++) + if (selecttab[s].requestor == NULL) /* Unused slot */ + break; + if (s >= MAXSELECTS) return(ENOSPC); + + se = &selecttab[s]; + wipe_select(se); /* Clear results of previous usage */ + se->req_endpt = who_e; + se->vir_readfds = (fd_set *) m_in.SEL_READFDS; + se->vir_writefds = (fd_set *) m_in.SEL_WRITEFDS; + se->vir_errorfds = (fd_set *) m_in.SEL_ERRORFDS; + + /* Copy fdsets from the process */ + if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) return(r); + + /* Did the process set a timeout value? If so, retrieve it. */ + if (m_in.SEL_TIMEOUT != NULL) { + do_timeout = 1; + r = sys_vircopy(who_e, D, (vir_bytes) m_in.SEL_TIMEOUT, SELF, D, + (vir_bytes) &timeout, sizeof(timeout)); + if (r != OK) return(r); + } + + /* No nonsense in the timeval */ + if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0)) + return(EINVAL); + + /* If there is no timeout, we block forever. Otherwise, we block up to the + * specified time interval. + */ + if (!do_timeout) /* No timeout value set */ + se->block = 1; + else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0)) + se->block = 1; + else /* timeout set as (0,0) - this effects a poll */ + se->block = 0; + se->expiry = 0; /* no timer set (yet) */ + + /* Verify that file descriptors are okay to select on */ + for (fd = 0; fd < nfds; fd++) { + struct filp *f; + int type, ops; + + /* Because the select() interface implicitly includes file descriptors + * you might not want to select on, we have to figure out whether we're + * interested in them. Typically, these file descriptors include fd's + * inherited from the parent proc and file descriptors that have been + * close()d, but had a lower fd than one in the current set. + */ + if (!(ops = tab2ops(fd, se))) + continue; /* No operations set; nothing to do for this fd */ + + /* Get filp belonging to this fd */ + f = se->filps[fd] = get_filp(fd, VNODE_READ); + if (f == NULL) { + if (err_code == EBADF) + r = err_code; + else /* File descriptor is 'ready' to return EIO */ + r = EINTR; + + return(r); + } + + /* Check file types. According to POSIX 2008: + * "The pselect() and select() functions shall support regular files, + * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The + * behavior of pselect() and select() on file descriptors that refer to + * other types of file is unspecified." + * + * In our case, terminal and pseudo-terminal devices are handled by the + * TTY major and sockets by either INET major (socket type AF_INET) or + * PFS major (socket type AF_UNIX). PFS acts as an FS when it handles + * pipes and as a driver when it handles sockets. Additionally, we + * support select on the LOG major to handle kernel logging, which is + * beyond the POSIX spec. 
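+ *
+ * In terms of the fdtypes[] table above: a regular file is always ready
+ * (select_request_file), pipes and FIFOs go through select_request_pipe,
+ * and a character special file on one of the majors in select_majors[]
+ * is forwarded to its driver via select_request_major.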
*/
+
+	se->type[fd] = -1;
+	for (type = 0; type < SEL_FDS; type++) {
+		if (fdtypes[type].type_match(f)) {
+			se->type[fd] = type;
+			se->nfds = fd+1;
+			se->filps[fd]->filp_selectors++;
+			break;
+		}
+	}
+	unlock_filp(f);
+	if (se->type[fd] == -1)	/* Type not found */
+		return(EBADF);
+  }
+
+  /* Check all file descriptors in the set to see whether any is 'ready' now */
+  for (fd = 0; fd < nfds; fd++) {
+	int ops, r;
+	struct filp *f;
+
+	/* Again, check for involuntarily selected fd's */
+	if (!(ops = tab2ops(fd, se)))
+		continue; /* No operations set; nothing to do for this fd */
+
+	/* Test the filp for select operations if that has not been done
+	 * already, e.g. when processes share a filp and both select on it. */
+	f = se->filps[fd];
+	select_lock_filp(f, f->filp_select_ops | ops);
+	if ((f->filp_select_ops & ops) != ops) {
+		int wantops;
+
+		wantops = (f->filp_select_ops |= ops);
+		r = do_select_request(se, fd, &wantops);
+		unlock_filp(f);
+		if (r != SEL_OK) {
+			if (r == SEL_DEFERRED) continue;
+			else break; /* Error or bogus return code; abort */
+		}
+
+		/* The select request above might have turned on/off some
+		 * operations because they were 'ready' or not meaningful.
+		 * Either way, we might have results and we need to store
+		 * them in the select table entry. */
+		if (wantops & ops) ops2tab(wantops, fd, se);
+	} else {
+		unlock_filp(f);
+	}
+  }
+
+  if ((se->nreadyfds > 0 || !se->block) && !is_deferred(se)) {
+	/* fd's were found that were ready to go right away, and/or
+	 * we were instructed not to block at all. Must return
+	 * immediately.
+	 */
+	r = copy_fdsets(se, se->nfds, TO_PROC);
+	select_cancel_all(se);
+
+	if (r != OK)
+		return(r);
+	else if (se->error != OK)
+		return(se->error);
+
+	return(se->nreadyfds);
+  }
+
+  /* Convert the timeval to ticks and set the timer. If that fails, undo
+   * everything and return the error.
+   */
+  if (do_timeout) {
+	int ticks;
+	/* Open Group:
+	 * "If the requested timeout interval requires a finer
+	 * granularity than the implementation supports, the
+	 * actual timeout interval shall be rounded up to the next
+	 * supported value."
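+	 *
+	 * As a worked example (assuming the usual system_hz of 60, for
+	 * illustration only): a requested timeout of 1500 us yields
+	 *	ticks = 0 * 60 + (1500 * 60 + 999999) / 1000000 = 1,
+	 * i.e. the 1.5 ms request is rounded up to one full clock tick.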
+ */ +#define USECPERSEC 1000000 + while(timeout.tv_usec >= USECPERSEC) { + /* this is to avoid overflow with *system_hz below */ + timeout.tv_usec -= USECPERSEC; + timeout.tv_sec++; + } + ticks = timeout.tv_sec * system_hz + + (timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC; + se->expiry = ticks; + set_timer(&se->timer, ticks, select_timeout_check, s); + } + + /* If we're blocking, the table entry is now valid */ + se->requestor = fp; + + /* process now blocked */ + suspend(FP_BLOCKED_ON_SELECT); + return(SUSPEND); +} + +/*===========================================================================* + * is_deferred * + *===========================================================================*/ +PRIVATE int is_deferred(struct selectentry *se) +{ +/* Find out whether this select has pending initial replies */ + + int fd; + struct filp *f; + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL) continue; + if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE); + } + + return(FALSE); +} + + +/*===========================================================================* + * is_regular_file * + *===========================================================================*/ +PRIVATE int is_regular_file(struct filp *f) +{ + return(f && f->filp_vno && (f->filp_vno->v_mode & I_TYPE) == I_REGULAR); +} + +/*===========================================================================* + * is_pipe * + *===========================================================================*/ +PRIVATE int is_pipe(struct filp *f) +{ +/* Recognize either anonymous pipe or named pipe (FIFO) */ + return(f && f->filp_vno && (f->filp_vno->v_mode & I_TYPE) == I_NAMED_PIPE); +} + +/*===========================================================================* + * is_supported_major * + *===========================================================================*/ +PRIVATE int is_supported_major(struct filp *f) +{ +/* See if this filp is a handle on a device on which we support select() */ + int m; + + if (!(f && f->filp_vno)) return(FALSE); + if ((f->filp_vno->v_mode & I_TYPE) != I_CHAR_SPECIAL) return(FALSE); + + for (m = 0; m < SEL_MAJORS; m++) + if (major(f->filp_vno->v_sdev) == select_majors[m]) + return(TRUE); + + return(FALSE); +} + +/*===========================================================================* + * select_request_async * + *===========================================================================*/ +PRIVATE int select_request_async(struct filp *f, int *ops, int block) +{ + int r, rops, major; + struct dmap *dp; + + rops = *ops; + + if (!block && (f->filp_select_flags & FSF_BLOCKED)) { + /* This filp is blocked waiting for a reply, but we don't want to + * block ourselves. 
Unless we're awaiting the initial reply, these + * operations won't be ready */ + if (!(f->filp_select_flags & FSF_BUSY)) { + if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK)) + rops &= ~SEL_RD; + if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK)) + rops &= ~SEL_WR; + if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK)) + rops &= ~SEL_ERR; + if (!(rops & (SEL_RD|SEL_WR|SEL_ERR))) { + /* Nothing left to do */ + *ops = 0; + return(SEL_OK); + } + } + } + + f->filp_select_flags |= FSF_UPDATE; + if (block) { + rops |= SEL_NOTIFY; + if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK; + if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK; + if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK; + } + + if (f->filp_select_flags & FSF_BUSY) + return(SEL_DEFERRED); + + major = major(f->filp_vno->v_sdev); + if (major < 0 || major >= NR_DEVICES) return(SEL_ERROR); + dp = &dmap[major]; + if (dp->dmap_sel_filp) + return(SEL_DEFERRED); + + f->filp_select_flags &= ~FSF_UPDATE; + r = dev_io(VFS_DEV_SELECT, f->filp_vno->v_sdev, rops, NULL, + cvu64(0), 0, 0, FALSE); + if (r < 0 && r != SUSPEND) + return(SEL_ERROR); + + if (r != SUSPEND) + panic("select_request_asynch: expected SUSPEND got: %d", r); + + dp->dmap_sel_filp = f; + f->filp_select_flags |= FSF_BUSY; + + return(SEL_DEFERRED); +} + +/*===========================================================================* + * select_request_file * + *===========================================================================*/ +PRIVATE int select_request_file(struct filp *f, int *ops, int block) +{ + /* Files are always ready, so output *ops is input *ops */ + return(SEL_OK); +} + +/*===========================================================================* + * select_request_major * + *===========================================================================*/ +PRIVATE int select_request_major(struct filp *f, int *ops, int block) +{ + int major, r; + + major = major(f->filp_vno->v_sdev); + if (major < 0 || major >= NR_DEVICES) return(SEL_ERROR); + + if (dmap[major].dmap_style == STYLE_DEVA || + dmap[major].dmap_style == STYLE_CLONE_A) + r = select_request_async(f, ops, block); + else + r = select_request_sync(f, ops, block); + + return(r); +} + +/*===========================================================================* + * select_request_sync * + *===========================================================================*/ +PRIVATE int select_request_sync(struct filp *f, int *ops, int block) +{ + int rops; + + rops = *ops; + if (block) rops |= SEL_NOTIFY; + *ops = dev_io(VFS_DEV_SELECT, f->filp_vno->v_sdev, rops, NULL, + cvu64(0), 0, 0, FALSE); + if (*ops < 0) + return(SEL_ERROR); + + return(SEL_OK); +} + +/*===========================================================================* + * select_request_pipe * + *===========================================================================*/ +PRIVATE int select_request_pipe(struct filp *f, int *ops, int block) +{ + int orig_ops, r = 0, err; + + orig_ops = *ops; + + if ((*ops & (SEL_RD|SEL_ERR))) { + err = pipe_check(f->filp_vno, READING, 0, 1, f->filp_pos, 1); + + if (err != SUSPEND) + r |= SEL_RD; + if (err < 0 && err != SUSPEND) + r |= SEL_ERR; + if (err == SUSPEND && !(f->filp_mode & R_BIT)) { + /* A "meaningless" read select, therefore ready + * for reading and no error set. 
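+			 * For instance, a read select on the write-only end
+			 * of a pipe can never be satisfied; reporting it
+			 * ready lets the subsequent read() fail immediately
+			 * instead of leaving the caller blocked in select().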
*/ + r |= SEL_RD; + r &= ~SEL_ERR; + } + } + + if ((*ops & (SEL_WR|SEL_ERR))) { + err = pipe_check(f->filp_vno, WRITING, 0, 1, f->filp_pos, 1); + + if (err != SUSPEND) + r |= SEL_WR; + if (err < 0 && err != SUSPEND) + r |= SEL_ERR; + if (err == SUSPEND && !(f->filp_mode & W_BIT)) { + /* A "meaningless" write select, therefore ready + for writing and no error set. */ + r |= SEL_WR; + r &= ~SEL_ERR; + } + } + + /* Some options we collected might not be requested. */ + *ops = r & orig_ops; + + if (!*ops && block) + f->filp_pipe_select_ops |= orig_ops; + + return(SEL_OK); +} + +/*===========================================================================* + * tab2ops * + *===========================================================================*/ +PRIVATE int tab2ops(int fd, struct selectentry *e) +{ + int ops = 0; + if (FD_ISSET(fd, &e->readfds)) ops |= SEL_RD; + if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR; + if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR; + + return(ops); +} + + +/*===========================================================================* + * ops2tab * + *===========================================================================*/ +PRIVATE void ops2tab(int ops, int fd, struct selectentry *e) +{ + if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) && + !FD_ISSET(fd, &e->ready_readfds)) { + FD_SET(fd, &e->ready_readfds); + e->nreadyfds++; + } + + if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) && + !FD_ISSET(fd, &e->ready_writefds)) { + FD_SET(fd, &e->ready_writefds); + e->nreadyfds++; + } + + if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) && + !FD_ISSET(fd, &e->ready_errorfds)) { + FD_SET(fd, &e->ready_errorfds); + e->nreadyfds++; + } +} + + +/*===========================================================================* + * copy_fdsets * + *===========================================================================*/ +PRIVATE int copy_fdsets(struct selectentry *se, int nfds, int direction) +{ + int r; + size_t fd_setsize; + endpoint_t src_e, dst_e; + fd_set *src_fds, *dst_fds; + + if (nfds < 0 || nfds > OPEN_MAX) + panic("select copy_fdsets: nfds wrong: %d", nfds); + + /* Only copy back as many bits as the user expects. */ +#ifdef __NBSD_LIBC + fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask)); +#else + fd_setsize = (size_t) (_FDSETWORDS(nfds) * _FDSETBITSPERWORD/8); +#endif + + /* Set source and destination endpoints */ + src_e = (direction == FROM_PROC) ? se->req_endpt : SELF; + dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt; + + /* read set */ + src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds; + dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds; + if (se->vir_readfds) { + r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D, + (vir_bytes) dst_fds, fd_setsize); + if (r != OK) return(r); + } + + /* write set */ + src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds; + dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds; + if (se->vir_writefds) { + r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D, + (vir_bytes) dst_fds, fd_setsize); + if (r != OK) return(r); + } + + /* error set */ + src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds; + dst_fds = (direction == FROM_PROC) ? 
&se->errorfds : se->vir_errorfds; + if (se->vir_errorfds) { + r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D, + (vir_bytes) dst_fds, fd_setsize); + if (r != OK) return(r); + } + + return(OK); +} + + +/*===========================================================================* + * select_cancel_all * + *===========================================================================*/ +PRIVATE void select_cancel_all(struct selectentry *se) +{ +/* Cancel select. Decrease select usage and cancel timer */ + + int fd; + struct filp *f; + + /* Always await results of asynchronous requests */ + assert(!is_deferred(se)); + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL) continue; + se->filps[fd] = NULL; + select_cancel_filp(f); + } + + if (se->expiry > 0) { + cancel_timer(&se->timer); + se->expiry = 0; + } + + se->requestor = NULL; +} + +/*===========================================================================* + * select_cancel_filp * + *===========================================================================*/ +PRIVATE void select_cancel_filp(struct filp *f) +{ +/* Reduce number of select users of this filp */ + + assert(f); + assert(f->filp_selectors >= 0); + if (f->filp_selectors == 0) return; + + select_lock_filp(f, f->filp_select_ops); + + f->filp_selectors--; + if (f->filp_selectors == 0) { + /* No one selecting on this filp anymore, forget about select state */ + f->filp_select_ops = 0; + f->filp_select_flags = 0; + f->filp_pipe_select_ops = 0; + } + + unlock_filp(f); +} + +/*===========================================================================* + * select_return * + *===========================================================================*/ +PRIVATE void select_return(struct selectentry *se) +{ + int r, r1; + + assert(!is_deferred(se)); /* Not done yet, first wait for async reply */ + + select_cancel_all(se); + r1 = copy_fdsets(se, se->nfds, TO_PROC); + if (r1 != OK) + r = r1; + else if (se->error != OK) + r = se->error; + else + r = se->nreadyfds; + + revive(se->req_endpt, r); +} + + +/*===========================================================================* + * select_callback * + *===========================================================================*/ +PUBLIC void select_callback(struct filp *f, int status) +{ + filp_status(f, status); +} + +/*===========================================================================* + * init_select * + *===========================================================================*/ +PUBLIC void init_select(void) +{ + int s; + + for (s = 0; s < MAXSELECTS; s++) + init_timer(&selecttab[s].timer); +} + + +/*===========================================================================* + * select_forget * + *===========================================================================*/ +PUBLIC void select_forget(endpoint_t proc_e) +{ +/* Something has happened (e.g. signal delivered that interrupts select()). + * Totally forget about the select(). 
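+ * (This path is typically reached when a signal is delivered to a process
+ * blocked in select(); the caller then sees the POSIX behaviour of
+ * select() returning -1 with errno set to EINTR.)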
*/ + + int slot; + struct selectentry *se; + + for (slot = 0; slot < MAXSELECTS; slot++) { + se = &selecttab[slot]; + if (se->requestor != NULL && se->req_endpt == proc_e) + break; + } + + if (slot >= MAXSELECTS) return; /* Entry not found */ + se->error = EINTR; + if (is_deferred(se)) return; /* Still awaiting initial reply */ + + select_cancel_all(se); +} + + +/*===========================================================================* + * select_timeout_check * + *===========================================================================*/ +PUBLIC void select_timeout_check(timer_t *timer) +{ + int s; + struct selectentry *se; + + s = tmr_arg(timer)->ta_int; + if (s < 0 || s >= MAXSELECTS) return; /* Entry does not exist */ + + se = &selecttab[s]; + if (se->requestor == NULL) return; + fp = se->requestor; + if (se->expiry <= 0) return; /* Strange, did we even ask for a timeout? */ + se->expiry = 0; + if (is_deferred(se)) return; /* Wait for initial replies to DEV_SELECT */ + select_return(se); +} + + +/*===========================================================================* + * select_unsuspend_by_endpt * + *===========================================================================*/ +PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e) +{ +/* Revive blocked processes when a driver has disappeared */ + + int fd, s, major; + struct selectentry *se; + struct filp *f; + + for (s = 0; s < MAXSELECTS; s++) { + int wakehim = 0; + se = &selecttab[s]; + if (se->requestor == NULL) continue; + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL) + continue; + + major = major(f->filp_vno->v_sdev); + if (dmap_driver_match(proc_e, major)) { + se->filps[fd] = NULL; + se->error = EINTR; + select_cancel_filp(f); + wakehim = 1; + } + } + + if (wakehim && !is_deferred(se)) + select_return(se); + } +} + + +/*===========================================================================* + * select_reply1 * + *===========================================================================*/ +PUBLIC void select_reply1(driver_e, minor, status) +endpoint_t driver_e; +int minor; +int status; +{ +/* Handle reply to DEV_SELECT request */ + + int major; + dev_t dev; + struct filp *f; + struct dmap *dp; + struct vnode *vp; + + /* Figure out which device is replying */ + if ((dp = get_dmap(driver_e)) == NULL) { + printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n", + __FILE__, __LINE__, driver_e); + return; + } + major = dp-dmap; + dev = makedev(major, minor); + + /* Get filp belonging to character special file */ + if ((f = dp->dmap_sel_filp) == NULL) { + printf("VFS (%s:%d): major %d was not expecting a DEV_SELECT reply\n", + __FILE__, __LINE__, major); + return; + } + + /* Is the filp still in use and busy waiting for a reply? The owner might + * have vanished before the driver was able to reply. */ + if (f->filp_count >= 1 && (f->filp_select_flags & FSF_BUSY)) { + /* Find vnode and check we got a reply from the device we expected */ + vp = f->filp_vno; + assert(vp != NULL); + assert((vp->v_mode & I_TYPE) == I_CHAR_SPECIAL); /* Must be char. 
special */ + if (vp->v_sdev != dev) { + printf("VFS (%s:%d): expected reply from dev %d not %d\n", + __FILE__, __LINE__, vp->v_sdev, dev); + return; + } + } + + select_lock_filp(f, f->filp_select_ops); + + /* No longer waiting for a reply from this device */ + f->filp_select_flags &= ~FSF_BUSY; + dp->dmap_sel_filp = NULL; + + /* The select call is done now, except when + * - another process started a select on the same filp with possibly a + * different set of operations. + * - a process does a select on the same filp but using different file + * descriptors. + * - the select has a timeout. Upon receiving this reply the operations might + * not be ready yet, so we want to wait for that to ultimately happen. + * Therefore we need to keep remembering what the operations are. */ + if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED))) + f->filp_select_ops = 0; /* done selecting */ + else if (!(f->filp_select_flags & FSF_UPDATE)) + f->filp_select_ops &= ~status; /* there may be operations pending */ + + /* Tell filp owners about result unless we need to wait longer */ + if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) { + if (status > 0) { /* operations ready */ + if (status & SEL_RD) f->filp_select_flags &= ~FSF_RD_BLOCK; + if (status & SEL_WR) f->filp_select_flags &= ~FSF_WR_BLOCK; + if (status & SEL_ERR) f->filp_select_flags &= ~FSF_ERR_BLOCK; + } else if (status < 0) { /* error */ + f->filp_select_flags &= ~FSF_BLOCKED; /* No longer blocking */ + } + + unlock_filp(f); + filp_status(f, status); /* Tell filp owners about the results */ + } else { + unlock_filp(f); + } + + select_restart_filps(); +} + + +/*===========================================================================* + * select_reply2 * + *===========================================================================*/ +PUBLIC void select_reply2(driver_e, minor, status) +endpoint_t driver_e; +int minor; +int status; +{ +/* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when + * the select request is 'blocking' until an operation becomes ready. */ + int major, slot, fd; + dev_t dev; + struct filp *f; + struct dmap *dp; + struct vnode *vp; + struct selectentry *se; + + if (status == 0) { + printf("VFS (%s:%d): weird status (%d) to report\n", + __FILE__, __LINE__, status); + return; + } + + /* Figure out which device is replying */ + if ((dp = get_dmap(driver_e)) == NULL) { + printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n", + __FILE__, __LINE__, driver_e); + return; + } + major = dp-dmap; + dev = makedev(major, minor); + + /* Find all file descriptors selecting for this device */ + for (slot = 0; slot < MAXSELECTS; slot++) { + se = &selecttab[slot]; + if (se->requestor == NULL) continue; /* empty slot */ + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL) continue; + if ((vp = f->filp_vno) == NULL) continue; + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + if (vp->v_sdev != dev) continue; + + select_lock_filp(f, f->filp_select_ops); + if (status > 0) { /* Operations ready */ + /* Clear the replied bits from the request + * mask unless FSF_UPDATE is set. 
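+			 * E.g., if filp_select_ops was SEL_RD|SEL_WR and the
+			 * driver reports SEL_RD ready, only SEL_WR stays
+			 * pending.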
+			 */
+			if (!(f->filp_select_flags & FSF_UPDATE))
+				f->filp_select_ops &= ~status;
+			if (status & SEL_RD)
+				f->filp_select_flags &= ~FSF_RD_BLOCK;
+			if (status & SEL_WR)
+				f->filp_select_flags &= ~FSF_WR_BLOCK;
+			if (status & SEL_ERR)
+				f->filp_select_flags &= ~FSF_ERR_BLOCK;
+
+			ops2tab(status, fd, se);
+		} else {
+			f->filp_select_flags &= ~FSF_BLOCKED;
+			ops2tab(SEL_RD|SEL_WR|SEL_ERR, fd, se);
+		}
+		unlock_filp(f);
+		if (se->nreadyfds > 0) restart_proc(se);
+	}
+  }
+
+  select_restart_filps();
+}
+
+/*===========================================================================*
+ *				select_restart_filps			     *
+ *===========================================================================*/
+PRIVATE void select_restart_filps()
+{
+  int fd, slot;
+  struct filp *f;
+  struct vnode *vp;
+  struct selectentry *se;
+
+  /* Locate filps that can be restarted */
+  for (slot = 0; slot < MAXSELECTS; slot++) {
+	se = &selecttab[slot];
+	if (se->requestor == NULL) continue; /* empty slot */
+
+	/* Only 'deferred' processes are eligible to restart */
+	if (!is_deferred(se)) continue;
+
+	/* Find filps that are not waiting for a reply, but have an updated
+	 * status (i.e., another select on the same filp with possibly a
+	 * different set of operations is to be done) and thus require the
+	 * select request to be sent again.
+	 */
+	for (fd = 0; fd < se->nfds; fd++) {
+		int r, wantops, ops;
+		if ((f = se->filps[fd]) == NULL) continue;
+		if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
+			continue;		     /* initial reply */
+		if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */
+			continue;			  /* 'update' state */
+
+		wantops = ops = f->filp_select_ops;
+		select_lock_filp(f, ops);
+		vp = f->filp_vno;
+		assert((vp->v_mode & I_TYPE) == I_CHAR_SPECIAL);
+		r = do_select_request(se, fd, &wantops);
+		unlock_filp(f);
+		if (r != SEL_OK) {
+			if (r == SEL_DEFERRED) continue;
+			else break; /* Error or bogus return code; abort */
+		}
+		if (wantops & ops) ops2tab(wantops, fd, se);
+	}
+  }
+}
+
+/*===========================================================================*
+ *				do_select_request			     *
+ *===========================================================================*/
+PRIVATE int do_select_request(se, fd, ops)
+struct selectentry *se;
+int fd;
+int *ops;
+{
+/* Perform the actual select request for file descriptor fd */
+
+  int r, type;
+  struct filp *f;
+
+  type = se->type[fd];
+  f = se->filps[fd];
+  r = fdtypes[type].select_request(f, ops, se->block);
+  if (r != SEL_OK && r != SEL_DEFERRED) {
+	se->error = EINTR;
+	se->block = 0;	/* Stop blocking to return asap */
+	if (!is_deferred(se)) select_cancel_all(se);
+  }
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				filp_status				     *
+ *===========================================================================*/
+PRIVATE void filp_status(f, status)
+struct filp *f;
+int status;
+{
+/* Tell processes that need to know about the status of this filp */
+  int fd, slot;
+  struct selectentry *se;
+
+  for (slot = 0; slot < MAXSELECTS; slot++) {
+	se = &selecttab[slot];
+	if (se->requestor == NULL) continue; /* empty slot */
+
+	for (fd = 0; fd < se->nfds; fd++) {
+		if (se->filps[fd] != f) continue;
+		if (status < 0)
+			ops2tab(SEL_RD|SEL_WR|SEL_ERR, fd, se);
+		else
+			ops2tab(status, fd, se);
+		restart_proc(se);
+	}
+  }
+}
+
+/*===========================================================================*
+ *				restart_proc				     *
+ *===========================================================================*/
+PRIVATE void restart_proc(se)
+struct selectentry *se;
+{
+/* Tell the process about select results (if any) unless there are still
+ * results pending. */
+
+  if ((se->nreadyfds > 0 || !se->block) && !is_deferred(se))
+	select_return(se);
+}
+
+/*===========================================================================*
+ *				wipe_select				     *
+ *===========================================================================*/
+PRIVATE void wipe_select(struct selectentry *se)
+{
+  se->nfds = 0;
+  se->nreadyfds = 0;
+  se->error = OK;
+  se->block = 0;
+  memset(se->filps, 0, sizeof(se->filps));
+
+  FD_ZERO(&se->readfds);
+  FD_ZERO(&se->writefds);
+  FD_ZERO(&se->errorfds);
+  FD_ZERO(&se->ready_readfds);
+  FD_ZERO(&se->ready_writefds);
+  FD_ZERO(&se->ready_errorfds);
+}
+
+/*===========================================================================*
+ *				select_lock_filp			     *
+ *===========================================================================*/
+PRIVATE void select_lock_filp(struct filp *f, int ops)
+{
+/* Lock a filp and vnode based on which operations are requested */
+  tll_access_t locktype;
+
+  locktype = VNODE_READ; /* By default */
+
+  if (ops & (SEL_WR|SEL_ERR))
+	/* Selecting for error or writing requires exclusive access */
+	locktype = VNODE_WRITE;
+
+  lock_filp(f, locktype);
+}
diff --git a/servers/avfs/select.h b/servers/avfs/select.h
new file mode 100644
index 000000000..5215b1a90
--- /dev/null
+++ b/servers/avfs/select.h
@@ -0,0 +1,9 @@
+#ifndef __VFS_SELECT_H__
+#define __VFS_SELECT_H__
+
+/* return codes for select_request_* and select_cancel_* */
+#define SEL_OK		0	/* ready */
+#define SEL_ERROR	1	/* failed */
+#define SEL_DEFERRED	2	/* request is sent to driver */
+
+#endif
diff --git a/servers/avfs/stadir.c b/servers/avfs/stadir.c
new file mode 100644
index 000000000..fbc8fa088
--- /dev/null
+++ b/servers/avfs/stadir.c
@@ -0,0 +1,287 @@
+/* This file contains the code for performing the system calls relating to
+ * status and directories.
+ *
+ * The entry points into this file are
+ *   do_chdir:	  perform the CHDIR system call
+ *   do_fchdir:	  perform the FCHDIR system call
+ *   do_chroot:	  perform the CHROOT system call
+ *   do_lstat:	  perform the LSTAT system call
+ *   do_stat:	  perform the STAT system call
+ *   do_fstat:	  perform the FSTAT system call
+ *   do_fstatfs:  perform the FSTATFS system call
+ *   do_statvfs:  perform the STATVFS system call
+ *   do_fstatvfs: perform the FSTATVFS system call
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include
+#include
+#include "vnode.h"
+#include "vmnt.h"
+
+FORWARD _PROTOTYPE( int change, (struct vnode **iip, char *name_ptr, int len));
+FORWARD _PROTOTYPE( int change_into, (struct vnode **iip, struct vnode *vp));
+
+/*===========================================================================*
+ *				do_fchdir				     *
+ *===========================================================================*/
+PUBLIC int do_fchdir()
+{
+  /* Change directory on an already-opened fd. */
+  struct filp *rfilp;
+  int r;
+
+  /* Is the file descriptor valid? */
+  if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+  r = change_into(&fp->fp_wd, rfilp->filp_vno);
+  unlock_filp(rfilp);
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_chdir				     *
+ *===========================================================================*/
+PUBLIC int do_chdir()
+{
+/* Perform the chdir(name) system call.
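+ * The only difference from do_chroot() below is which per-process vnode
+ * pointer gets replaced: chdir() updates fp->fp_wd, chroot() fp->fp_rd.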
*/ + + return change(&fp->fp_wd, m_in.name, m_in.name_length); +} + +/*===========================================================================* + * do_chroot * + *===========================================================================*/ +PUBLIC int do_chroot() +{ +/* Perform the chroot(name) system call. */ + + if (!super_user) return(EPERM); /* only su may chroot() */ + return change(&fp->fp_rd, m_in.name, m_in.name_length); +} + +/*===========================================================================* + * change * + *===========================================================================*/ +PRIVATE int change(iip, name_ptr, len) +struct vnode **iip; /* pointer to the inode pointer for the dir */ +char *name_ptr; /* pointer to the directory name to change to */ +int len; /* length of the directory name string */ +{ +/* Do the actual work for chdir() and chroot(). */ + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + int r; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Try to open the directory */ + if (fetch_name(name_ptr, len, M3, fullpath) != OK) return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + r = change_into(iip, vp); + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * change_into * + *===========================================================================*/ +PRIVATE int change_into(iip, vp) +struct vnode **iip; /* pointer to the inode pointer for the dir */ +struct vnode *vp; /* this is what the inode has to become */ +{ + int r; + + if (*iip == vp) return(OK); /* Nothing to do */ + + /* It must be a directory and also be searchable */ + if ((vp->v_mode & I_TYPE) != I_DIRECTORY) + r = ENOTDIR; + else + r = forbidden(vp, X_BIT); /* Check if dir is searchable*/ + if (r != OK) return(r); + + /* Everything is OK. Make the change. */ + put_vnode(*iip); /* release the old directory */ + dup_vnode(vp); + *iip = vp; /* acquire the new one */ + return(OK); +} + +/*===========================================================================* + * do_stat * + *===========================================================================*/ +PUBLIC int do_stat() +{ +/* Perform the stat(name, buf) system call. */ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + int old_stat = 0; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + if (call_nr == PREV_STAT) + old_stat = 1; + + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat); + + unlock_vnode(vp); + unlock_vmnt(vmp); + + put_vnode(vp); + return r; +} + +/*===========================================================================* + * do_fstat * + *===========================================================================*/ +PUBLIC int do_fstat() +{ +/* Perform the fstat(fd, buf) system call. */ + register struct filp *rfilp; + int r; + int pipe_pos = 0; + int old_stat = 0; + + if (call_nr == PREV_FSTAT) + old_stat = 1; + + /* Is the file descriptor valid? 
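+   * (If not, get_filp() returns NULL and leaves the reason, e.g. EBADF,
+   * in err_code.)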
*/
+  if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+  /* If we read from a pipe, send the position too */
+  if (rfilp->filp_vno->v_pipe == I_PIPE) {
+	if (rfilp->filp_mode & R_BIT)
+		if (ex64hi(rfilp->filp_pos) != 0) {
+			panic("do_fstat: bad position in pipe");
+		}
+	pipe_pos = ex64lo(rfilp->filp_pos);
+  }
+
+  r = req_stat(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
+	       who_e, m_in.buffer, pipe_pos, old_stat);
+
+  unlock_filp(rfilp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_fstatfs				     *
+ *===========================================================================*/
+PUBLIC int do_fstatfs()
+{
+/* Perform the fstatfs(fd, buf) system call. */
+  struct filp *rfilp;
+  int r;
+
+  /* Is the file descriptor valid? */
+  if( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+  r = req_fstatfs(rfilp->filp_vno->v_fs_e, who_e, m_in.buffer);
+
+  unlock_filp(rfilp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_statvfs				     *
+ *===========================================================================*/
+PUBLIC int do_statvfs()
+{
+/* Perform the statvfs(name, buf) system call. */
+  int r;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_READ;
+  resolve.l_vnode_lock = VNODE_READ;
+
+  if (fetch_name(m_in.STATVFS_NAME, m_in.STATVFS_LEN, M1, fullpath) != OK)
+	return(err_code);
+  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  r = req_statvfs(vp->v_fs_e, who_e, m_in.STATVFS_BUF);
+
+  unlock_vnode(vp);
+  unlock_vmnt(vmp);
+
+  put_vnode(vp);
+  return r;
+}
+
+/*===========================================================================*
+ *				do_fstatvfs				     *
+ *===========================================================================*/
+PUBLIC int do_fstatvfs()
+{
+/* Perform the fstatvfs(fd, buf) system call. */
+  register struct filp *rfilp;
+  int r;
+
+  /* Is the file descriptor valid? */
+  if ((rfilp = get_filp(m_in.FSTATVFS_FD, VNODE_READ)) == NULL)
+	return(err_code);
+
+  r = req_statvfs(rfilp->filp_vno->v_fs_e, who_e, m_in.FSTATVFS_BUF);
+
+  unlock_filp(rfilp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_lstat				     *
+ *===========================================================================*/
+PUBLIC int do_lstat()
+{
+/* Perform the lstat(name, buf) system call. */
+  struct vnode *vp;
+  struct vmnt *vmp;
+  int r;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+  int old_stat = 0;
+
+  lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_READ;
+  resolve.l_vnode_lock = VNODE_READ;
+
+  if (call_nr == PREV_LSTAT)
+	old_stat = 1;
+  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+	return(err_code);
+
+  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat);
+
+  unlock_vnode(vp);
+  unlock_vmnt(vmp);
+
+  put_vnode(vp);
+  return(r);
+}
diff --git a/servers/avfs/table.c b/servers/avfs/table.c
new file mode 100644
index 000000000..df9b57df3
--- /dev/null
+++ b/servers/avfs/table.c
@@ -0,0 +1,145 @@
+/* This file contains the table used to map system call numbers onto the
+ * routines that perform them.
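+ *
+ * The incoming call number indexes call_vec; in essence (a sketch, the
+ * actual dispatch happens in the main message loop):
+ *
+ *	if (call_nr < 0 || call_nr >= NCALLS) error = ENOSYS;
+ *	else error = (*call_vec[call_nr])();
+ *
+ * The position of each entry is therefore part of the ABI; note the
+ * "THE MINIX3 ABI ENDS HERE" marker and the compile-time size check
+ * directly after the table.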
+ */
+
+#define _TABLE
+
+#include "fs.h"
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "lock.h"
+#include "vnode.h"
+#include "vmnt.h"
+
+PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = {
+	no_sys,		/*  0 = unused	*/
+	no_sys,		/*  1 = (exit)	*/
+	no_sys,		/*  2 = (fork)	*/
+	do_read,	/*  3 = read	*/
+	do_write,	/*  4 = write	*/
+	do_open,	/*  5 = open	*/
+	do_close,	/*  6 = close	*/
+	no_sys,		/*  7 = wait	*/
+	do_creat,	/*  8 = creat	*/
+	do_link,	/*  9 = link	*/
+	do_unlink,	/* 10 = unlink	*/
+	no_sys,		/* 11 = waitpid	*/
+	do_chdir,	/* 12 = chdir	*/
+	no_sys,		/* 13 = time	*/
+	do_mknod,	/* 14 = mknod	*/
+	do_chmod,	/* 15 = chmod	*/
+	do_chown,	/* 16 = chown	*/
+	no_sys,		/* 17 = break	*/
+	do_stat,	/* 18 = stat (prev)	*/
+	do_lseek,	/* 19 = lseek	*/
+	no_sys,		/* 20 = getpid	*/
+	do_mount,	/* 21 = mount	*/
+	do_umount,	/* 22 = umount	*/
+	no_sys,		/* 23 = (setuid)	*/
+	no_sys,		/* 24 = getuid	*/
+	no_sys,		/* 25 = (stime)	*/
+	no_sys,		/* 26 = ptrace	*/
+	no_sys,		/* 27 = alarm	*/
+	do_fstat,	/* 28 = fstat (prev)	*/
+	no_sys,		/* 29 = pause	*/
+	do_utime,	/* 30 = utime	*/
+	no_sys,		/* 31 = (stty)	*/
+	no_sys,		/* 32 = (gtty)	*/
+	do_access,	/* 33 = access	*/
+	no_sys,		/* 34 = (nice)	*/
+	no_sys,		/* 35 = (ftime)	*/
+	do_sync,	/* 36 = sync	*/
+	no_sys,		/* 37 = kill	*/
+	do_rename,	/* 38 = rename	*/
+	do_mkdir,	/* 39 = mkdir	*/
+	do_unlink,	/* 40 = rmdir	*/
+	do_dup,		/* 41 = dup	*/
+	do_pipe,	/* 42 = pipe	*/
+	no_sys,		/* 43 = times	*/
+	no_sys,		/* 44 = (prof)	*/
+	do_slink,	/* 45 = symlink	*/
+	no_sys,		/* 46 = (setgid)	*/
+	no_sys,		/* 47 = getgid	*/
+	no_sys,		/* 48 = (signal)	*/
+	do_rdlink,	/* 49 = readlink	*/
+	do_lstat,	/* 50 = lstat (prev)	*/
+	no_sys,		/* 51 = (acct)	*/
+	no_sys,		/* 52 = (phys)	*/
+	no_sys,		/* 53 = (lock)	*/
+	do_ioctl,	/* 54 = ioctl	*/
+	do_fcntl,	/* 55 = fcntl	*/
+	no_sys,		/* 56 = (mpx)	*/
+	do_fsready,	/* 57 = FS proc ready	*/
+	no_sys,		/* 58 = unused	*/
+	no_sys,		/* 59 = (execve)	*/
+	do_umask,	/* 60 = umask	*/
+	do_chroot,	/* 61 = chroot	*/
+	no_sys,		/* 62 = (setsid)	*/
+	no_sys,		/* 63 = (getpgrp)	*/
+	no_sys,		/* 64 = (itimer)	*/
+	do_stat,	/* 65 = stat	*/
+	do_fstat,	/* 66 = fstat	*/
+	do_lstat,	/* 67 = lstat	*/
+	no_sys,		/* 68 = unused	*/
+	no_sys,		/* 69 = unused	*/
+	no_sys,		/* 70 = unused	*/
+	no_sys,		/* 71 = (sigaction)	*/
+	no_sys,		/* 72 = (sigsuspend)	*/
+	no_sys,		/* 73 = (sigpending)	*/
+	no_sys,		/* 74 = (sigprocmask)	*/
+	no_sys,		/* 75 = (sigreturn)	*/
+	no_sys,		/* 76 = (reboot)	*/
+	do_svrctl,	/* 77 = svrctl	*/
+	no_sys,		/* 78 = (sysuname)	*/
+	do_getsysinfo,	/* 79 = getsysinfo	*/
+	do_getdents,	/* 80 = getdents	*/
+	do_llseek,	/* 81 = llseek	*/
+	do_fstatfs,	/* 82 = fstatfs	*/
+	do_statvfs,	/* 83 = statvfs	*/
+	do_fstatvfs,	/* 84 = fstatvfs	*/
+	do_select,	/* 85 = select	*/
+	do_fchdir,	/* 86 = fchdir	*/
+	do_fsync,	/* 87 = fsync	*/
+	no_sys,		/* 88 = (getpriority)	*/
+	no_sys,		/* 89 = (setpriority)	*/
+	no_sys,		/* 90 = (gettimeofday)	*/
+	no_sys,		/* 91 = (seteuid)	*/
+	no_sys,		/* 92 = (setegid)	*/
+	do_truncate,	/* 93 = truncate	*/
+	do_ftruncate,	/* 94 = ftruncate	*/
+	do_chmod,	/* 95 = fchmod	*/
+	do_chown,	/* 96 = fchown	*/
+	no_sys,		/* 97 = (getsysinfo_up)	*/
+	no_sys,		/* 98 = (sprofile)	*/
+	no_sys,		/* 99 = (cprofile)	*/
+	/* THE MINIX3 ABI ENDS HERE */
+	no_sys,		/* 100 = (exec_newmem)	*/
+	no_sys,		/* 101 = (srv_fork)	*/
+	no_sys,		/* 102 = (exec_restart)	*/
+	no_sys,		/* 103 = (procstat)	*/
+	no_sys,		/* 104 = (getprocnr)	*/
+	no_sys,		/* 105 = unused	*/
+	no_sys,		/* 106 = unused	*/
+	no_sys,		/* 107 = (getepinfo)	*/
+	no_sys,		/* 108 = (adddma)	*/
+ no_sys, /* 110 = (getdma) */
+ no_sys, /* 111 = (srv_kill) */
+ do_gcov_flush, /* 112 = gcov_flush */
+ no_sys, /* 113 = (getsid) */
+};
+/* This should not fail with "array size is negative": */
+extern int dummy[sizeof(call_vec) == NCALLS * sizeof(call_vec[0]) ? 1 : -1];
+
+PUBLIC _PROTOTYPE (int (*pfs_call_vec[]), (void) ) = {
+
+ no_sys, /* 0 */
+ do_check_perms, /* 1 */
+ do_verify_fd, /* 2 */
+ do_set_filp, /* 3 */
+ do_copy_filp, /* 4 */
+ do_put_filp, /* 5 */
+ do_cancel_fd /* 6 */
+};
diff --git a/servers/avfs/threads.h b/servers/avfs/threads.h
new file mode 100644
index 000000000..02f03cdee
--- /dev/null
+++ b/servers/avfs/threads.h
@@ -0,0 +1,35 @@
+#ifndef __VFS_WORKERS_H__
+#define __VFS_WORKERS_H__
+#include <minix/mthread.h>
+#include "job.h"
+
+#define thread_t mthread_thread_t
+#define mutex_t mthread_mutex_t
+#define cond_t mthread_cond_t
+#define attr_t mthread_attr_t
+
+#define threads_init mthread_init
+#define yield mthread_yield
+#define yield_all mthread_yield_all
+
+#define mutex_init mthread_mutex_init
+#define mutex_destroy mthread_mutex_destroy
+#define mutex_lock mthread_mutex_lock
+#define mutex_trylock mthread_mutex_trylock
+#define mutex_unlock mthread_mutex_unlock
+
+#define cond_init mthread_cond_init
+#define cond_destroy mthread_cond_destroy
+#define cond_wait mthread_cond_wait
+#define cond_signal mthread_cond_signal
+
+struct worker_thread {
+ thread_t w_tid;
+ mutex_t w_event_mutex;
+ cond_t w_event;
+ struct job w_job;
+ struct fproc *w_fp;
+ struct worker_thread *w_next;
+};
+
+#endif
diff --git a/servers/avfs/time.c b/servers/avfs/time.c
new file mode 100644
index 000000000..315fc4b78
--- /dev/null
+++ b/servers/avfs/time.c
@@ -0,0 +1,66 @@
+/* This file takes care of those system calls that deal with time.
+ *
+ * The entry points into this file are
+ * do_utime: perform the UTIME system call
+ */
+
+#include "fs.h"
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include "vnode.h"
+#include 
+#include "vmnt.h"
+
+/*===========================================================================*
+ * do_utime *
+ *===========================================================================*/
+PUBLIC int do_utime()
+{
+/* Perform the utime(name, timep) system call. */
+ register int len;
+ int r;
+ time_t actime, modtime;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Adjust for case of 'timep' being NULL;
+ * utime_strlen then holds the actual size: strlen(name)+1 */
+ len = m_in.utime_length;
+ if (len == 0) len = m_in.utime_strlen;
+
+ /* Temporarily open the file */
+ if (fetch_name(m_in.utime_file, len, M1, fullpath) != OK) return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ /* Only the owner of a file or the super user can change its timestamps.
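+ * (POSIX: when utime() is called with a NULL 'timep', write permission on
+ * the file is sufficient; the forbidden(vp, W_BIT) fallback below covers
+ * exactly that case, since utime_length == 0 then.)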
*/ + r = OK; + if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) r = EPERM; + if (m_in.utime_length == 0 && r != OK) r = forbidden(vp, W_BIT); + if (read_only(vp) != OK) r = EROFS; /* Not even su can touch if R/O */ + if (r == OK) { + /* Issue request */ + if(m_in.utime_length == 0) { + actime = modtime = clock_time(); + } else { + actime = m_in.utime_actime; + modtime = m_in.utime_modtime; + } + r = req_utime(vp->v_fs_e, vp->v_inode_nr, actime, modtime); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + + put_vnode(vp); + return(r); +} diff --git a/servers/avfs/tll.c b/servers/avfs/tll.c new file mode 100644 index 000000000..6ce2f2c80 --- /dev/null +++ b/servers/avfs/tll.c @@ -0,0 +1,310 @@ +/* This file contains the implementation of the three-level-lock. */ + +#include "fs.h" +#include "glo.h" +#include "tll.h" +#include "threads.h" +#include + +FORWARD _PROTOTYPE( int tll_append, (tll_t *tllp, tll_access_t locktype)); + +PRIVATE int tll_append(tll_t *tllp, tll_access_t locktype) +{ + struct worker_thread *queue; + + assert(self != NULL); + assert(tllp != NULL); + assert(locktype != TLL_NONE); + + /* Read-only and write-only requests go to the write queue. Read-serialized + * requests go to the serial queue. Then we wait for an event to signal it's + * our turn to go. */ + queue = NULL; + if (locktype == TLL_READ || locktype == TLL_WRITE) { + if (tllp->t_write == NULL) + tllp->t_write = self; + else + queue = tllp->t_write; + } else { + if (tllp->t_serial == NULL) + tllp->t_serial = self; + else + queue = tllp->t_serial; + } + + if (queue != NULL) { /* Traverse to end of queue */ + while (queue->w_next != NULL) queue = queue->w_next; + queue->w_next = self; + } + self->w_next = NULL; /* End of queue */ + + /* Now wait for the event it's our turn */ + worker_wait(); + + tllp->t_current = locktype; + tllp->t_status &= ~TLL_PEND; + tllp->t_owner = self; + + if (tllp->t_current == TLL_READ) { + tllp->t_readonly++; + tllp->t_owner = NULL; + } + + if (verbose) { + printf("got lock on tllp=%p with type %d (self=%p)\n", tllp, + locktype, self); + } + + /* Due to the way upgrading and downgrading works, read-only requests are + * scheduled to run after a downgraded lock is released (because they are + * queued on the write-only queue which has priority). This results from the + * fact that the downgrade operation cannot know whether the next locktype on + * the write-only queue is really write-only or actually read-only. However, + * that means that read-serialized requests stay queued, while they could run + * simultaneously with read-only requests. See if there are any and grant + * the head request access */ + if (tllp->t_current == TLL_READ && tllp->t_serial != NULL) { + tllp->t_owner = tllp->t_serial; + tllp->t_serial = tllp->t_serial->w_next; + tllp->t_owner->w_next = NULL; + assert(!(tllp->t_status & TLL_PEND)); + tllp->t_status |= TLL_PEND; + worker_signal(tllp->t_owner); + } + + return(OK); +} + +PUBLIC void tll_downgrade(tll_t *tllp) +{ +/* Downgrade three-level-lock tll from write-only to read-serialized, or from + * read-serialized to read-only. Caveat: as we can't know whether the next + * lock type on the write queue is actually read-only or write-only, we can't + * grant access to that type. It will be granted access once we unlock. Also, + * because we apply write-bias, we can't grant access to read-serialized + * either, unless nothing is queued on the write-only stack. 
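+ * In short: TLL_WRITE downgrades to TLL_READSER, and TLL_READSER downgrades
+ * to TLL_READ (or hands the lock to the next queued read-serialized
+ * requestor when the write queue is empty).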
*/ + + assert(self != NULL); + assert(tllp != NULL); + assert(tllp->t_owner == self); + + switch(tllp->t_current) { + case TLL_WRITE: tllp->t_current = TLL_READSER; break; + case TLL_READSER: + /* If nothing is queued on write-only, but there is a pending lock + * requesting read-serialized, grant it and keep the lock type. */ + if (tllp->t_write == NULL && tllp->t_serial != NULL) { + tllp->t_owner = tllp->t_serial; + tllp->t_serial = tllp->t_serial->w_next; /* Remove head */ + tllp->t_owner->w_next = NULL; + assert(!(tllp->t_status & TLL_PEND)); + tllp->t_status |= TLL_PEND; + worker_signal(tllp->t_owner); + } else { + tllp->t_current = TLL_READ; + tllp->t_owner = NULL; + } + tllp->t_readonly++; /* Either way, there's one more read-only lock */ + break; + default: panic("VFS: Incorrect lock state"); + } +} + +PUBLIC void tll_init(tll_t *tllp) +{ +/* Initialize three-level-lock tll */ + assert(tllp != NULL); + + tllp->t_current = TLL_NONE; + tllp->t_readonly = 0; + tllp->t_status = TLL_DFLT; + tllp->t_write = NULL; + tllp->t_serial = NULL; + tllp->t_owner = NULL; +} + +PUBLIC int tll_islocked(tll_t *tllp) +{ + return(tllp->t_current != TLL_NONE); +} + +PUBLIC int tll_locked_by_me(tll_t *tllp) +{ + assert(self != NULL); + return(tllp->t_owner == self && !(tllp->t_status & TLL_PEND)); +} + +PUBLIC int tll_lock(tll_t *tllp, tll_access_t locktype) +{ +/* Try to lock three-level-lock tll with type locktype */ + + assert(self != NULL); + assert(tllp != NULL); + assert(locktype != TLL_NONE); + + self->w_next = NULL; + + if (locktype != TLL_READ && locktype != TLL_READSER && locktype != TLL_WRITE) + panic("Invalid lock type %d\n", locktype); + + /* If this locking has pending locks, we wait */ + if (tllp->t_status & TLL_PEND) + return tll_append(tllp, locktype); + + /* If we already own this lock don't lock it again and return immediately */ + if (tllp->t_owner == self) { + assert(tllp->t_status == TLL_DFLT); + return(EBUSY); + } + + /* If this lock is not accessed by anyone, locktype is granted off the bat */ + if (tllp->t_current == TLL_NONE) { + tllp->t_current = locktype; + if (tllp->t_current == TLL_READ) + tllp->t_readonly = 1; + else { /* Record owner if locktype is read-serialized or write-only */ + tllp->t_owner = self; + } + return(OK); + } + + /* If the current lock is write-only, we have to wait for that lock to be + * released (regardless of the value of locktype). */ + if (tllp->t_current == TLL_WRITE) + return tll_append(tllp, locktype); + + /* However, if it's not and we're requesting a write-only lock, we have to + * wait until the last read access is released (additional read requests + * after this write-only requests are to be queued) */ + if (locktype == TLL_WRITE) + return tll_append(tllp, locktype); + + /* We have to queue read and read-serialized requests if we have a write-only + * request queued ("write bias") or when a read-serialized lock is trying to + * upgrade to write-only. The current lock for this tll is either read or + * read-serialized. */ + if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) + return tll_append(tllp, locktype); + + /* If this lock is in read-serialized mode, we can allow read requests and + * queue read-serialized requests */ + if (tllp->t_current == TLL_READSER) { + if (locktype == TLL_READ) { + tllp->t_readonly++; + return(OK); + } else + return tll_append(tllp, locktype); + } + + /* Finally, if the current lock is read-only, we can change it to + * read-serialized if necessary without a problem. 
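+ * (For read-only access no owner is recorded; the t_readonly counter alone
+ * tracks the readers.)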
*/ + tllp->t_current = locktype; /* Either read-only or read-serialized */ + if (tllp->t_current == TLL_READ) { /* We now have an additional reader */ + tllp->t_readonly++; + tllp->t_owner = NULL; + } else { + assert(tllp->t_current != TLL_WRITE); + tllp->t_owner = self; /* We now have a new owner */ + self->w_next = NULL; + } + + return(OK); +} + +PUBLIC int tll_haspendinglock(tll_t *tllp) +{ +/* Is someone trying to obtain a lock? */ + assert(tllp != NULL); + + /* Someone is trying to obtain a lock if either the write/read-only queue or + * the read-serialized queue is not empty. */ + return(tllp->t_write != NULL || tllp->t_serial != NULL); +} + +PUBLIC int tll_unlock(tll_t *tllp) +{ +/* Unlock a previously locked three-level-lock tll */ + int signal_owner = 0; + + assert(self != NULL); + assert(tllp != NULL); + + if (tllp->t_owner == NULL || tllp->t_owner != self) { + /* This unlock must have been done by a read-only lock */ + tllp->t_readonly--; + assert(tllp->t_readonly >= 0); + + /* If a read-serialized lock is trying to upgrade and there are no more + * read-only locks, the lock can now be upgraded to write-only */ + if ((tllp->t_status & TLL_UPGR) && tllp->t_readonly == 0) + signal_owner = 1; + } + + if(tllp->t_owner == self || (tllp->t_owner == NULL && tllp->t_readonly == 0)){ + /* Let another read-serialized or write-only request obtain access. + * Write-only has priority, but only after the last read-only access + * has left. Read-serialized access will only be granted if there is + * no pending write-only access request. */ + struct worker_thread *new_owner; + new_owner = NULL; + tllp->t_owner = NULL; /* Remove owner of lock */ + + if (tllp->t_write != NULL) { + if (tllp->t_readonly == 0) { + new_owner = tllp->t_write; + tllp->t_write = tllp->t_write->w_next; + } + } else if (tllp->t_serial != NULL) { + new_owner = tllp->t_serial; + tllp->t_serial = tllp->t_serial->w_next; + } + + /* New owner is head of queue or NULL if no proc is available */ + if (new_owner != NULL) { + tllp->t_owner = new_owner; + tllp->t_owner->w_next = NULL; + assert(tllp->t_owner != self); + signal_owner = 1; + } + } + + /* If no one is using this lock, mark it as not in use */ + if (tllp->t_owner == NULL && tllp->t_readonly == 0) + tllp->t_current = TLL_NONE; + + if (tllp->t_current == TLL_NONE || tllp->t_current == TLL_READ) { + if (!signal_owner) { + tllp->t_owner = NULL; + } + } + + /* If we have a new owner or the current owner managed to upgrade its lock, + * tell it to start/continue running */ + if (signal_owner) { + assert(!(tllp->t_status & TLL_PEND)); + tllp->t_status |= TLL_PEND; + worker_signal(tllp->t_owner); + } + + return(OK); +} + +PUBLIC void tll_upgrade(tll_t *tllp) +{ +/* Upgrade three-level-lock tll from read-serialized to write-only */ + + assert(self != NULL); + assert(tllp != NULL); + assert(tllp->t_owner == self); + assert(tllp->t_current != TLL_READ); /* i.e., read-serialized or write-only*/ + if (tllp->t_current == TLL_WRITE) return; /* Nothing to do */ + if (tllp->t_readonly != 0) { /* Wait for readers to leave */ + assert(!(tllp->t_status & TLL_UPGR)); + tllp->t_status |= TLL_UPGR; + worker_wait(); + tllp->t_status &= ~TLL_UPGR; + tllp->t_status &= ~TLL_PEND; + assert(tllp->t_readonly == 0); + } + tllp->t_current = TLL_WRITE; +} diff --git a/servers/avfs/tll.h b/servers/avfs/tll.h new file mode 100644 index 000000000..bfca394f3 --- /dev/null +++ b/servers/avfs/tll.h @@ -0,0 +1,20 @@ +#ifndef __VFS_TLL_H__ +#define __VFS_TLL_H__ + +/* Three-level-lock. 
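+ *
+ * A minimal usage sketch (an editorial illustration, not part of the patch;
+ * it assumes the VFS worker-thread context above, where 'self' is set):
+ *
+ *	tll_t lock;
+ *
+ *	tll_init(&lock);		-- state is TLL_NONE
+ *	tll_lock(&lock, TLL_READSER);	-- get read-serialized access
+ *	tll_upgrade(&lock);		-- wait out readers, then TLL_WRITE
+ *	tll_unlock(&lock);		-- wake the next queued requestor
+ *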
Allows read-only, read-serialized, and write-only locks */
+
+typedef enum { TLL_NONE, TLL_READ, TLL_READSER, TLL_WRITE } tll_access_t;
+typedef enum { TLL_DFLT = 0x0, TLL_UPGR = 0x1, TLL_PEND = 0x2 } tll_status_t;
+
+typedef struct {
+ tll_access_t t_current; /* Current type of access to lock */
+ struct worker_thread *t_owner;/* Owner of non-read-only lock */
+ signed int t_readonly; /* No. of current read-only accessors */
+ tll_status_t t_status; /* Lock status; nothing, a pending lock,
+ * or a pending upgrade of read-serialized
+ * to write-only */
+ struct worker_thread *t_write;/* Write/read-only access requestors queue */
+ struct worker_thread *t_serial;/* Read-serialized access requestors queue */
+} tll_t;
+
+#endif
diff --git a/servers/avfs/utility.c b/servers/avfs/utility.c
new file mode 100644
index 000000000..5ff6fd134
--- /dev/null
+++ b/servers/avfs/utility.c
@@ -0,0 +1,153 @@
+/* This file contains a few general purpose utility routines.
+ *
+ * The entry points into this file are
+ * clock_time: ask the clock task for the real time
+ * fetch_name: go get a path name from user space
+ * no_sys: reject a system call that FS does not handle
+ * isokendpt_f: check the validity of a process endpoint
+ * in_group: determine whether group 'grp' is in rfp->fp_sgroups[]
+ */
+
+#include "fs.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "file.h"
+#include "fproc.h"
+#include "param.h"
+#include "vmnt.h"
+
+/*===========================================================================*
+ * fetch_name *
+ *===========================================================================*/
+PUBLIC int fetch_name(path, len, flag, dest)
+char *path; /* pointer to the path in user space */
+int len; /* path length, including 0 byte */
+int flag; /* M3 means path may be in message */
+char *dest; /* pointer to where path is to be stored */
+{
+/* Go get path and put it in 'dest'.
+ * If 'flag' = M3 and 'len' <= M3_STRING, the path is present in 'message'.
+ * If it is not, go copy it from user space.
+ */
+ register char *rpu, *rpm;
+ int r, count;
+
+ if (len > PATH_MAX) {
+ err_code = ENAMETOOLONG;
+ return(EGENERIC);
+ }
+
+ /* Check name length for validity. */
+ if (len <= 0) {
+ err_code = EINVAL;
+ return(EGENERIC);
+ }
+
+ if (flag == M3 && len <= M3_STRING) {
+ /* Just copy the path from the message to 'dest'. */
+ rpu = &dest[0];
+ rpm = m_in.pathname; /* contained in input message */
+ count = len;
+ do { *rpu++ = *rpm++; } while (--count);
+ r = OK;
+ } else {
+ /* String is not contained in the message. Get it from user space.
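+ * (sys_datacopy below copies all 'len' bytes, including the terminating
+ * null, from the caller's address space into 'dest'.)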
*/ + r = sys_datacopy(who_e, (vir_bytes) path, + VFS_PROC_NR, (vir_bytes) dest, (phys_bytes) len); + } + + if (dest[len - 1] != '\0') { + err_code = ENAMETOOLONG; + return(EGENERIC); + } + + return(r); +} + + +/*===========================================================================* + * no_sys * + *===========================================================================*/ +PUBLIC int no_sys() +{ +/* Somebody has used an illegal system call number */ + return(ENOSYS); +} + + +/*===========================================================================* + * isokendpt_f * + *===========================================================================*/ +PUBLIC int isokendpt_f(char *file, int line, endpoint_t endpoint, int *proc, int fatal) +{ + int failed = 0; + endpoint_t ke; + *proc = _ENDPOINT_P(endpoint); + if (endpoint == NONE) { + printf("VFS %s:%d: endpoint is NONE\n", file, line); + failed = 1; + } else if (*proc < 0 || *proc >= NR_PROCS) { + printf("VFS %s:%d: proc (%d) from endpoint (%d) out of range\n", + file, line, *proc, endpoint); + failed = 1; + } else if ((ke = fproc[*proc].fp_endpoint) != endpoint) { + if(ke == NONE) { + printf("VFS %s:%d: endpoint (%d) points to NONE slot (%d)\n", + file, line, endpoint, *proc); + assert(fproc[*proc].fp_pid == PID_FREE); + } else { + printf("VFS %s:%d: proc (%d) from endpoint (%d) doesn't match " + "known endpoint (%d)\n", file, line, *proc, endpoint, + fproc[*proc].fp_endpoint); + assert(fproc[*proc].fp_pid != PID_FREE); + } + failed = 1; + } + + if(failed && fatal) + panic("isokendpt_f failed"); + + return(failed ? EDEADEPT : OK); +} + + +/*===========================================================================* + * clock_time * + *===========================================================================*/ +PUBLIC time_t clock_time() +{ +/* This routine returns the time in seconds since 1.1.1970. MINIX is an + * astrophysically naive system that assumes the earth rotates at a constant + * rate and that such things as leap seconds do not exist. + */ + + register int r; + clock_t uptime; + time_t boottime; + + r = getuptime2(&uptime, &boottime); + if (r != OK) + panic("clock_time err: %d", r); + + return( (time_t) (boottime + (uptime/system_hz))); +} + +/*===========================================================================* + * in_group * + *===========================================================================*/ +PUBLIC int in_group(struct fproc *rfp, gid_t grp) +{ + int i; + + for (i = 0; i < rfp->fp_ngroups; i++) + if (rfp->fp_sgroups[i] == grp) + return(OK); + + return(EINVAL); +} diff --git a/servers/avfs/vmnt.c b/servers/avfs/vmnt.c new file mode 100644 index 000000000..cbb517f7f --- /dev/null +++ b/servers/avfs/vmnt.c @@ -0,0 +1,168 @@ +/* Virtual mount table related routines. + * + */ + +#include "fs.h" +#include "threads.h" +#include "vmnt.h" +#include +#include "fproc.h" + +FORWARD _PROTOTYPE( int is_vmnt_locked, (struct vmnt *vmp) ); + +/* Is vmp pointer reasonable? 
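+ * (It must point into the vmnt[] table; see SANEVMP below.)
+ *
+ * Locking sketch (editorial, not part of the patch; 'fs_e' stands for the
+ * endpoint of some mounted file system):
+ *
+ *	struct vmnt *vmp;
+ *
+ *	if ((vmp = find_vmnt(fs_e)) != NULL &&
+ *	    lock_vmnt(vmp, VMNT_EXCL) == OK) {
+ *		...			-- operate on the mount
+ *		unlock_vmnt(vmp);
+ *	}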
*/ +#define SANEVMP(v) ((((v) >= &vmnt[0] && (v) < &vmnt[NR_MNTS]))) +#define BADVMP(v, f, l) printf("%s:%d: bad vmp %p\n", f, l, v) +/* vp check that panics */ +#define ASSERTVMP(v) if(!SANEVMP(v)) { \ + BADVMP(v, __FILE__, __LINE__); panic("bad vmp"); } + +#if LOCK_DEBUG +/*===========================================================================* + * check_vmnt_locks_by_me * + *===========================================================================*/ +PUBLIC void check_vmnt_locks_by_me(struct fproc *rfp) +{ +/* Check whether this thread still has locks held on vmnts */ + struct vmnt *vmp; + + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) { + if (tll_locked_by_me(&vmp->m_lock)) + panic("Thread %d still holds vmnt lock on vmp %p call_nr=%d\n", + mthread_self(), vmp, call_nr); + } + + if (rfp->fp_vmnt_rdlocks != 0) + panic("Thread %d still holds read locks on a vmnt (%d) call_nr=%d\n", + mthread_self(), rfp->fp_vmnt_rdlocks, call_nr); +} +#endif + +/*===========================================================================* + * check_vmnt_locks * + *===========================================================================*/ +PUBLIC void check_vmnt_locks() +{ + struct vmnt *vmp; + int count = 0; + + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) + if (is_vmnt_locked(vmp)) { + count++; + printf("vmnt %p is %s, fs_e=%d dev=%d\n", vmp, (tll_islocked(&vmp->m_lock) ? "locked":"pending locked"), vmp->m_fs_e, vmp->m_dev); + } + + if (count) panic("%d locked vmnts\n", count); +#if 0 + printf("check_vmnt_locks OK\n"); +#endif +} + +/*===========================================================================* + * get_free_vmnt * + *===========================================================================*/ +PUBLIC struct vmnt *get_free_vmnt(void) +{ + struct vmnt *vp; + + for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; ++vp) + if (vp->m_dev == NO_DEV) return(vp); + + return(NULL); +} + +/*===========================================================================* + * find_vmnt * + *===========================================================================*/ +PUBLIC struct vmnt *find_vmnt(endpoint_t fs_e) +{ +/* Find the vmnt belonging to an FS with endpoint 'fs_e' iff it's in use */ + struct vmnt *vp; + + for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; ++vp) + if (vp->m_fs_e == fs_e && vp->m_dev != NO_DEV) + return(vp); + + return(NULL); +} + +/*===========================================================================* + * init_vmnts * + *===========================================================================*/ +PUBLIC void init_vmnts(void) +{ +/* Initialize vmnt table */ + struct vmnt *vp; + + for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; vp++) { + vp->m_fs_e = NONE; + vp->m_dev = NO_DEV; + vp->m_flags = 0; + vp->m_mounted_on = NULL; + vp->m_root_node = NULL; + vp->m_label[0] = '\0'; + vp->m_comm.c_max_reqs = 1; + vp->m_comm.c_cur_reqs = 0; + vp->m_comm.c_req_queue = NULL; + tll_init(&vp->m_lock); + } +} + +/*===========================================================================* + * is_vmnt_locked * + *===========================================================================*/ +PRIVATE int is_vmnt_locked(struct vmnt *vmp) +{ + ASSERTVMP(vmp); + return(tll_islocked(&vmp->m_lock) || tll_haspendinglock(&vmp->m_lock)); +} + +/*===========================================================================* + * lock_vmnt * + *===========================================================================*/ +PUBLIC int lock_vmnt(struct vmnt *vmp, tll_access_t locktype) +{ + int r; + tll_access_t 
initial_locktype; + + ASSERTVMP(vmp); + + initial_locktype = (locktype == VMNT_EXCL) ? VMNT_WRITE : locktype; + + r = tll_lock(&vmp->m_lock, initial_locktype); + + if (r == EBUSY) return(r); + + if (initial_locktype != locktype) { + tll_upgrade(&vmp->m_lock); + } + +#if LOCK_DEBUG + if (locktype == VMNT_READ) + fp->fp_vmnt_rdlocks++; +#endif + + return(OK); +} + +/*===========================================================================* + * unlock_vmnt * + *===========================================================================*/ +PUBLIC void unlock_vmnt(struct vmnt *vmp) +{ + ASSERTVMP(vmp); + +#if LOCK_DEBUG + /* Decrease read-only lock counter when not locked as VMNT_WRITE or + * VMNT_EXCL */ + if (!tll_locked_by_me(&vmp->m_lock)) + fp->fp_vmnt_rdlocks--; +#endif + + tll_unlock(&vmp->m_lock); + +#if LOCK_DEBUG + assert(!tll_locked_by_me(&vmp->m_lock)); +#endif + +} diff --git a/servers/avfs/vmnt.h b/servers/avfs/vmnt.h new file mode 100644 index 000000000..3143f045e --- /dev/null +++ b/servers/avfs/vmnt.h @@ -0,0 +1,24 @@ +#ifndef __VFS_VMNT_H__ +#define __VFS_VMNT_H__ + +EXTERN struct vmnt { + int m_fs_e; /* FS process' kernel endpoint */ + tll_t m_lock; + comm_t m_comm; + dev_t m_dev; /* device number */ + unsigned int m_flags; /* mount flags */ + struct vnode *m_mounted_on; /* vnode on which the partition is mounted */ + struct vnode *m_root_node; /* root vnode */ + char m_label[LABEL_MAX]; /* label of the file system process */ +} vmnt[NR_MNTS]; + +/* vmnt flags */ +#define VMNT_READONLY 01 /* Device mounted readonly */ +#define VMNT_BACKCALL 02 /* FS did back call */ + +/* vmnt lock types mapping */ +#define VMNT_READ TLL_READ +#define VMNT_WRITE TLL_READSER +#define VMNT_EXCL TLL_WRITE + +#endif diff --git a/servers/avfs/vnode.c b/servers/avfs/vnode.c new file mode 100644 index 000000000..5cd641780 --- /dev/null +++ b/servers/avfs/vnode.c @@ -0,0 +1,387 @@ +/* This file contains the routines related to vnodes. + * The entry points are: + * + * get_vnode - increase counter and get details of an inode + * get_free_vnode - get a pointer to a free vnode obj + * find_vnode - find a vnode according to the FS endpoint and the inode num. + * dup_vnode - duplicate vnode (i.e. increase counter) + * put_vnode - drop vnode (i.e. decrease counter) + */ + +#include "fs.h" +#include "threads.h" +#include "vnode.h" +#include "vmnt.h" +#include "fproc.h" +#include "file.h" +#include +#include + +/* Is vnode pointer reasonable? 
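+ * (That is, does it point into the vnode[] table? The macros below compile
+ * away when NDEBUG is set.)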
*/ +#if NDEBUG +#define SANEVP(v) +#define CHECKVN(v) +#define ASSERTVP(v) +#else +#define SANEVP(v) ((((v) >= &vnode[0] && (v) < &vnode[NR_VNODES]))) + +#define BADVP(v, f, l) printf("%s:%d: bad vp %p\n", f, l, v) + +/* vp check that returns 0 for use in check_vrefs() */ +#define CHECKVN(v) if(!SANEVP(v)) { \ + BADVP(v, __FILE__, __LINE__); \ + return 0; \ +} + +/* vp check that panics */ +#define ASSERTVP(v) if(!SANEVP(v)) { \ + BADVP(v, __FILE__, __LINE__); panic("bad vp"); } +#endif + +#if LOCK_DEBUG +/*===========================================================================* + * check_vnode_locks_by_me * + *===========================================================================*/ +PUBLIC void check_vnode_locks_by_me(struct fproc *rfp) +{ +/* Check whether this thread still has locks held on vnodes */ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) { + if (tll_locked_by_me(&vp->v_lock)) { + panic("Thread %d still holds vnode lock on vp %x call_nr=%d\n", + mthread_self(), vp, call_nr); + } + } + + if (rfp->fp_vp_rdlocks != 0) + panic("Thread %d still holds read locks on a vnode (%d) call_nr=%d\n", + mthread_self(), rfp->fp_vp_rdlocks, call_nr); +} +#endif + +/*===========================================================================* + * check_vnode_locks * + *===========================================================================*/ +PUBLIC void check_vnode_locks() +{ + struct vnode *vp; + int count = 0; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) + if (is_vnode_locked(vp)) { + count++; + } + + if (count) panic("%d locked vnodes\n", count); +#if 0 + printf("check_vnode_locks OK\n"); +#endif +} + +/*===========================================================================* + * get_free_vnode * + *===========================================================================*/ +PUBLIC struct vnode *get_free_vnode() +{ +/* Find a free vnode slot in the vnode table (it's not actually allocated) */ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) { + if (vp->v_ref_count == 0 && !is_vnode_locked(vp)) { + vp->v_pipe = NO_PIPE; + vp->v_uid = -1; + vp->v_gid = -1; + vp->v_sdev = NO_DEV; + vp->v_mapfs_e = NONE; + vp->v_mapfs_count = 0; + vp->v_mapinode_nr = 0; + return(vp); + } + } + + err_code = ENFILE; + return(NULL); +} + + +/*===========================================================================* + * find_vnode * + *===========================================================================*/ +PUBLIC struct vnode *find_vnode(int fs_e, int ino) +{ +/* Find a specified (FS endpoint and inode number) vnode in the + * vnode table */ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) + if (vp->v_ref_count > 0 && vp->v_inode_nr == ino && vp->v_fs_e == fs_e) + return(vp); + + return(NULL); +} + +/*===========================================================================* + * is_vnode_locked * + *===========================================================================*/ +PUBLIC int is_vnode_locked(struct vnode *vp) +{ +/* Find out whether a thread holds a lock on this vnode or is trying to obtain + * a lock. 
*/ + ASSERTVP(vp); + + return(tll_islocked(&vp->v_lock) || tll_haspendinglock(&vp->v_lock)); +} + +/*===========================================================================* + * init_vnodes * + *===========================================================================*/ +PUBLIC void init_vnodes(void) +{ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) { + vp->v_fs_e = NONE; + vp->v_mapfs_e = NONE; + vp->v_inode_nr = 0; + vp->v_ref_count = 0; + vp->v_fs_count = 0; + vp->v_mapfs_count = 0; + tll_init(&vp->v_lock); + } +} + +/*===========================================================================* + * lock_vnode * + *===========================================================================*/ +PUBLIC int lock_vnode(struct vnode *vp, tll_access_t locktype) +{ + int r; + + ASSERTVP(vp); + + r = tll_lock(&vp->v_lock, locktype); + +#if LOCK_DEBUG + if (locktype == VNODE_READ) { + fp->fp_vp_rdlocks++; + } +#endif + + if (r == EBUSY) return(r); + return(OK); +} + +/*===========================================================================* + * unlock_vnode * + *===========================================================================*/ +PUBLIC void unlock_vnode(struct vnode *vp) +{ + int i; + register struct vnode *rvp; + struct worker_thread *w; + ASSERTVP(vp); + +#if LOCK_DEBUG + /* Decrease read-only lock counter when not locked as VNODE_OPCL or + * VNODE_WRITE */ + if (!tll_locked_by_me(&vp->v_lock)) { + fp->fp_vp_rdlocks--; + } + + for (i = 0; i < NR_VNODES; i++) { + rvp = &vnode[i]; + + w = rvp->v_lock.t_write; + assert(w != self); + while (w && w->w_next != NULL) { + w = w->w_next; + assert(w != self); + } + + w = rvp->v_lock.t_serial; + assert(w != self); + while (w && w->w_next != NULL) { + w = w->w_next; + assert(w != self); + } + } +#endif + + tll_unlock(&vp->v_lock); +} + +/*===========================================================================* + * dup_vnode * + *===========================================================================*/ +PUBLIC void dup_vnode(struct vnode *vp) +{ +/* dup_vnode() is called to increment the vnode and therefore the + * referred inode's counter. + */ + ASSERTVP(vp); + vp->v_ref_count++; +} + + +/*===========================================================================* + * put_vnode * + *===========================================================================*/ +PUBLIC void put_vnode(struct vnode *vp) +{ +/* Decrease vnode's usage counter and decrease inode's usage counter in the + * corresponding FS process. Decreasing the fs_count each time we decrease the + * ref count would lead to poor performance. Instead, only decrease fs_count + * when the ref count hits zero. However, this could lead to fs_count to wrap. + * To prevent this, we drop the counter to 1 when the counter hits 256. + * We maintain fs_count as a sanity check to make sure VFS and the FS are in + * sync. + */ + int r, lock_vp; + + ASSERTVP(vp); + + /* Lock vnode. It's quite possible this thread already has a lock on this + * vnode. That's no problem, because the reference counter will not decrease + * to zero in that case. However, if the counter does decrease to zero *and* + * is already locked, we have a consistency problem somewhere. 
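+ * (Note that the final put_vnode() issues a single req_putnode() that drops
+ * all v_fs_count references at the FS in one go; see the end of this
+ * function.)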
*/ + lock_vp = lock_vnode(vp, VNODE_OPCL); + + if (vp->v_ref_count > 1) { + /* Decrease counter */ + vp->v_ref_count--; + if (vp->v_fs_count > 256) + vnode_clean_refs(vp); + if (lock_vp != EBUSY) unlock_vnode(vp); + return; + } + + /* If we already had a lock, there is a consistency problem */ + assert(lock_vp != EBUSY); + tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */ + + /* A vnode that's not in use can't be put back. */ + if (vp->v_ref_count <= 0) + panic("put_vnode failed: bad v_ref_count %d\n", vp->v_ref_count); + + /* fs_count should indicate that the file is in use. */ + if (vp->v_fs_count <= 0) + panic("put_vnode failed: bad v_fs_count %d\n", vp->v_fs_count); + + /* Tell FS we don't need this inode to be open anymore. */ + r = req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count); + + if (r != OK) { + printf("VFS: putnode failed: %d\n", r); + util_stacktrace(); + } + + /* This inode could've been mapped. If so, tell mapped FS to close it as + * well. If mapped onto same FS, this putnode is not needed. */ + if (vp->v_mapfs_e != NONE && vp->v_mapfs_e != vp->v_fs_e) + req_putnode(vp->v_mapfs_e, vp->v_mapinode_nr, vp->v_mapfs_count); + + vp->v_fs_count = 0; + vp->v_ref_count = 0; + vp->v_mapfs_count = 0; + + unlock_vnode(vp); +} + + +/*===========================================================================* + * vnode_clean_refs * + *===========================================================================*/ +PUBLIC void vnode_clean_refs(struct vnode *vp) +{ +/* Tell the underlying FS to drop all reference but one. */ + + if (vp == NULL) return; + if (vp->v_fs_count <= 1) return; /* Nothing to do */ + + /* Drop all references except one */ + req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count - 1); + vp->v_fs_count = 1; +} + + +#define REFVP(v) { vp = (v); CHECKVN(v); vp->v_ref_check++; } + +#if DO_SANITYCHECKS +/*===========================================================================* + * check_vrefs * + *===========================================================================*/ +PUBLIC int check_vrefs() +{ + int i, bad; + int ispipe_flag, ispipe_mode; + struct vnode *vp; + struct vmnt *vmp; + struct fproc *rfp; + struct filp *f; + + /* Clear v_ref_check */ + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) + vp->v_ref_check= 0; + + /* Count reference for processes */ + for (rfp=&fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if (rfp->fp_pid == PID_FREE) + continue; + if(rfp->fp_rd) REFVP(rfp->fp_rd); + if(rfp->fp_wd) REFVP(rfp->fp_wd); + } + + /* Count references from filedescriptors */ + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) + { + if (f->filp_count == 0) + continue; + REFVP(f->filp_vno); + } + + /* Count references to mount points */ + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) + { + if (vmp->m_dev == NO_DEV) + continue; + REFVP(vmp->m_root_node); + if(vmp->m_mounted_on) + REFVP(vmp->m_mounted_on); + } + + /* Check references */ + bad= 0; + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) + { + if (vp->v_ref_count != vp->v_ref_check) + { + printf( +"Bad reference count for inode %d on device 0x%x: found %d, listed %d\n", + vp->v_inode_nr, vp->v_dev, vp->v_ref_check, + vp->v_ref_count); + printf("last marked at %s, %d\n", + vp->v_file, vp->v_line); + bad= 1; + } + + /* Also check v_pipe */ + if (vp->v_ref_count != 0) + { + ispipe_flag= (vp->v_pipe == I_PIPE); + ispipe_mode= ((vp->v_mode & I_TYPE) == I_NAMED_PIPE); + if (ispipe_flag != ispipe_mode) + { + printf( +"Bad v_pipe for inode %d on device 0x%x: found %d, mode 0%o\n", 
+ vp->v_inode_nr, vp->v_dev, vp->v_pipe, + vp->v_mode); + printf("last marked at %s, %d\n", + vp->v_file, vp->v_line); + bad= 1; + } + } + } + return !bad; +} +#endif diff --git a/servers/avfs/vnode.h b/servers/avfs/vnode.h new file mode 100644 index 000000000..babb6aab4 --- /dev/null +++ b/servers/avfs/vnode.h @@ -0,0 +1,40 @@ +#ifndef __VFS_VNODE_H__ +#define __VFS_VNODE_H__ + +EXTERN struct vnode { + endpoint_t v_fs_e; /* FS process' endpoint number */ + endpoint_t v_mapfs_e; /* mapped FS process' endpoint number */ + ino_t v_inode_nr; /* inode number on its (minor) device */ + ino_t v_mapinode_nr; /* mapped inode number of mapped FS. */ + mode_t v_mode; /* file type, protection, etc. */ + uid_t v_uid; /* uid of inode. */ + gid_t v_gid; /* gid of inode. */ + off_t v_size; /* current file size in bytes */ + int v_ref_count; /* # times vnode used; 0 means slot is free */ + int v_fs_count; /* # reference at the underlying FS */ + int v_mapfs_count; /* # reference at the underlying mapped FS */ +#if 0 + int v_ref_check; /* for consistency checks */ +#endif + char v_pipe; /* set to I_PIPE if pipe */ + off_t v_pipe_rd_pos; + off_t v_pipe_wr_pos; + endpoint_t v_bfs_e; /* endpoint number for the FS proces in case + of a block special file */ + dev_t v_dev; /* device number on which the corresponding + inode resides */ + dev_t v_sdev; /* device number for special files */ + struct vmnt *v_vmnt; /* vmnt object of the partition */ + tll_t v_lock; /* three-level-lock */ +} vnode[NR_VNODES]; + + +/* Field values. */ +#define NO_PIPE 0 /* i_pipe is NO_PIPE if inode is not a pipe */ +#define I_PIPE 1 /* i_pipe is I_PIPE if inode is a pipe */ + +/* vnode lock types mapping */ +#define VNODE_READ TLL_READ +#define VNODE_OPCL TLL_READSER +#define VNODE_WRITE TLL_WRITE +#endif diff --git a/servers/avfs/worker.c b/servers/avfs/worker.c new file mode 100644 index 000000000..f8449109e --- /dev/null +++ b/servers/avfs/worker.c @@ -0,0 +1,336 @@ +#include "fs.h" +#include "glo.h" +#include "fproc.h" +#include "threads.h" +#include "job.h" +#include + +FORWARD _PROTOTYPE( void append_job, (struct job *job, + void *(*func)(void *arg)) ); +FORWARD _PROTOTYPE( void get_work, (struct worker_thread *worker) ); +FORWARD _PROTOTYPE( void *worker_main, (void *arg) ); +FORWARD _PROTOTYPE( void worker_sleep, (struct worker_thread *worker) ); +FORWARD _PROTOTYPE( void worker_wake, (struct worker_thread *worker) ); +PRIVATE int init = 0; +PRIVATE mthread_attr_t tattr; + +#ifdef MKCOVERAGE +# define TH_STACKSIZE (10 * 1024) +#else +# define TH_STACKSIZE (6 * 1024) +#endif + +#define ASSERTW(w) assert((w) == &sys_worker || (w) == &dl_worker || \ + ((w) >= &workers[0] && (w) < &workers[NR_WTHREADS])); + +/*===========================================================================* + * worker_init * + *===========================================================================*/ +PUBLIC void worker_init(struct worker_thread *worker) +{ +/* Initialize worker thread */ + if (!init) { + threads_init(); + assert(mthread_attr_init(&tattr) == 0); + if (mthread_attr_setstacksize(&tattr, TH_STACKSIZE) != 0) + panic("couldn't set default thread stack size"); + if (mthread_attr_setdetachstate(&tattr, MTHREAD_CREATE_DETACHED) != 0) + panic("couldn't set default thread detach state"); + pending = 0; + init = 1; + } + + ASSERTW(worker); + + worker->w_job.j_func = NULL; /* Mark not in use */ + worker->w_next = NULL; + assert(mutex_init(&worker->w_event_mutex, NULL) == 0); + assert(cond_init(&worker->w_event, NULL) == 0); + 
assert(mthread_create(&worker->w_tid, &tattr, worker_main, (void *) worker) == 0); + yield(); +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work(struct worker_thread *worker) +{ +/* Find new work to do. Work can be 'queued', 'pending', or absent. In the + * latter case wait for new work to come in. */ + + struct job *new_job; + struct fproc *rfp; + + ASSERTW(worker); + self = worker; + + /* Do we have queued work to do? */ + if ((new_job = worker->w_job.j_next) != NULL) { + worker->w_job = *new_job; + free(new_job); + return; + } else if (worker != &sys_worker && worker != &dl_worker && pending > 0) { + /* Find pending work */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if (rfp->fp_flags & FP_PENDING) { + worker->w_job = rfp->fp_job; + rfp->fp_job.j_func = NULL; + rfp->fp_flags &= ~FP_PENDING; /* No longer pending */ + pending--; + assert(pending >= 0); + return; + } + } + panic("Pending work inconsistency"); + } + + /* Wait for work to come to us */ + worker_sleep(worker); +} + +/*===========================================================================* + * worker_available * + *===========================================================================*/ +PUBLIC int worker_available(void) +{ + int busy, i; + + busy = 0; + for (i = 0; i < NR_WTHREADS; i++) { + if (workers[i].w_job.j_func != NULL) + busy++; + } + + return(NR_WTHREADS - busy); +} + +/*===========================================================================* + * worker_main * + *===========================================================================*/ +PRIVATE void *worker_main(void *arg) +{ +/* Worker thread main loop */ + struct worker_thread *me; + + me = (struct worker_thread *) arg; + ASSERTW(me); + + while(TRUE) { + get_work(me); + + /* Register ourselves in fproc table if possible */ + if (me->w_job.j_fp != NULL) { + me->w_job.j_fp->fp_wtid = me->w_tid; + } + + /* Carry out work */ + me->w_job.j_func(&me->w_job); + + /* Mark ourselves as done */ + me->w_job.j_func = NULL; + } + + return(NULL); /* Unreachable */ +} + +/*===========================================================================* + * dl_worker_start * + *===========================================================================*/ +PUBLIC void dl_worker_start(void *(*func)(void *arg)) +{ +/* Start the deadlock resolving worker. This worker is reserved to run in case + * all other workers are busy and we have to have an additional worker to come + * to the rescue. */ + assert(dl_worker.w_job.j_func == NULL); + + if (dl_worker.w_job.j_func == NULL) { + dl_worker.w_job.j_fp = fp; + dl_worker.w_job.j_m_in = m_in; + dl_worker.w_job.j_func = func; + worker_wake(&dl_worker); + } +} + +/*===========================================================================* + * sys_worker_start * + *===========================================================================*/ +PUBLIC void sys_worker_start(void *(*func)(void *arg)) +{ +/* Carry out work for the system (i.e., kernel or PM). If this thread is idle + * do it right away, else create new job and append it to the queue. 
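+ *
+ * Dispatch overview (an editorial sketch, not part of the patch; the handler
+ * names are hypothetical void *(*)(void *) functions):
+ *
+ *	worker_start(do_job);		-- any worker from the pool
+ *	sys_worker_start(do_sys_job);	-- this reserved system worker
+ *	dl_worker_start(do_unblock);	-- the deadlock-resolution worker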
*/ + + if (sys_worker.w_job.j_func == NULL) { + sys_worker.w_job.j_fp = fp; + sys_worker.w_job.j_m_in = m_in; + sys_worker.w_job.j_func = func; + worker_wake(&sys_worker); + } else { + append_job(&sys_worker.w_job, func); + } +} + +/*===========================================================================* + * append_job * + *===========================================================================*/ +PRIVATE void append_job(struct job *job, void *(*func)(void *arg)) +{ +/* Append a job */ + + struct job *new_job, *tail; + + /* Create new job */ + new_job = calloc(1, sizeof(struct job)); + assert(new_job != NULL); + new_job->j_fp = fp; + new_job->j_m_in = m_in; + new_job->j_func = func; + new_job->j_next = NULL; + + /* Append to queue */ + tail = job; + while (tail->j_next != NULL) tail = tail->j_next; + tail->j_next = new_job; +} + +/*===========================================================================* + * worker_start * + *===========================================================================*/ +PUBLIC void worker_start(void *(*func)(void *arg)) +{ +/* Find an available worker or wait for one */ + int i; + struct worker_thread *worker; + + worker = NULL; + for (i = 0; i < NR_WTHREADS; i++) { + if (workers[i].w_job.j_func == NULL) { + worker = &workers[i]; + break; + } + } + + if (worker != NULL) { + worker->w_job.j_fp = fp; + worker->w_job.j_m_in = m_in; + worker->w_job.j_func = func; + worker->w_job.j_next = NULL; + worker_wake(worker); + return; + } + + /* No worker threads available, let's wait for one to finish. */ + /* If this process already has a job scheduled, forget about this new + * job; + * - the new job is do_dummy and we have already scheduled an actual job + * - the new job is an actual job and we have already scheduled do_dummy in + * order to exit this proc, so doing the new job is pointless. 
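+ * (Otherwise the job is parked in fp->fp_job and flagged FP_PENDING; an
+ * idle worker will pick it up later in get_work().)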
*/ + if (fp->fp_job.j_func == NULL) { + assert(!(fp->fp_flags & FP_PENDING)); + fp->fp_job.j_fp = fp; + fp->fp_job.j_m_in = m_in; + fp->fp_job.j_func = func; + fp->fp_job.j_next = NULL; + fp->fp_flags |= FP_PENDING; + pending++; + } +} + +/*===========================================================================* + * worker_sleep * + *===========================================================================*/ +PRIVATE void worker_sleep(struct worker_thread *worker) +{ + ASSERTW(worker); + assert(self == worker); + assert(mutex_lock(&worker->w_event_mutex) == 0); + assert(cond_wait(&worker->w_event, &worker->w_event_mutex) == 0); + assert(mutex_unlock(&worker->w_event_mutex) == 0); + self = worker; +} + +/*===========================================================================* + * worker_wake * + *===========================================================================*/ +PRIVATE void worker_wake(struct worker_thread *worker) +{ +/* Signal a worker to wake up */ + ASSERTW(worker); + assert(mutex_lock(&worker->w_event_mutex) == 0); + assert(cond_signal(&worker->w_event) == 0); + assert(mutex_unlock(&worker->w_event_mutex) == 0); +} + +/*===========================================================================* + * worker_wait * + *===========================================================================*/ +PUBLIC void worker_wait(void) +{ + struct worker_thread *worker; + + worker = worker_self(); + worker->w_job.j_m_in = m_in; /* Store important global data */ + assert(fp == worker->w_job.j_fp); + worker_sleep(worker); + /* We continue here after waking up */ + fp = worker->w_job.j_fp; /* Restore global data */ + m_in = worker->w_job.j_m_in; + assert(worker->w_next == NULL); +} + +/*===========================================================================* + * worker_signal * + *===========================================================================*/ +PUBLIC void worker_signal(struct worker_thread *worker) +{ + ASSERTW(worker); /* Make sure we have a valid thread */ + worker_wake(worker); +} + +/*===========================================================================* + * worker_self * + *===========================================================================*/ +PUBLIC struct worker_thread *worker_self(void) +{ + struct worker_thread *worker; + worker = worker_get(mthread_self()); + assert(worker != NULL); + return(worker); +} + +/*===========================================================================* + * worker_get * + *===========================================================================*/ +PUBLIC struct worker_thread *worker_get(thread_t worker_tid) +{ + int i; + struct worker_thread *worker; + + worker = NULL; + if (worker_tid == sys_worker.w_tid) + worker = &sys_worker; + else if (worker_tid == dl_worker.w_tid) + worker = &dl_worker; + else { + for (i = 0; i < NR_WTHREADS; i++) { + if (workers[i].w_tid == worker_tid) { + worker = &workers[i]; + break; + } + } + } + + return(worker); +} + +/*===========================================================================* + * worker_getjob * + *===========================================================================*/ +PUBLIC struct job *worker_getjob(thread_t worker_tid) +{ + struct worker_thread *worker; + + if ((worker = worker_get(worker_tid)) != NULL) + return(&worker->w_job); + + return(NULL); +} diff --git a/servers/avfs/write.c b/servers/avfs/write.c new file mode 100644 index 000000000..b602eff1e --- /dev/null +++ b/servers/avfs/write.c @@ -0,0 +1,19 @@ +/* This file is the counterpart 
of "read.c". It contains the code for writing
+ * insofar as this is not contained in read_write().
+ *
+ * The entry points into this file are
+ * do_write: call read_write to perform the WRITE system call
+ */
+
+#include "fs.h"
+#include "file.h"
+
+
+/*===========================================================================*
+ * do_write *
+ *===========================================================================*/
+PUBLIC int do_write()
+{
+/* Perform the write(fd, buffer, nbytes) system call. */
+ return(read_write(WRITING));
+}
diff --git a/servers/is/Makefile b/servers/is/Makefile
index 811068304..225689c9d 100644
--- a/servers/is/Makefile
+++ b/servers/is/Makefile
@@ -1,4 +1,8 @@
 # Makefile for Information Server (IS)
+#
+
+.include <bsd.own.mk>
+
 PROG= is
 SRCS= main.c dmp.c dmp_kernel.c dmp_pm.c dmp_fs.c dmp_rs.c dmp_ds.c dmp_vm.c
@@ -13,4 +17,8 @@ CPPFLAGS.dmp_kernel.c+= -I${MINIXSRCDIR}
 CPPFLAGS.dmp_rs.c+= -I${MINIXSRCDIR}
 CPPFLAGS.dmp_vm.c+= -I${MINIXSRCDIR}
 
+.if ${BUILDAVFS} == "yes"
+CFLAGS+= -D_USEAVFS
+.endif
+
 .include 
diff --git a/servers/is/dmp_fs.c b/servers/is/dmp_fs.c
index aba188dd9..ae3691070 100644
--- a/servers/is/dmp_fs.c
+++ b/servers/is/dmp_fs.c
@@ -10,9 +10,15 @@
 #include "inc.h"
 #include "../mfs/const.h"
-#include "../vfs/const.h"
-#include "../vfs/fproc.h"
-#include "../vfs/dmap.h"
+#if defined(_USEAVFS)
+# include "../avfs/const.h"
+# include "../avfs/fproc.h"
+# include "../avfs/dmap.h"
+#else
+# include "../vfs/const.h"
+# include "../vfs/fproc.h"
+# include "../vfs/dmap.h"
+#endif
 #include 
 PUBLIC struct fproc fproc[NR_PROCS];
@@ -35,6 +41,7 @@ PUBLIC void fproc_dmp()
 fp = &fproc[i];
 if (fp->fp_pid <= 0) continue;
 if (++n > 22) break;
+#if !defined(_USEAVFS)
 printf("%3d %4d %2d/%d 0x%05x %2d (%2d) %2d (%2d) %3d %3d %3d ",
 i, fp->fp_pid,
 ((fp->fp_tty>>MAJOR)&BYTE), ((fp->fp_tty>>MINOR)&BYTE),
 fp->fp_umask,
 fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
 fp->fp_sesldr,
 fp->fp_blocked_on, !!fp->fp_revived
 );
+#else
+ printf("%3d %4d %2d/%d 0x%05x %2d (%2d) %2d (%2d) %3d %3d %3d ",
+ i, fp->fp_pid,
+ major(fp->fp_tty), minor(fp->fp_tty),
+ fp->fp_umask,
+ fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
+ !!(fp->fp_flags & FP_SESLDR),
+ fp->fp_blocked_on, !!(fp->fp_flags & FP_REVIVED)
+ );
+#endif
 if (fp->fp_blocked_on == FP_BLOCKED_ON_OTHER)
 printf("%4d\n", fp->fp_task);
 else
diff --git a/servers/procfs/Makefile b/servers/procfs/Makefile
index 04356f9d1..5836e8dab 100644
--- a/servers/procfs/Makefile
+++ b/servers/procfs/Makefile
@@ -1,9 +1,17 @@
 # Makefile for ProcFS server
+#
+
+.include <bsd.own.mk>
+
 PROG= procfs
 SRCS= buf.c main.c pid.c root.c tree.c util.c cpuinfo.c
 
 CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers
 
+.if ${BUILDAVFS} == "yes"
+CFLAGS+= -D_USEAVFS
+.endif
+
 DPADD+= ${LIBVTREEFS} ${LIBSYS}
 LDADD+= -lvtreefs -lsys
diff --git a/servers/procfs/inc.h b/servers/procfs/inc.h
index 1a51668ea..4240e076a 100644
--- a/servers/procfs/inc.h
+++ b/servers/procfs/inc.h
@@ -50,8 +50,13 @@
 #include "kernel/type.h"
 #include "kernel/proc.h"
 #include "pm/mproc.h"
-#include "vfs/const.h"
-#include "vfs/fproc.h"
+#if defined(_USEAVFS)
+# include "avfs/const.h"
+# include "avfs/fproc.h"
+#else
+# include "vfs/const.h"
+# include "vfs/fproc.h"
+#endif
 
 #include 
 #include 
diff --git a/share/mk/bsd.own.mk b/share/mk/bsd.own.mk
index 770dfcf34..c4192bfd3 100644
--- a/share/mk/bsd.own.mk
+++ b/share/mk/bsd.own.mk
@@ -12,6 +12,8 @@ SMP_FLAGS += -DCONFIG_MAX_CPUS=${CONFIG_MAX_CPUS}
 
 CPPFLAGS+= ${SMP_FLAGS}
 
+BUILDAVFS?= "no"
+
 MAKECONF?= /etc/make.conf
 .-include "${MAKECONF}"
diff --git
a/tools/Makefile b/tools/Makefile index ce5c27621..a359ff748 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -9,19 +9,26 @@ MDEC= /usr/mdec GEN_FILES= *.bak image kernel *.iso *.iso.gz cdfdimage rootimage src # Specify the programs that are part of the system image. +.if ${BUILDAVFS} == "yes" +VFS= "../servers/avfs/vfs" +PFS= "../servers/apfs/pfs" +.else +VFS= "../servers/vfs/vfs" +PFS= "../servers/pfs/pfs" +.endif KERNEL= kernel PROGRAMS= \ ../servers/ds/ds \ ../servers/rs/rs \ ../servers/pm/pm \ ../servers/sched/sched \ - ../servers/vfs/vfs \ + ${VFS} \ ../drivers/memory/memory \ ../drivers/log/log \ ../drivers/tty/tty \ ../servers/mfs/mfs \ ../servers/vm/vm \ - ../servers/pfs/pfs \ + ${PFS} \ ../servers/init/init usage:
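
A closing build-configuration note (editorial, not part of the patch):
bsd.own.mk above defaults BUILDAVFS to "no", so the merged AVFS/APFS servers
are opt-in. Presumably the switch is flipped in /etc/make.conf (which
bsd.own.mk includes) or on the make command line; the exact setting below is
an assumption, only the variable test itself appears in the patch:

	# /etc/make.conf
	BUILDAVFS="yes"

With that set, servers/Makefile descends into avfs and apfs instead of vfs
and pfs, and tools/Makefile packs ../servers/avfs/vfs and ../servers/apfs/pfs
into the boot image in their place.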