From: Thomas Veerman Date: Wed, 17 Aug 2011 13:23:45 +0000 (+0000) Subject: Merge AVFS and APFS X-Git-Tag: v3.2.0~354 X-Git-Url: http://zhaoyanbai.com/repos/?a=commitdiff_plain;h=a6bd3f4a2260809e96dc423b3173db3fb79a4c21;p=minix.git Merge AVFS and APFS --- diff --git a/servers/Makefile b/servers/Makefile index f5e7140e7..05a2bc6ce 100644 --- a/servers/Makefile +++ b/servers/Makefile @@ -3,14 +3,22 @@ .include +.if ${BUILDAVFS} == "yes" +VFS= "avfs" +PFS= "apfs" +.else +VFS= "vfs" +PFS= "pfs" +.endif + .if ${MKIMAGEONLY} == "yes" -SUBDIR= ds init mfs pfs pm rs sched vfs vm +SUBDIR= ds init mfs ${PFS} pm rs sched ${VFS} vm .else SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs \ - mfs pfs pm procfs rs sched vfs vm devman + mfs ${PFS} pm procfs rs sched ${VFS} vm devman .endif diff --git a/servers/apfs/Makefile b/servers/apfs/Makefile new file mode 100644 index 000000000..b713d2dee --- /dev/null +++ b/servers/apfs/Makefile @@ -0,0 +1,14 @@ +# Makefile for Pipe File System (PFS) +PROG= pfs +SRCS= open.c table.c inode.c main.c super.c link.c \ + buffer.c read.c misc.c mount.c utility.c stadir.c \ + uds.c dev_uds.c + +DPADD+= ${LIBDRIVER} ${LIBSYS} +LDADD+= -ldriver -lsys + +MAN= + +BINDIR?= /usr/sbin + +.include diff --git a/servers/apfs/buf.h b/servers/apfs/buf.h new file mode 100644 index 000000000..cd31f7e84 --- /dev/null +++ b/servers/apfs/buf.h @@ -0,0 +1,26 @@ +#ifndef __PFS_BUF_H__ +#define __PFS_BUF_H__ + +/* Buffer (block) cache. + */ + +struct buf { + /* Data portion of the buffer. */ + char b_data[PIPE_BUF]; /* ordinary user data */ + + /* Header portion of the buffer. */ + struct buf *b_next; /* used to link all free bufs in a chain */ + struct buf *b_prev; /* used to link all free bufs the other way */ + ino_t b_num; /* inode number on minor device */ + dev_t b_dev; /* major | minor device where block resides */ + int b_bytes; /* Number of bytes allocated in bp */ + int b_count; /* Number of users of this buffer */ +}; + +/* A block is free if b_dev == NO_DEV. */ + + +EXTERN struct buf *front; /* points to least recently used free block */ +EXTERN struct buf *rear; /* points to most recently used free block */ + +#endif diff --git a/servers/apfs/buffer.c b/servers/apfs/buffer.c new file mode 100644 index 000000000..a5c77b4a6 --- /dev/null +++ b/servers/apfs/buffer.c @@ -0,0 +1,103 @@ +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include +#include +#include + +FORWARD _PROTOTYPE( struct buf *new_block, (dev_t dev, ino_t inum) ); + +/*===========================================================================* + * buf_pool * + *===========================================================================*/ +PUBLIC void buf_pool(void) +{ +/* Initialize the buffer pool. */ + + front = NULL; + rear = NULL; +} + + + +/*===========================================================================* + * get_block * + *===========================================================================*/ +PUBLIC struct buf *get_block(dev_t dev, ino_t inum) +{ + struct buf *bp = front; + + while(bp != NULL) { + if (bp->b_dev == dev && bp->b_num == inum) { + bp->b_count++; + return(bp); + } + bp = bp->b_next; + } + + /* Buffer was not found. 
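
A minimal sketch of the get/put pairing a caller of this cache is expected
to maintain (not part of this patch; example_dev and example_ino are
hypothetical):

	struct buf *bp;

	bp = get_block(example_dev, example_ino);	/* find or allocate; bumps b_count */
	if (bp != NULL) {
		memset(bp->b_data, 0, PIPE_BUF);	/* use the PIPE_BUF-sized data area */
		put_block(example_dev, example_ino);	/* drop the reference taken above */
	}
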
Try to allocate a new one */ + return new_block(dev, inum); +} + + +/*===========================================================================* + * new_block * + *===========================================================================*/ +PRIVATE struct buf *new_block(dev_t dev, ino_t inum) +{ +/* Allocate a new buffer and add it to the double linked buffer list */ + struct buf *bp; + + bp = malloc(sizeof(struct buf)); + if (bp == NULL) { + err_code = ENOSPC; + return(NULL); + } + bp->b_num = inum; + bp->b_dev = dev; + bp->b_bytes = 0; + bp->b_count = 1; + memset(bp->b_data, 0 , PIPE_BUF); + + /* Add at the end of the buffer */ + if (front == NULL) { /* Empty list? */ + front = bp; + bp->b_prev = NULL; + } else { + rear->b_next = bp; + bp->b_prev = rear; + } + bp->b_next = NULL; + rear = bp; + + return(bp); +} + + +/*===========================================================================* + * put_block * + *===========================================================================*/ +PUBLIC void put_block(dev_t dev, ino_t inum) +{ + struct buf *bp; + + bp = get_block(dev, inum); + if (bp == NULL) return; /* We didn't find the block. Nothing to put. */ + + bp->b_count--; /* Compensate for above 'get_block'. */ + if (--bp->b_count > 0) return; + + /* Cut bp out of the loop */ + if (bp->b_prev == NULL) + front = bp->b_next; + else + bp->b_prev->b_next = bp->b_next; + + if (bp->b_next == NULL) + rear = bp->b_prev; + else + bp->b_next->b_prev = bp->b_prev; + + /* Buffer administration is done. Now it's safe to free up bp. */ + free(bp); +} diff --git a/servers/apfs/const.h b/servers/apfs/const.h new file mode 100644 index 000000000..f89a239be --- /dev/null +++ b/servers/apfs/const.h @@ -0,0 +1,42 @@ +#ifndef __PFS_CONST_H__ +#define __PFS_CONST_H__ + +#define NR_INODES 256 /* # slots in "in core" inode table */ + +/* Size of descriptor table for unix domain sockets. This should be + * equal to the maximum number of minor devices (currently 256). + */ +#define NR_FDS 256 + +#define INODE_HASH_LOG2 7 /* 2 based logarithm of the inode hash size */ +#define INODE_HASH_SIZE ((unsigned long)1<USER_ENDPT); +#endif + + /* + * Find a slot in the descriptor table for the new descriptor. + * The index of the descriptor in the table will be returned. + * Subsequent calls to read/write/close/ioctl/etc will use this + * minor number. The minor number must be different from the + * the /dev/uds device's minor number (currently 0). 
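
An aside on put_block() above: it re-enters get_block(), which increments
b_count, so it must decrement twice -- once to cancel its own lookup and
once for the reference actually being released. A sketch with a single
user (hypothetical dev/ino values):

	struct buf *bp = get_block(dev, ino);	/* fresh buffer: b_count == 1 */
	put_block(dev, ino);	/* internal get_block -> 2, minus 2 -> 0, so free(bp) */
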
+ */ + + minor = -1; /* to trap error */ + + for (i = 1; i < NR_FDS; i++) { + if (uds_fd_table[i].state == UDS_FREE) { + minor = i; + break; + } + } + + if (minor == -1) { + + /* descriptor table full */ + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, ENFILE); + return ENFILE; + } + + /* + * We found a slot in uds_fd_table, now initialize the descriptor + */ + + /* mark this one as 'in use' so that it doesn't get assigned to + * another socket + */ + uds_fd_table[minor].state = UDS_INUSE; + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* set the socket owner */ + uds_fd_table[minor].owner = dev_m_in->USER_ENDPT; + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + uds_fd_table[minor].select_proc = 0; + uds_fd_table[minor].sel_ops_in = 0; + uds_fd_table[minor].sel_ops_out = 0; + uds_fd_table[minor].status_updated = 0; + + /* initialize the data pointer (pos) to the start of the PIPE */ + uds_fd_table[minor].pos = 0; + + /* the PIPE is initially empty */ + uds_fd_table[minor].size = 0; + + /* the default for a new socket is to allow reading and writing. + * shutdown(2) will remove one or both flags. + */ + uds_fd_table[minor].mode = S_IRUSR|S_IWUSR; + + /* In libc socket(2) sets this to the actual value later with the + * NWIOSUDSTYPE ioctl(). + */ + uds_fd_table[minor].type = -1; + + /* Clear the backlog by setting each entry to -1 */ + for (i = 0; i < UDS_SOMAXCONN; i++) { + /* initially no connections are pending */ + uds_fd_table[minor].backlog[i] = -1; + } + + memset(&uds_fd_table[minor].ancillary_data, '\0', sizeof(struct + ancillary)); + for (i = 0; i < OPEN_MAX; i++) { + uds_fd_table[minor].ancillary_data.fds[i] = -1; + } + + /* default the size to UDS_SOMAXCONN */ + uds_fd_table[minor].backlog_size = UDS_SOMAXCONN; + + /* the socket isn't listening for incoming connections until + * listen(2) is called + */ + uds_fd_table[minor].listening = 0; + + /* initially the socket is not connected to a peer */ + uds_fd_table[minor].peer = -1; + + /* there isn't a child waiting to be accept(2)'d */ + uds_fd_table[minor].child = -1; + + /* initially the socket is not bound or listening on an address */ + memset(&(uds_fd_table[minor].addr), '\0', sizeof(struct sockaddr_un)); + memset(&(uds_fd_table[minor].source), '\0', sizeof(struct sockaddr_un)); + memset(&(uds_fd_table[minor].target), '\0', sizeof(struct sockaddr_un)); + + /* Initially the socket isn't suspended. */ + uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED; + + /* and the socket doesn't have an I/O grant initially */ + uds_fd_table[minor].io_gr = (cp_grant_id_t) 0; + + /* since there is no I/O grant it effectively has no size either */ + uds_fd_table[minor].io_gr_size = 0; + + /* The process isn't suspended so we don't flag it as revivable */ + uds_fd_table[minor].ready_to_revive = 0; + + /* get the effective user id and effective group id from the endpoint */ + /* this is needed in the REQ_NEWNODE request to PFS. 
*/ + rc = getnucred(uds_fd_table[minor].endpoint, &ucred); + if (rc == -1) { + /* roll back the changes we made to the descriptor */ + memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); + + /* likely error: invalid endpoint / proc doesn't exist */ + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, errno); + return errno; + } + + /* Prepare Request to the FS side of PFS */ + + fs_m_in.m_type = REQ_NEWNODE; + fs_m_in.REQ_MODE = I_NAMED_PIPE; + fs_m_in.REQ_DEV = NO_DEV; + fs_m_in.REQ_UID = ucred.uid; + fs_m_in.REQ_GID = ucred.gid; + + /* Request a new inode on the pipe file system */ + + rc = fs_newnode(&fs_m_in, &fs_m_out); + if (rc != OK) { + /* roll back the changes we made to the descriptor */ + memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); + + /* likely error: get_block() failed */ + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, rc); + return rc; + } + + /* Process the response */ + + uds_fd_table[minor].inode_nr = fs_m_out.RES_INODE_NR; + + /* prepare the reply */ + + uds_fd_table[minor].syscall_done = 1; + uds_set_reply(dev_m_out, DEV_OPEN_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, minor); + return minor; +} + +PUBLIC int uds_close(message *dev_m_in, message *dev_m_out) +{ + int minor; + message fs_m_in, fs_m_out; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_close() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_CLOSE_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + return EINVAL; + } + + /* no need to track the syscall in case of cancellation. close() is + * atomic and can't be cancelled. 
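
An aside on getnucred(2), which is called just below to turn the caller's
endpoint into credentials; a minimal sketch (some_endpoint is
hypothetical, and the declaring header is quoted from memory):

	#include <unistd.h>	/* getnucred() on MINIX */

	struct ucred cr;

	if (getnucred(some_endpoint, &cr) == -1)
		perror("getnucred");	/* e.g. the endpoint is gone */
	else
		printf("pid=%d uid=%d gid=%d\n", cr.pid, cr.uid, cr.gid);
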
no need to update the endpoint here, + * we won't be needing it to kill the socket + */ + + /* if the socket is connected, disconnect it */ + if (uds_fd_table[minor].peer != -1) { + + /* set peer of this peer to -1 */ + uds_fd_table[uds_fd_table[minor].peer].peer = -1; + + /* error to pass to peer */ + uds_fd_table[uds_fd_table[minor].peer].err = ECONNRESET; + + /* if peer was blocked on I/O revive peer */ + if (uds_fd_table[uds_fd_table[minor].peer].suspended) { + int peer = uds_fd_table[minor].peer; + + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(dev_m_in->m_source, peer); + } + } + + if (uds_fd_table[minor].ancillary_data.nfiledes > 0) { + clear_fds(minor, &(uds_fd_table[minor].ancillary_data)); + } + + /* Prepare Request to the FS side of PFS */ + + fs_m_in.m_type = REQ_PUTNODE; + fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr; + fs_m_in.REQ_COUNT = 1; + + /* set the socket back to its original UDS_FREE state */ + memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); + + /* Request the removal of the inode from the pipe file system */ + + rc = fs_putnode(&fs_m_in, &fs_m_out); + if (rc != OK) { + perror("fs_putnode"); + /* likely error: get_block() failed */ + return rc; + } + + uds_set_reply(dev_m_out, DEV_CLOSE_REPL, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, OK); + return OK; +} + +PUBLIC int uds_select(message *dev_m_in, message *dev_m_out) +{ + int i, bytes; + int minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_select() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + + uds_sel_reply(dev_m_out, DEV_SEL_REPL1, minor, EINVAL); + return EINVAL; + } + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 1; + uds_fd_table[minor].select_proc = dev_m_in->m_source; + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* Can't update the process endpoint here, no info. 
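
For reference, the userland counterpart of the select(2) bookkeeping set
up below; sock_fd is a hypothetical connected AF_UNIX socket:

	fd_set rfds;
	struct timeval tv = { 5, 0 };

	FD_ZERO(&rfds);
	FD_SET(sock_fd, &rfds);
	if (select(sock_fd + 1, &rfds, NULL, NULL, &tv) > 0 &&
	    FD_ISSET(sock_fd, &rfds))
		;	/* uds_select() reported SEL_RD: data or a pending connection */
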
*/ + + uds_fd_table[minor].sel_ops_in = dev_m_in->USER_ENDPT; + uds_fd_table[minor].sel_ops_out = 0; + + /* check if there is data available to read */ + bytes = uds_perform_read(minor, dev_m_in->m_source, 1, 1); + if (bytes > 0) { + + /* there is data in the pipe for us to read */ + uds_fd_table[minor].sel_ops_out |= SEL_RD; + + } else if (uds_fd_table[minor].listening == 1) { + + /* check for pending connections */ + for (i = 0; i < uds_fd_table[minor].backlog_size; i++) { + if (uds_fd_table[minor].backlog[i] != -1) { + uds_fd_table[minor].sel_ops_out |= SEL_RD; + break; + } + } + } + + /* check if we can write without blocking */ + bytes = uds_perform_write(minor, dev_m_in->m_source, PIPE_BUF, 1); + if (bytes > 0) { + uds_fd_table[minor].sel_ops_out |= SEL_WR; + } + + uds_fd_table[minor].syscall_done = 1; + uds_sel_reply(dev_m_out, DEV_SEL_REPL1, minor, + uds_fd_table[minor].sel_ops_out); + + return uds_fd_table[minor].sel_ops_out; +} + +PRIVATE int uds_perform_read(int minor, endpoint_t m_source, + size_t size, int pretend) +{ + int rc; + message fs_m_in; + message fs_m_out; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_perform_read() call_count=%d\n", minor, + ++call_count); +#endif + + /* skip reads and writes of 0 (or less!) bytes */ + if (size <= 0) { + return 0; + } + + /* check if we are allowed to read */ + if (!(uds_fd_table[minor].mode & S_IRUSR)) { + + /* socket is shutdown for reading */ + return EPIPE; + } + + if (uds_fd_table[minor].size == 0) { + + if (pretend) { + return SUSPEND; + } + + /* maybe a process is blocked waiting to write? if + * needed revive the writer + */ + if (uds_fd_table[minor].peer != -1 && + uds_fd_table[uds_fd_table[minor].peer].suspended) { + int peer = uds_fd_table[minor].peer; + + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + +#if DEBUG == 1 + printf("(uds) [%d] suspending read request\n", minor); +#endif + + /* Process is reading from an empty pipe, + * suspend it so some bytes can be written + */ + uds_fd_table[minor].suspended = UDS_SUSPENDED_READ; + return SUSPEND; + } + + if (pretend) { + + return (size > uds_fd_table[minor].size) ? + uds_fd_table[minor].size : size; + } + + + /* Prepare Request to the FS side of PFS */ + fs_m_in.m_type = REQ_READ; + fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr; + fs_m_in.REQ_GRANT = uds_fd_table[minor].io_gr; + fs_m_in.REQ_SEEK_POS_HI = 0; + fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[minor].pos; + fs_m_in.REQ_NBYTES = (size > uds_fd_table[minor].size) ? 
+ uds_fd_table[minor].size : size; + + /* perform the read */ + rc = fs_readwrite(&fs_m_in, &fs_m_out); + if (rc != OK) { + perror("fs_readwrite"); + return rc; + } + + /* Process the response */ +#if DEBUG == 1 + printf("(uds) [%d] read complete\n", minor); +#endif + + /* move the position of the data pointer up to data we haven't + * read yet + */ + uds_fd_table[minor].pos += fs_m_out.RES_NBYTES; + + /* decrease the number of unread bytes */ + uds_fd_table[minor].size -= fs_m_out.RES_NBYTES; + + /* if we have 0 unread bytes, move the data pointer back to the + * start of the buffer + */ + if (uds_fd_table[minor].size == 0) { + uds_fd_table[minor].pos = 0; + } + + /* maybe a big write was waiting for us to read some data, if + * needed revive the writer + */ + if (uds_fd_table[minor].peer != -1 && + uds_fd_table[uds_fd_table[minor].peer].suspended) { + int peer = uds_fd_table[minor].peer; + + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + + /* see if peer is blocked on select() and a write is possible + * (from peer to minor) + */ + if (uds_fd_table[minor].peer != -1 && + uds_fd_table[uds_fd_table[minor].peer].selecting == 1 && + (uds_fd_table[minor].size + uds_fd_table[minor].pos + 1 + < PIPE_BUF)) { + + int peer = uds_fd_table[minor].peer; + + /* if the peer wants to know about write being possible + * and it doesn't know about it already, then let the peer know. + */ + if ((uds_fd_table[peer].sel_ops_in & SEL_WR) && + !(uds_fd_table[peer].sel_ops_out & SEL_WR)) { + + /* a write on peer is possible now */ + uds_fd_table[peer].sel_ops_out |= SEL_WR; + uds_fd_table[peer].status_updated = 1; + uds_unsuspend(m_source, peer); + } + } + + return fs_m_out.RES_NBYTES; /* return number of bytes read */ +} + +PRIVATE int uds_perform_write(int minor, endpoint_t m_source, + size_t size, int pretend) +{ + int rc, peer, i; + message fs_m_in; + message fs_m_out; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_perform_write() call_count=%d\n", minor, + ++call_count); +#endif + + /* skip reads and writes of 0 (or less!) bytes */ + if (size <= 0) { + return 0; + } + + /* check if we are allowed to write */ + if (!(uds_fd_table[minor].mode & S_IWUSR)) { + + /* socket is shutdown for writing */ + return EPIPE; + } + + if (size > PIPE_BUF) { + + /* message is too big to ever write to the PIPE */ + return EMSGSIZE; + } + + if (uds_fd_table[minor].type == SOCK_STREAM || + uds_fd_table[minor].type == SOCK_SEQPACKET) { + + /* if we're writing with a connection oriented socket, + * then it needs a peer to write to + */ + if (uds_fd_table[minor].peer == -1) { + if (uds_fd_table[minor].err == ECONNRESET) { + + uds_fd_table[minor].err = 0; + return ECONNRESET; + } else { + return ENOTCONN; + } + } else { + + peer = uds_fd_table[minor].peer; + } + + } else /* uds_fd_table[minor].type == SOCK_DGRAM */ { + + peer = -1; + + /* locate the "peer" we want to write to */ + for (i = 0; i < NR_FDS; i++) { + + /* look for a SOCK_DGRAM socket that is bound on + * the target address + */ + if (uds_fd_table[i].type == SOCK_DGRAM && + uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(uds_fd_table[minor].target.sun_path, + uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) { + + peer = i; + break; + } + } + + if (peer == -1) { + return ENOENT; + } + } + + /* check if write would overrun buffer. check if message + * boundry preserving types (SEQPACKET and DGRAM) wouldn't write + * to an empty buffer. check if connectionless sockets have a + * target to write to. 
+ */ + if ((uds_fd_table[peer].pos+uds_fd_table[peer].size+size > PIPE_BUF) || + ((uds_fd_table[minor].type == SOCK_SEQPACKET || + uds_fd_table[minor].type == SOCK_DGRAM) && + uds_fd_table[peer].size > 0) || (peer == -1)) { + + if (pretend) { + return SUSPEND; + } + + /* if needed revive the reader */ + if (uds_fd_table[peer].suspended) { + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + +#if DEBUG == 1 + printf("(uds) [%d] suspending write request\n", minor); +#endif + + /* Process is reading from an empty pipe, + * suspend it so some bytes can be written + */ + uds_fd_table[minor].suspended = UDS_SUSPENDED_WRITE; + return SUSPEND; + } + + if (pretend) { + return size; + } + + /* Prepare Request to the FS side of PFS */ + fs_m_in.m_type = REQ_WRITE; + fs_m_in.REQ_INODE_NR = uds_fd_table[peer].inode_nr; + fs_m_in.REQ_GRANT = uds_fd_table[minor].io_gr; + fs_m_in.REQ_SEEK_POS_HI = 0; + fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[peer].pos + + uds_fd_table[peer].size; + fs_m_in.REQ_NBYTES = size; + + /* Request the write */ + rc = fs_readwrite(&fs_m_in, &fs_m_out); + if (rc != OK) { + perror("fs_readwrite"); + return rc; + } + + /* Process the response */ +#if DEBUG == 1 + printf("(uds) [%d] write complete\n", minor); +#endif + /* increase the count of unread bytes */ + uds_fd_table[peer].size += fs_m_out.RES_NBYTES; + + + /* fill in the source address to be returned by recvfrom & recvmsg */ + if (uds_fd_table[minor].type == SOCK_DGRAM) { + memcpy(&uds_fd_table[peer].source, &uds_fd_table[minor].addr, + sizeof(struct sockaddr_un)); + } + + /* revive peer that was waiting for us to write */ + if (uds_fd_table[peer].suspended) { + uds_fd_table[peer].ready_to_revive = 1; + uds_unsuspend(m_source, peer); + } + + /* see if peer is blocked on select()*/ + if (uds_fd_table[peer].selecting == 1 && fs_m_out.RES_NBYTES > 0) { + + /* if the peer wants to know about data ready to read + * and it doesn't know about it already, then let the peer + * know we have data for it. + */ + if ((uds_fd_table[peer].sel_ops_in & SEL_RD) && + !(uds_fd_table[peer].sel_ops_out & SEL_RD)) { + + /* a read on peer is possible now */ + uds_fd_table[peer].sel_ops_out |= SEL_RD; + uds_fd_table[peer].status_updated = 1; + uds_unsuspend(m_source, peer); + } + } + + return fs_m_out.RES_NBYTES; /* return number of bytes written */ +} + +PUBLIC int uds_read(message *dev_m_in, message *dev_m_out) +{ + int bytes; + int minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_read() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT, + dev_m_in->POSITION); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* Update the process endpoint. 
*/ + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* save I/O Grant info */ + uds_fd_table[minor].io_gr = (cp_grant_id_t) dev_m_in->IO_GRANT; + uds_fd_table[minor].io_gr_size = dev_m_in->COUNT; + + bytes = uds_perform_read(minor, dev_m_in->m_source, + uds_fd_table[minor].io_gr_size, 0); + + uds_set_reply(dev_m_out, DEV_REVIVE, uds_fd_table[minor].endpoint, + uds_fd_table[minor].io_gr, bytes); + + return bytes; +} + +PUBLIC int uds_write(message *dev_m_in, message *dev_m_out) +{ + int bytes; + int minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_write() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT, + dev_m_in->POSITION); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = 0; + uds_fd_table[minor].syscall_done = 0; + + /* Update the process endpoint. */ + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* save I/O Grant info */ + uds_fd_table[minor].io_gr = (cp_grant_id_t) dev_m_in->IO_GRANT; + uds_fd_table[minor].io_gr_size = dev_m_in->COUNT; + + bytes = uds_perform_write(minor, dev_m_in->m_source, + uds_fd_table[minor].io_gr_size, 0); + + uds_set_reply(dev_m_out, DEV_REVIVE, uds_fd_table[minor].endpoint, + uds_fd_table[minor].io_gr, bytes); + + return bytes; +} + +PUBLIC int uds_ioctl(message *dev_m_in, message *dev_m_out) +{ + int rc, minor; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_ioctl() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x | Position 0x%x\n", dev_m_in->USER_ENDPT, + dev_m_in->POSITION); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* track the system call we are performing in case it gets cancelled */ + uds_fd_table[minor].call_nr = dev_m_in->m_type; + uds_fd_table[minor].ioctl = dev_m_in->COUNT; + uds_fd_table[minor].syscall_done = 0; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* update the owner endpoint - yes it's really stored in POSITION */ + uds_fd_table[minor].owner = dev_m_in->POSITION; + + switch (dev_m_in->COUNT) { /* Handle the ioctl(2) command */ + + case NWIOSUDSCONN: + + /* connect to a listening socket -- connect() */ + rc = do_connect(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSACCEPT: + + /* accept an incoming connection -- accept() */ + rc = do_accept(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSBLOG: + + /* set the backlog_size and put the socket into the + * listening state -- listen() + */ + rc = do_listen(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSTYPE: + + /* set the type for this socket (i.e. 
+ * SOCK_STREAM, SOCK_DGRAM, etc) -- socket() + */ + rc = do_socket(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSADDR: + + /* set the address for this socket -- bind() */ + rc = do_bind(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSADDR: + + /* get the address for this socket -- getsockname() */ + rc = do_getsockname(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSPADDR: + + /* get the address for the peer -- getpeername() */ + rc = do_getpeername(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSSHUT: + + /* shutdown a socket for reading, writing, or + * both -- shutdown() + */ + rc = do_shutdown(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSPAIR: + + /* connect two sockets -- socketpair() */ + rc = do_socketpair(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSSOTYPE: + + /* get socket type -- getsockopt(SO_TYPE) */ + rc = do_getsockopt_sotype(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSPEERCRED: + + /* get peer endpoint -- getsockopt(SO_PEERCRED) */ + rc = do_getsockopt_peercred(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSTADDR: + + /* set target address -- sendto() */ + rc = do_sendto(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSFADDR: + + /* get from address -- recvfrom() */ + rc = do_recvfrom(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSSNDBUF: + + /* get the send buffer size -- getsockopt(SO_SNDBUF) */ + rc = do_getsockopt_sndbuf(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSSNDBUF: + + /* set the send buffer size -- setsockopt(SO_SNDBUF) */ + rc = do_setsockopt_sndbuf(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSRCVBUF: + + /* get the send buffer size -- getsockopt(SO_SNDBUF) */ + rc = do_getsockopt_rcvbuf(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSRCVBUF: + + /* set the send buffer size -- setsockopt(SO_SNDBUF) */ + rc = do_setsockopt_rcvbuf(dev_m_in, dev_m_out); + + break; + + case NWIOSUDSCTRL: + + /* set the control data -- sendmsg() */ + rc = do_sendmsg(dev_m_in, dev_m_out); + + break; + + case NWIOGUDSCTRL: + + /* set the control data -- recvmsg() */ + rc = do_recvmsg(dev_m_in, dev_m_out); + + break; + + default: + + /* the IOCTL command is not valid for /dev/uds -- + * this happens a lot and is normal. a lot of + * libc functions determine the socket type with + * IOCTLs. Any not for us simply get a EBADIOCTL + * response. 
+ */ + + rc = EBADIOCTL; + } + + if (rc != SUSPEND) + uds_fd_table[minor].syscall_done = 1; + + uds_set_reply(dev_m_out, DEV_REVIVE, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, rc); + + return rc; +} + +PUBLIC int uds_unsuspend(endpoint_t m_source, int minor) +{ + int r, bytes; + message m_out; + uds_fd_t *fdp; + + fdp = &uds_fd_table[minor]; + + if (fdp->status_updated == 1) { + + /* clear the status_updated flag */ + fdp->status_updated = 0; + fdp->selecting = 0; + + /* prepare the response */ + uds_sel_reply(&m_out, DEV_SEL_REPL2, minor, fdp->sel_ops_out); + r = OK; + } else if (fdp->ready_to_revive == 1) { + + /* clear the ready to revive flag */ + fdp->ready_to_revive = 0; + + switch (fdp->suspended) { + + case UDS_SUSPENDED_READ: + + bytes = uds_perform_read(minor, m_source, + fdp->io_gr_size, 0); + + if (bytes == SUSPEND) { + r = SUSPEND; + break; + } + + fdp->suspended = UDS_NOT_SUSPENDED; + + uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint, + fdp->io_gr, bytes); + + r = OK; + break; + + case UDS_SUSPENDED_WRITE: + + bytes = uds_perform_write(minor, m_source, + fdp->io_gr_size, 0); + + if (bytes == SUSPEND) { + r = SUSPEND; + break; + } + + fdp->suspended = UDS_NOT_SUSPENDED; + + uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint, + fdp->io_gr, bytes); + + r = OK; + break; + + case UDS_SUSPENDED_CONNECT: + case UDS_SUSPENDED_ACCEPT: + + /* In both cases, the process + * that send the notify() + * already performed the connection. + * The only thing to do here is + * unblock. + */ + + fdp->suspended = UDS_NOT_SUSPENDED; + + uds_set_reply(&m_out, DEV_REVIVE, fdp->endpoint, + fdp->io_gr, OK); + + r = OK; + break; + + default: + return(OK); + } + + } + + if (r == OK) reply(m_source, &m_out); + return(r); +} + +PUBLIC int uds_cancel(message *dev_m_in, message *dev_m_out) +{ + int i, j; + int minor; + /* XXX: should become a noop? */ +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] uds_cancel() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); + printf("Endpoint: 0x%x\n", dev_m_in->USER_ENDPT); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].state != UDS_INUSE) { + + /* attempted to close a socket that hasn't been opened -- + * something is very wrong :( + */ + uds_set_reply(dev_m_out, DEV_NO_STATUS, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINVAL); + + return EINVAL; + } + + /* Update the process endpoint. */ + uds_fd_table[minor].endpoint = dev_m_in->USER_ENDPT; + + /* setup select(2) framework */ + uds_fd_table[minor].selecting = 0; + + /* the system call was cancelled, so if the socket was suspended + * (which is likely the case), then it is not suspended anymore. + */ + uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED; + + /* If there is a system call and it isn't complete, roll back */ + if (uds_fd_table[minor].call_nr && !uds_fd_table[minor].syscall_done) { + + + if (uds_fd_table[minor].call_nr == DEV_IOCTL_S) { + + switch (uds_fd_table[minor].ioctl) { + + case NWIOSUDSACCEPT: /* accept() */ + + /* partial accept() only changes + * uds_fd_table[minorparent].child + */ + + for (i = 0; i < NR_FDS; i++) { + if (uds_fd_table[i].child == + minor) { + + uds_fd_table[i].child = -1; + + } + } + + break; + + case NWIOSUDSCONN: /* connect() */ + + /* partial connect() sets addr + * and adds minor to server backlog + */ + + for (i = 0; i < NR_FDS; i++) { + + /* find a socket that is in + * use. 
+ */ + if (uds_fd_table[i].state == + UDS_INUSE) { + + /* see if minor is in + * the backlog + */ + for (j = 0; j < uds_fd_table[i].backlog_size; j++) { + + if (uds_fd_table[i].backlog[j] == minor) { + + /* remove from backlog */ + uds_fd_table[i].backlog[j] = -1; + } + } + + } + } + + /* clear the address */ + memset(&(uds_fd_table[minor].addr), + '\0', + sizeof(struct sockaddr_un)); + + break; + + case NWIOSUDSTADDR: /* sendto() */ + case NWIOSUDSADDR: /* bind() */ + case NWIOGUDSADDR: /* getsockname() */ + case NWIOGUDSPADDR: /* getpeername() */ + case NWIOSUDSTYPE: /* socket() */ + case NWIOSUDSBLOG: /* listen() */ + case NWIOSUDSSHUT: /* shutdown() */ + case NWIOSUDSPAIR: /* socketpair() */ + case NWIOGUDSSOTYPE: /* SO_TYPE */ + case NWIOGUDSPEERCRED: /* SO_PEERCRED */ + default: + /* these are atomic, never suspend, + * and can't be cancelled once called + */ + break; + } + + } + + /* DEV_READ_S or DEV_WRITE_S don't need to do anything + * when cancelled. DEV_OPEN, DEV_REOPEN, DEV_SELECT, + * DEV_CLOSE are atomic, never suspend, and can't + * be cancelled once called. + */ + + uds_fd_table[minor].syscall_done = 1; + } + + + uds_set_reply(dev_m_out, DEV_NO_STATUS, dev_m_in->USER_ENDPT, + (cp_grant_id_t) dev_m_in->IO_GRANT, EINTR); + + return EINTR; +} diff --git a/servers/apfs/fs.h b/servers/apfs/fs.h new file mode 100644 index 000000000..2bc006af3 --- /dev/null +++ b/servers/apfs/fs.h @@ -0,0 +1,31 @@ +#ifndef __PFS_FS_H__ +#define __PFS_FS_H__ + +/* This is the master header for pfs. It includes some other files + * and defines the principal constants. + */ +#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ +#define _SYSTEM 1 /* tell headers that this is the kernel */ + +/* The following are so basic, all the *.c files get them automatically. */ +#include /* MUST be first */ +#include /* MUST be second */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "const.h" +#include "proto.h" +#include "glo.h" + +#endif diff --git a/servers/apfs/glo.h b/servers/apfs/glo.h new file mode 100644 index 000000000..175b4fc8e --- /dev/null +++ b/servers/apfs/glo.h @@ -0,0 +1,29 @@ +#ifndef __PFS_GLO_H__ +#define __PFS_GLO_H__ + +/* EXTERN should be extern except for the table file */ +#ifdef _TABLE +#undef EXTERN +#define EXTERN +#endif + +#include + +/* The following variables are used for returning results to the caller. */ +EXTERN int err_code; /* temporary storage for error number */ + +EXTERN _PROTOTYPE (int (*fs_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* fs call table */ +EXTERN _PROTOTYPE (int (*dev_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* dev call table */ + +EXTERN uid_t caller_uid; +EXTERN gid_t caller_gid; +EXTERN int req_nr; +EXTERN int SELF_E; +EXTERN int exitsignaled; +EXTERN int busy; +EXTERN int unmountdone; + +/* Inode map. 
*/ +EXTERN bitchunk_t inodemap[FS_BITMAP_CHUNKS(NR_INODES)]; + +#endif diff --git a/servers/apfs/inc.h b/servers/apfs/inc.h new file mode 100644 index 000000000..4484e8038 --- /dev/null +++ b/servers/apfs/inc.h @@ -0,0 +1,41 @@ + +#define _SYSTEM 1 /* get OK and negative error codes */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ + +#define VERBOSE 0 /* display diagnostics */ + +#ifdef __NBSD_LIBC +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "proto.h" diff --git a/servers/apfs/inode.c b/servers/apfs/inode.c new file mode 100644 index 000000000..e73d39ca6 --- /dev/null +++ b/servers/apfs/inode.c @@ -0,0 +1,334 @@ +/* This file manages the inode table. There are procedures to allocate and + * deallocate inodes, acquire, erase, and release them, and read and write + * them from the disk. + * + * The entry points into this file are + * get_inode: search inode table for a given inode; if not there, + * read it + * put_inode: indicate that an inode is no longer needed in memory + * alloc_inode: allocate a new, unused inode + * wipe_inode: erase some fields of a newly allocated inode + * free_inode: mark an inode as available for a new file + * update_times: update atime, ctime, and mtime + * find_inode: retrieve pointer to inode in inode cache + * + */ + +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include + +FORWARD _PROTOTYPE( void addhash_inode, (struct inode * const node) ); +FORWARD _PROTOTYPE( void unhash_inode, (struct inode * const node) ); + + +/*===========================================================================* + * fs_putnode * + *===========================================================================*/ +PUBLIC int fs_putnode(message *fs_m_in, message *fs_m_out) +{ +/* Find the inode specified by the request message and decrease its counter.*/ + + struct inode *rip; + int count; + dev_t dev; + ino_t inum; + + rip = find_inode( (ino_t) fs_m_in->REQ_INODE_NR); + + if(!rip) { + printf("%s:%d put_inode: inode #%ld dev: %d not found\n", __FILE__, + __LINE__, fs_m_in->REQ_INODE_NR, (dev_t) fs_m_in->REQ_DEV); + panic("fs_putnode failed"); + } + + count = fs_m_in->REQ_COUNT; + if (count <= 0) { + printf("%s:%d put_inode: bad value for count: %d\n", __FILE__, + __LINE__, count); + panic("fs_putnode failed"); + } else if(count > rip->i_count) { + printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__, + __LINE__, count, rip->i_count); + panic("fs_putnode failed"); + } + + /* Decrease reference counter, but keep one reference; it will be consumed by + * put_inode(). 
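
A worked example of the counting below, with hypothetical numbers: VFS
drops three references at once (REQ_COUNT == 3) on an inode currently at
i_count == 5:

	rip->i_count -= count - 1;	/* 5 - 2  ->  i_count == 3 */
	put_inode(rip);			/* 3 - 1  ->  i_count == 2, inode stays */
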
*/ + rip->i_count -= count - 1; + dev = rip->i_dev; + inum = rip->i_num; + put_inode(rip); + if (rip->i_count == 0) put_block(dev, inum); + return(OK); +} + + +/*===========================================================================* + * init_inode_cache * + *===========================================================================*/ +PUBLIC void init_inode_cache() +{ + struct inode *rip; + struct inodelist *rlp; + + /* init free/unused list */ + TAILQ_INIT(&unused_inodes); + + /* init hash lists */ + for (rlp = &hash_inodes[0]; rlp < &hash_inodes[INODE_HASH_SIZE]; ++rlp) + LIST_INIT(rlp); + + /* add free inodes to unused/free list */ + for (rip = &inode[0]; rip < &inode[NR_INODES]; ++rip) { + rip->i_num = NO_ENTRY; + TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused); + } + + /* Reserve the first inode (bit 0) to prevent it from being allocated later*/ + if (alloc_bit() != NO_BIT) printf("PFS could not reserve NO_BIT\n"); + busy = 0; /* This bit does not make the server 'in use/busy'. */ +} + + +/*===========================================================================* + * addhash_inode * + *===========================================================================*/ +PRIVATE void addhash_inode(struct inode * const node) +{ + int hashi = (int) (node->i_num & INODE_HASH_MASK); + + /* insert into hash table */ + LIST_INSERT_HEAD(&hash_inodes[hashi], node, i_hash); +} + + +/*===========================================================================* + * unhash_inode * + *===========================================================================*/ +PRIVATE void unhash_inode(struct inode * const node) +{ + /* remove from hash table */ + LIST_REMOVE(node, i_hash); +} + + +/*===========================================================================* + * get_inode * + *===========================================================================*/ +PUBLIC struct inode *get_inode( + dev_t dev, /* device on which inode resides */ + ino_t numb /* inode number */ +) +{ +/* Find the inode in the hash table. If it is not there, get a free inode + * load it from the disk if it's necessary and put on the hash list + */ + register struct inode *rip; + int hashi; + + hashi = (int) (numb & INODE_HASH_MASK); + + /* Search inode in the hash table */ + LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) { + if (rip->i_num == numb && rip->i_dev == dev) { + /* If unused, remove it from the unused/free list */ + if (rip->i_count == 0) { + TAILQ_REMOVE(&unused_inodes, rip, i_unused); + } + ++rip->i_count; + + return(rip); + } + } + + /* Inode is not on the hash, get a free one */ + if (TAILQ_EMPTY(&unused_inodes)) { + err_code = ENFILE; + return(NULL); + } + rip = TAILQ_FIRST(&unused_inodes); + + /* If not free unhash it */ + if (rip->i_num != NO_ENTRY) unhash_inode(rip); + + /* Inode is not unused any more */ + TAILQ_REMOVE(&unused_inodes, rip, i_unused); + + /* Load the inode. */ + rip->i_dev = dev; + rip->i_num = numb; + rip->i_count = 1; + rip->i_update = 0; /* all the times are initially up-to-date */ + + /* Add to hash */ + addhash_inode(rip); + + + return(rip); +} + + +/*===========================================================================* + * find_inode * + *===========================================================================*/ +PUBLIC struct inode *find_inode(numb) +ino_t numb; /* inode number */ +{ +/* Find the inode specified by the inode and device number. 
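
Note that, unlike get_inode() above, find_inode() leaves i_count
untouched, so it is only a lookup; callers that need the inode to survive
must still hold a reference from get_inode(). A hypothetical caller:

	struct inode *rip;

	if ((rip = find_inode(inum)) == NULL)	/* inum from a VFS request */
		return(EINVAL);			/* unknown inode number */
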
+ */ + struct inode *rip; + int hashi; + + hashi = (int) (numb & INODE_HASH_MASK); + + /* Search inode in the hash table */ + LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) { + if (rip->i_count > 0 && rip->i_num == numb) { + return(rip); + } + } + + return(NULL); +} + + +/*===========================================================================* + * put_inode * + *===========================================================================*/ +PUBLIC void put_inode(rip) +struct inode *rip; /* pointer to inode to be released */ +{ +/* The caller is no longer using this inode. If no one else is using it either + * write it back to the disk immediately. If it has no links, truncate it and + * return it to the pool of available inodes. + */ + + if (rip == NULL) return; /* checking here is easier than in caller */ + + if (rip->i_count < 1) + panic("put_inode: i_count already below 1: %d", rip->i_count); + + if (--rip->i_count == 0) { /* i_count == 0 means no one is using it now */ + if (rip->i_nlinks == NO_LINK) { /* Are there links to this file? */ + /* no links, free the inode. */ + truncate_inode(rip, 0); /* return all the disk blocks */ + rip->i_mode = I_NOT_ALLOC; /* clear I_TYPE field */ + free_inode(rip); + } else { + truncate_inode(rip, (off_t) 0); + } + + if (rip->i_nlinks == NO_LINK) { + /* free, put at the front of the LRU list */ + unhash_inode(rip); + rip->i_num = NO_ENTRY; + rip->i_dev = NO_DEV; + rip->i_rdev = NO_DEV; + TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused); + } else { + /* unused, put at the back of the LRU (cache it) */ + TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused); + } + } +} + + +/*===========================================================================* + * alloc_inode * + *===========================================================================*/ +PUBLIC struct inode *alloc_inode(dev_t dev, mode_t bits) +{ +/* Allocate a free inode on 'dev', and return a pointer to it. */ + + register struct inode *rip; + bit_t b; + ino_t i_num; + + b = alloc_bit(); + if (b == NO_BIT) { + err_code = ENOSPC; + printf("PipeFS is out of inodes\n"); + return(NULL); + } + i_num = (ino_t) b; + + + /* Try to acquire a slot in the inode table. */ + if ((rip = get_inode(dev, i_num)) == NULL) { + /* No inode table slots available. Free the inode if just allocated.*/ + if (dev == NO_DEV) free_bit(b); + } else { + /* An inode slot is available. */ + + rip->i_mode = bits; /* set up RWX bits */ + rip->i_nlinks = NO_LINK; /* initial no links */ + rip->i_uid = caller_uid; /* file's uid is owner's */ + rip->i_gid = caller_gid; /* ditto group id */ + + /* Fields not cleared already are cleared in wipe_inode(). They have + * been put there because truncate() needs to clear the same fields if + * the file happens to be open while being truncated. It saves space + * not to repeat the code twice. + */ + wipe_inode(rip); + } + + return(rip); +} + + +/*===========================================================================* + * wipe_inode * + *===========================================================================*/ +PUBLIC void wipe_inode(rip) +struct inode *rip; /* the inode to be erased */ +{ +/* Erase some fields in the inode. This function is called from alloc_inode() + * when a new inode is to be allocated, and from truncate(), when an existing + * inode is to be truncated. 
+ */ + + rip->i_size = 0; + rip->i_update = ATIME | CTIME | MTIME; /* update all times later */ +} + + +/*===========================================================================* + * free_inode * + *===========================================================================*/ +PUBLIC void free_inode(rip) +struct inode *rip; +{ +/* Return an inode to the pool of unallocated inodes. */ + + bit_t b; + + if (rip->i_num <= (ino_t) 0 || rip->i_num >= (ino_t) NR_INODES) return; + b = (bit_t) rip->i_num; + free_bit(b); +} + + +/*===========================================================================* + * update_times * + *===========================================================================*/ +PUBLIC void update_times(rip) +struct inode *rip; /* pointer to inode to be read/written */ +{ +/* Various system calls are required by the standard to update atime, ctime, + * or mtime. Since updating a time requires sending a message to the clock + * task--an expensive business--the times are marked for update by setting + * bits in i_update. When a stat, fstat, or sync is done, or an inode is + * released, update_times() may be called to actually fill in the times. + */ + + time_t cur_time; + + cur_time = clock_time(); + if (rip->i_update & ATIME) rip->i_atime = cur_time; + if (rip->i_update & CTIME) rip->i_ctime = cur_time; + if (rip->i_update & MTIME) rip->i_mtime = cur_time; + rip->i_update = 0; /* they are all up-to-date now */ +} diff --git a/servers/apfs/inode.h b/servers/apfs/inode.h new file mode 100644 index 000000000..19e582593 --- /dev/null +++ b/servers/apfs/inode.h @@ -0,0 +1,39 @@ +#ifndef __PFS_INODE_H__ +#define __PFS_INODE_H__ + +/* Inode table. This table holds inodes that are currently in use. + */ + +#include + +EXTERN struct inode { + mode_t i_mode; /* file type, protection, etc. */ + nlink_t i_nlinks; /* how many links to this file */ + uid_t i_uid; /* user id of the file's owner */ + gid_t i_gid; /* group number */ + off_t i_size; /* current file size in bytes */ + time_t i_atime; /* time of last access (V2 only) */ + time_t i_mtime; /* when was file data last changed */ + time_t i_ctime; /* when was inode itself changed (V2 only)*/ + + /* The following items are not present on the disk. 
*/ + dev_t i_dev; /* which device is the inode on */ + dev_t i_rdev; /* which special device is the inode on */ + ino_t i_num; /* inode number on its (minor) device */ + int i_count; /* # times inode used; 0 means slot is free */ + char i_update; /* the ATIME, CTIME, and MTIME bits are here */ + + LIST_ENTRY(inode) i_hash; /* hash list */ + TAILQ_ENTRY(inode) i_unused; /* free and unused list */ + + +} inode[NR_INODES]; + +/* list of unused/free inodes */ +EXTERN TAILQ_HEAD(unused_inodes_t, inode) unused_inodes; + +/* inode hashtable */ +EXTERN LIST_HEAD(inodelist, inode) hash_inodes[INODE_HASH_SIZE]; + + +#endif diff --git a/servers/apfs/link.c b/servers/apfs/link.c new file mode 100644 index 000000000..4ec064ef9 --- /dev/null +++ b/servers/apfs/link.c @@ -0,0 +1,50 @@ +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include + +/*===========================================================================* + * fs_ftrunc * + *===========================================================================*/ +PUBLIC int fs_ftrunc(message *fs_m_in, message *fs_m_out) +{ + struct inode *rip; + off_t start, end; + ino_t inumb; + + inumb = (ino_t) fs_m_in->REQ_INODE_NR; + + if( (rip = find_inode(inumb)) == NULL) return(EINVAL); + + start = fs_m_in->REQ_TRC_START_LO; + end = fs_m_in->REQ_TRC_END_LO; + + return truncate_inode(rip, start); +} + + +/*===========================================================================* + * truncate_inode * + *===========================================================================*/ +PUBLIC int truncate_inode(rip, newsize) +register struct inode *rip; /* pointer to inode to be truncated */ +off_t newsize; /* inode must become this size */ +{ +/* Set inode to a certain size, freeing any zones no longer referenced + * and updating the size in the inode. If the inode is extended, the + * extra space is a hole that reads as zeroes. + * + * Nothing special has to happen to file pointers if inode is opened in + * O_APPEND mode, as this is different per fd and is checked when + * writing is done. + */ + + /* Pipes can shrink, so adjust size to make sure all zones are removed. */ + if(newsize != 0) return(EINVAL); /* Only truncate pipes to 0. */ + rip->i_size = newsize; + + /* Next correct the inode size. */ + wipe_inode(rip); /* Pipes can only be truncated to 0. */ + + return(OK); +} diff --git a/servers/apfs/main.c b/servers/apfs/main.c new file mode 100644 index 000000000..2f9f4d4ed --- /dev/null +++ b/servers/apfs/main.c @@ -0,0 +1,187 @@ +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "uds.h" + +FORWARD _PROTOTYPE(void get_work, (message *m_in) ); + +/* SEF functions and variables. */ +FORWARD _PROTOTYPE( void sef_local_startup, (void) ); +FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) ); +FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) ); + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(int argc, char *argv[]) +{ +/* This is the main routine of this service. The main loop consists of + * three major activities: getting new work, processing the work, and + * sending the reply. The loop never terminates, unless a panic occurs. + */ + int ind, do_reply, transid; + message pfs_m_in; + message pfs_m_out; + + /* SEF local startup. 
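
The calls that follow are the standard MINIX SEF service pattern;
condensed into one sketch (my_init and my_signal are hypothetical
callbacks):

	env_setargs(argc, argv);
	sef_setcb_init_fresh(my_init);		/* fresh-start initialization */
	sef_setcb_signal_handler(my_signal);	/* e.g. catch SIGTERM */
	sef_startup();				/* completes the RS init protocol */
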
*/ + env_setargs(argc, argv); + sef_local_startup(); + + printf("Started APFS\n"); + while(!unmountdone || !exitsignaled) { + endpoint_t src; + + do_reply = 1; + /* Wait for request message. */ + get_work(&pfs_m_in); + + transid = TRNS_GET_ID(pfs_m_in.m_type); + pfs_m_in.m_type = TRNS_DEL_ID(pfs_m_in.m_type); + if (pfs_m_in.m_type == 0) { + assert(!IS_VFS_FS_TRANSID(transid)); + pfs_m_in.m_type = transid; + transid = 0; + } else + assert(IS_VFS_FS_TRANSID(transid) || transid == 0); + + src = pfs_m_in.m_source; + caller_uid = INVAL_UID; /* To trap errors */ + caller_gid = INVAL_GID; + req_nr = pfs_m_in.m_type; + + if (IS_DEV_RQ(req_nr)) { + ind = req_nr - DEV_RQ_BASE; + if (ind < 0 || ind >= DEV_CALL_VEC_SIZE) { + printf("pfs: bad DEV request %d\n", req_nr); + pfs_m_out.m_type = EINVAL; + } else { + int result; + result = (*dev_call_vec[ind])(&pfs_m_in, &pfs_m_out); + if (pfs_m_out.REP_STATUS == SUSPEND || + result == SUSPEND) { + /* Nothing to tell, so not replying */ + do_reply = 0; + } + } + } else if (IS_VFS_RQ(req_nr)) { + ind = req_nr - VFS_BASE; + if (ind < 0 || ind >= FS_CALL_VEC_SIZE) { + printf("pfs: bad FS request %d\n", req_nr); + pfs_m_out.m_type = EINVAL; + } else { + pfs_m_out.m_type = + (*fs_call_vec[ind])(&pfs_m_in, &pfs_m_out); + } + } else { + printf("pfs: bad request %d\n", req_nr); + pfs_m_out.m_type = EINVAL; + } + + if (do_reply) { + if (IS_VFS_RQ(req_nr) && IS_VFS_FS_TRANSID(transid)) { + pfs_m_out.m_type = TRNS_ADD_ID(pfs_m_out.m_type, + transid); + } + reply(src, &pfs_m_out); + } + } + return(OK); +} + +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +PRIVATE void sef_local_startup() +{ + /* Register init callbacks. */ + sef_setcb_init_fresh(sef_cb_init_fresh); + sef_setcb_init_restart(sef_cb_init_fail); + + /* No live update support for now. */ + + /* Register signal callbacks. */ + sef_setcb_signal_handler(sef_cb_signal_handler); + + /* Let SEF perform startup. */ + sef_startup(); +} + +/*===========================================================================* + * sef_cb_init_fresh * + *===========================================================================*/ +PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) +{ +/* Initialize the pipe file server. */ + int i; + + /* Initialize main loop parameters. */ + exitsignaled = 0; /* No exit request seen yet. */ + busy = 0; /* Server is not 'busy' (i.e., inodes in use). */ + + /* Init inode table */ + for (i = 0; i < NR_INODES; ++i) { + inode[i].i_count = 0; + } + + init_inode_cache(); + uds_init(); + + SELF_E = getprocnr(); + buf_pool(); + + driver_announce(); + + return(OK); +} + +/*===========================================================================* + * sef_cb_signal_handler * + *===========================================================================*/ +PRIVATE void sef_cb_signal_handler(int signo) +{ + /* Only check for termination signal, ignore anything else. 
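
Both shutdown conditions must hold before main()'s
while (!unmountdone || !exitsignaled) loop can exit; the handler body
below only provides one of them:

	/* SIGTERM from RS       ->  exitsignaled = 1   (here)    */
	/* REQ_UNMOUNT from VFS  ->  unmountdone = TRUE (mount.c) */
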
*/ + if (signo != SIGTERM) return; + + + exitsignaled = 1; +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work(m_in) +message *m_in; /* pointer to message */ +{ + int r, srcok = 0, status; + endpoint_t src; + + do { + /* wait for a message */ + if ((r = sef_receive_status(ANY, m_in, &status)) != OK) + panic("sef_receive_status failed: %d", r); + src = m_in->m_source; + + if(src == VFS_PROC_NR) { + srcok = 1; /* Normal FS request. */ + } else + printf("PFS: unexpected source %d\n", src); + } while(!srcok); +} + + +/*===========================================================================* + * reply * + *===========================================================================*/ +PUBLIC void reply(who, m_out) +endpoint_t who; +message *m_out; /* report result */ +{ + if (OK != send(who, m_out)) /* send the message */ + printf("PFS(%d) was unable to send reply\n", SELF_E); +} diff --git a/servers/apfs/misc.c b/servers/apfs/misc.c new file mode 100644 index 000000000..2f8e3010d --- /dev/null +++ b/servers/apfs/misc.c @@ -0,0 +1,12 @@ +#include "fs.h" + + +/*===========================================================================* + * fs_sync * + *===========================================================================*/ +PUBLIC int fs_sync(message *fs_m_in, message *fs_m_out) +{ +/* Perform the sync() system call. No-op on this FS. */ + + return(OK); /* sync() can't fail */ +} diff --git a/servers/apfs/mount.c b/servers/apfs/mount.c new file mode 100644 index 000000000..61fbac78e --- /dev/null +++ b/servers/apfs/mount.c @@ -0,0 +1,18 @@ +#include "fs.h" +#include "glo.h" + + +/*===========================================================================* + * fs_unmount * + *===========================================================================*/ +PUBLIC int fs_unmount(message *fs_m_in, message *fs_m_out) +{ +/* Unmount Pipe File Server. */ + + if (busy) return(EBUSY); /* can't umount a busy file system */ + + /* Finish off the unmount. 
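
A hypothetical sequence showing why the busy check above matters (busy is
nonzero while any inode is in use, per sef_cb_init_fresh()):

	/* open() on a socket/fifo  ->  busy != 0                     */
	/* fs_unmount()             ->  EBUSY, VFS keeps PFS mounted  */
	/* last close()             ->  busy == 0                     */
	/* fs_unmount()             ->  falls through to this point   */
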
*/ + unmountdone = TRUE; + + return(OK); +} diff --git a/servers/apfs/open.c b/servers/apfs/open.c new file mode 100644 index 000000000..a7e275757 --- /dev/null +++ b/servers/apfs/open.c @@ -0,0 +1,52 @@ +#include "fs.h" +#include +#include "buf.h" +#include "inode.h" +#include + + +/*===========================================================================* + * fs_newnode * + *===========================================================================*/ +PUBLIC int fs_newnode(message *fs_m_in, message *fs_m_out) +{ + register int r = OK; + mode_t bits; + struct inode *rip; + dev_t dev; + + caller_uid = (uid_t) fs_m_in->REQ_UID; + caller_gid = (gid_t) fs_m_in->REQ_GID; + bits = (mode_t) fs_m_in->REQ_MODE; + dev = (dev_t) fs_m_in->REQ_DEV; + + /* Try to allocate the inode */ + if( (rip = alloc_inode(dev, bits) ) == NULL) return(err_code); + + switch (bits & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + rip->i_rdev = dev; /* Major/minor dev numbers */ + break; + case S_IFIFO: + if ((get_block(dev, rip->i_num)) == NULL) + r = EIO; + break; + default: + r = EIO; /* Unsupported file type */ + } + + if (r != OK) { + free_inode(rip); + } else { + /* Fill in the fields of the response message */ + fs_m_out->RES_INODE_NR = rip->i_num; + fs_m_out->RES_MODE = rip->i_mode; + fs_m_out->RES_FILE_SIZE_LO = rip->i_size; + fs_m_out->RES_UID = rip->i_uid; + fs_m_out->RES_GID = rip->i_gid; + fs_m_out->RES_DEV = dev; + } + + return(r); +} diff --git a/servers/apfs/proto.h b/servers/apfs/proto.h new file mode 100644 index 000000000..fe28b2895 --- /dev/null +++ b/servers/apfs/proto.h @@ -0,0 +1,104 @@ +#ifndef __PFS_PROTO_H__ +#define __PFS_PROTO_H__ + +/* Function prototypes. */ + +/* Structs used in prototypes must be declared as such first. */ +struct buf; +struct inode; +struct sockaddr_un; +struct ancillary; + +/* buffer.c */ +_PROTOTYPE( struct buf *get_block, (dev_t dev, ino_t inum) ); +_PROTOTYPE( void put_block, (dev_t dev, ino_t inum) ); + +/* cache.c */ +_PROTOTYPE( void buf_pool, (void) ); + +/* inode.c */ +_PROTOTYPE( struct inode *alloc_inode, (dev_t dev, mode_t mode) ); +_PROTOTYPE( void dup_inode, (struct inode *ip) ); +_PROTOTYPE( struct inode *find_inode, (ino_t numb) ); +_PROTOTYPE( void free_inode, (struct inode *rip) ); +_PROTOTYPE( int fs_putnode, (message *fs_m_in, message *fs_m_out) ); +_PROTOTYPE( void init_inode_cache, (void) ); +_PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb) ); +_PROTOTYPE( void put_inode, (struct inode *rip) ); +_PROTOTYPE( void update_times, (struct inode *rip) ); +_PROTOTYPE( void wipe_inode, (struct inode *rip) ); + +/* link.c */ +_PROTOTYPE( int fs_ftrunc, (message *fs_m_in, message *fs_m_out) ); +_PROTOTYPE( int truncate_inode, (struct inode *rip, off_t newsize) ); + + +/* main.c */ +_PROTOTYPE( void reply, (endpoint_t who, message *m_out) ); + +/* misc.c */ +_PROTOTYPE( int fs_sync, (message *fs_m_in, message *fs_m_out) ); + +/* mount.c */ +_PROTOTYPE( int fs_unmount, (message *fs_m_in, message *fs_m_out) ); + +/* open.c */ +_PROTOTYPE( int fs_newnode, (message *fs_m_in, message *fs_m_out) ); + +/* read.c */ +_PROTOTYPE( int fs_readwrite, (message *fs_m_in, message *fs_m_out) ); + +/* utility.c */ +_PROTOTYPE( time_t clock_time, (void) ); +_PROTOTYPE( int no_sys, (message *pfs_m_in, message *pfs_m_out) ); + +/* stadir.c */ +_PROTOTYPE( int fs_stat, (message *fs_m_in, message *fs_m_out) ); + +/* super.c */ +_PROTOTYPE( bit_t alloc_bit, (void) ); +_PROTOTYPE( void free_bit, (bit_t bit_returned) ); + +/* dev_uds.c */ +_PROTOTYPE( int uds_open, 
(message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_close, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_read, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_write, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_ioctl, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_select, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int uds_unsuspend, (endpoint_t m_source, int minor) ); +_PROTOTYPE( int uds_cancel, (message *dev_m_in, message *dev_m_out) ); + +/* uds.c */ +_PROTOTYPE( void uds_init, (void) ); +_PROTOTYPE( int do_accept, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_connect, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_listen, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_socket, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_bind, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockname, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getpeername, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_shutdown, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_socketpair, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_sotype, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_peercred, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_sndbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_setsockopt_sndbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_getsockopt_rcvbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_setsockopt_rcvbuf, + (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_sendto, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_recvfrom, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_sendmsg, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int do_recvmsg, (message *dev_m_in, message *dev_m_out) ); +_PROTOTYPE( int perform_connection, + (message *dev_m_in, message *dev_m_out, + struct sockaddr_un *addr, int minorx, + int minory) ); +_PROTOTYPE( int clear_fds, (int minor, struct ancillary *data) ); +#endif diff --git a/servers/apfs/read.c b/servers/apfs/read.c new file mode 100644 index 000000000..b4f06b846 --- /dev/null +++ b/servers/apfs/read.c @@ -0,0 +1,89 @@ +#include "fs.h" +#include "buf.h" +#include +#include "inode.h" + + +/*===========================================================================* + * fs_readwrite * + *===========================================================================*/ +PUBLIC int fs_readwrite(message *fs_m_in, message *fs_m_out) +{ + int r, rw_flag; + struct buf *bp; + cp_grant_id_t gid; + off_t position, f_size; + unsigned int nrbytes, cum_io; + mode_t mode_word; + struct inode *rip; + ino_t inumb; + + r = OK; + cum_io = 0; + inumb = (ino_t) fs_m_in->REQ_INODE_NR; + + /* Find the inode referred */ + if ((rip = find_inode(inumb)) == NULL) return(EINVAL); + + mode_word = rip->i_mode & I_TYPE; + if (mode_word != I_NAMED_PIPE) return(EIO); + f_size = rip->i_size; + + /* Get the values from the request message */ + rw_flag = (fs_m_in->m_type == REQ_READ ? 
READING : WRITING); + gid = (cp_grant_id_t) fs_m_in->REQ_GRANT; + position = fs_m_in->REQ_SEEK_POS_LO; + nrbytes = (unsigned) fs_m_in->REQ_NBYTES; + + /* We can't read beyond the max file position */ + if (nrbytes > MAX_FILE_POS) return(EFBIG); + + if (rw_flag == WRITING) { + /* Check in advance to see if file will grow too big. */ + /* Casting nrbytes to signed is safe, because it's guaranteed not to + be beyond max signed value (i.e., MAX_FILE_POS). */ + if (position > PIPE_BUF - (signed) nrbytes) return(EFBIG); + } + + /* Mark inode in use */ + if ((get_inode(rip->i_dev, rip->i_num)) == NULL) return(err_code); + if ((bp = get_block(rip->i_dev, rip->i_num)) == NULL) return(err_code); + + if (rw_flag == READING) { + /* Copy a chunk from the block buffer to user space. */ + r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) 0, + (vir_bytes) (bp->b_data+position), (size_t) nrbytes, D); + } else { + /* Copy a chunk from user space to the block buffer. */ + r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) 0, + (vir_bytes) (bp->b_data+position), (size_t) nrbytes, D); + } + + if (r == OK) { + position += (signed) nrbytes; /* Update position */ + cum_io += nrbytes; + } + + fs_m_out->RES_SEEK_POS_LO = position; /* It might change later and the VFS + has to know this value */ + + /* On write, update file size and access time. */ + if (rw_flag == WRITING) { + if (position > f_size) rip->i_size = position; + } else { + if(position >= rip->i_size) { + /* All data in the pipe is read, so reset pipe pointers */ + rip->i_size = 0; /* no data left */ + position = 0; /* reset reader(s) */ + } + } + + bp->b_bytes = position; + if (rw_flag == READING) rip->i_update |= ATIME; + if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME; + fs_m_out->RES_NBYTES = (size_t) cum_io; + put_inode(rip); + put_block(rip->i_dev, rip->i_num); + + return(r); +} diff --git a/servers/apfs/stadir.c b/servers/apfs/stadir.c new file mode 100644 index 000000000..7a49caff5 --- /dev/null +++ b/servers/apfs/stadir.c @@ -0,0 +1,70 @@ +#include "fs.h" +#include "inode.h" +#include +#include + + +/*===========================================================================* + * stat_inode * + *===========================================================================*/ +PRIVATE int stat_inode( + register struct inode *rip, /* pointer to inode to stat */ + endpoint_t who_e, /* Caller endpoint */ + cp_grant_id_t gid /* grant for the stat buf */ +) +{ +/* Common code for stat and fstat system calls. */ + mode_t type; + struct stat statbuf; + u32_t blocks; /* The unit of this is 512 */ + int r, s; + + type = rip->i_mode & I_TYPE; + s = (type == I_CHAR_SPECIAL || type == I_BLOCK_SPECIAL); + + /* Update the atime, ctime, and mtime fields in the inode, if need be. */ + if (rip->i_update) update_times(rip); + + blocks = rip->i_size / S_BLKSIZE; + if (rip->i_size % S_BLKSIZE != 0) + blocks += 1; + + memset(&statbuf, 0, sizeof(struct stat)); + + statbuf.st_dev = rip->i_dev; + statbuf.st_ino = rip->i_num; + statbuf.st_mode = rip->i_mode; + statbuf.st_nlink = rip->i_nlinks; + statbuf.st_uid = rip->i_uid; + statbuf.st_gid = (short int) rip->i_gid; + statbuf.st_rdev = (dev_t) (s ? rip->i_rdev : NO_DEV); + statbuf.st_size = rip->i_size; + if (!s) statbuf.st_mode &= ~I_REGULAR;/* wipe out I_REGULAR bit for pipes */ + statbuf.st_atime = rip->i_atime; + statbuf.st_mtime = rip->i_mtime; + statbuf.st_ctime = rip->i_ctime; + statbuf.st_blksize = PIPE_BUF; + statbuf.st_blocks = blocks; + + /* Copy the struct to user space. 
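
The arithmetic in fs_readwrite() above is easier to see in isolation: a pipe is backed by a single PIPE_BUF-sized block, a write must fit in the space that is left (otherwise EFBIG), and once readers have drained all data the size and position snap back to zero. A minimal standalone model, assuming a 32K PIPE_BUF and ignoring grants, inodes, and timestamps:

    #include <stdio.h>

    #define PIPE_BUF 32768

    static int size, pos;           /* bytes in the pipe, read cursor */

    static int pipe_write(int n)
    {
        if (size > PIPE_BUF - n) return -1; /* block would overflow: EFBIG */
        size += n;
        return n;
    }

    static int pipe_read(int n)
    {
        if (n > size - pos) n = size - pos; /* at most what is buffered */
        pos += n;
        if (pos >= size) size = pos = 0;    /* fully drained: reset pointers */
        return n;
    }

    int main(void)
    {
        pipe_write(100);
        printf("read %d, size now %d\n", pipe_read(100), size); /* 100, 0 */
        printf("write full: %d\n", pipe_write(PIPE_BUF));       /* ok */
        printf("write more: %d\n", pipe_write(1));              /* -1: EFBIG */
        return 0;
    }
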
*/ + r = sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &statbuf, + (size_t) sizeof(statbuf), D); + + return(r); +} + + +/*===========================================================================* + * fs_stat * + *===========================================================================*/ +PUBLIC int fs_stat(message *fs_m_in, message *fs_m_out) +{ + register int r; /* return value */ + register struct inode *rip; /* target inode */ + + if( (rip = find_inode(fs_m_in->REQ_INODE_NR)) == NULL) return(EINVAL); + get_inode(rip->i_dev, rip->i_num); /* mark inode in use */ + r = stat_inode(rip, fs_m_in->m_source, (cp_grant_id_t) fs_m_in->REQ_GRANT); + put_inode(rip); /* release the inode */ + return(r); +} diff --git a/servers/apfs/super.c b/servers/apfs/super.c new file mode 100644 index 000000000..50f4e0ba5 --- /dev/null +++ b/servers/apfs/super.c @@ -0,0 +1,75 @@ +/* This file manages the super block table and the related data structures, + * namely, the bit maps that keep track of which zones and which inodes are + * allocated and which are free. When a new inode or zone is needed, the + * appropriate bit map is searched for a free entry. + * + * The entry points into this file are + * alloc_bit: somebody wants to allocate a zone or inode; find one + * free_bit: indicate that a zone or inode is available for allocation + */ + +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include "const.h" + + +/*===========================================================================* + * alloc_bit * + *===========================================================================*/ +PUBLIC bit_t alloc_bit(void) +{ +/* Allocate a bit from a bit map and return its bit number. */ + bitchunk_t *wptr, *wlim; + bit_t b; + unsigned int i, bcount; + + bcount = FS_BITMAP_CHUNKS(NR_INODES); /* Inode map has this many chunks. */ + wlim = &inodemap[bcount]; /* Point to last chunk in inodemap. */ + + for (wptr = &inodemap[0]; wptr < wlim; wptr++) { + /* Does this word contain a free bit? */ + if (*wptr == (bitchunk_t) ~0) continue; /* No. Go to next word */ + + /* Find and allocate the free bit. */ + for (i = 0; (*wptr & (1 << i)) != 0; ++i) {} + + /* Get inode number */ + b = (bit_t) ((wptr - &inodemap[0]) * FS_BITCHUNK_BITS + i); + + /* Don't allocate bits beyond end of map. */ + if (b >= NR_INODES) break; + + /* Allocate and return bit number. */ + *wptr |= 1 << i; + + /* Mark server 'busy' */ + busy++; + return(b); + } + + return(NO_BIT); /* no bit could be allocated */ +} + + +/*===========================================================================* + * free_bit * + *===========================================================================*/ +PUBLIC void free_bit(bit_returned) +bit_t bit_returned; /* number of bit to insert into the inode map*/ +{ + bitchunk_t *k, mask; + bit_t bit; + unsigned word; + + /* Get word offset and bit within offset */ + word = (unsigned) (bit_returned / (bit_t) FS_BITCHUNK_BITS); + bit = bit_returned % (bit_t) FS_BITCHUNK_BITS; + + /* Unset bit */ + k = &inodemap[word]; + mask = (unsigned) 1 << bit; + *k &= ~mask; + + busy--; /* One inode less in use. */ +} diff --git a/servers/apfs/table.c b/servers/apfs/table.c new file mode 100644 index 000000000..760e5b21e --- /dev/null +++ b/servers/apfs/table.c @@ -0,0 +1,82 @@ + +/* This file contains the table used to map system call numbers onto the + * routines that perform them. 
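
alloc_bit() and free_bit() above are a classic first-fit scan over a word-array bitmap: whole words that are all ones are skipped, otherwise the lowest zero bit is located, bounds-checked against the map size, and set. The same idea reduced to a standalone program (sizes here are illustrative, not the PFS constants):

    #include <stdio.h>

    #define CHUNK_BITS 32
    #define NR_SLOTS   64
    #define NR_CHUNKS  ((NR_SLOTS + CHUNK_BITS - 1) / CHUNK_BITS)
    #define NO_BIT     (-1)

    static unsigned map[NR_CHUNKS];

    static int alloc_bit(void)
    {
        for (int w = 0; w < NR_CHUNKS; w++) {
            if (map[w] == ~0u) continue;        /* word full, try next */
            int i = 0;
            while (map[w] & (1u << i)) i++;     /* lowest zero bit */
            int b = w * CHUNK_BITS + i;
            if (b >= NR_SLOTS) break;           /* past the end of the map */
            map[w] |= 1u << i;
            return b;
        }
        return NO_BIT;                          /* nothing free */
    }

    static void free_bit(int b)
    {
        map[b / CHUNK_BITS] &= ~(1u << (b % CHUNK_BITS));
    }

    int main(void)
    {
        int a = alloc_bit(), b = alloc_bit();
        printf("got %d and %d\n", a, b);    /* 0 and 1 */
        free_bit(a);
        printf("reuse %d\n", alloc_bit());  /* 0 again */
        return 0;
    }
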
+ */ + +#define _TABLE + +#include "fs.h" +#include "inode.h" +#include "buf.h" +#include "uds.h" + +/* File System Handlers (pfs) */ +PUBLIC _PROTOTYPE (int (*fs_call_vec[]), + (message *fs_m_in, message *fs_m_out) ) = { + + no_sys, /* 0 not used */ + no_sys, /* 1 */ + fs_putnode, /* 2 */ + no_sys, /* 3 */ + fs_ftrunc, /* 4 */ + no_sys, /* 5 */ + no_sys, /* 6 */ + no_sys, /* 7 */ + fs_stat, /* 8 */ + no_sys, /* 9 */ + no_sys, /* 10 */ + no_sys, /* 11 */ + no_sys, /* 12 */ + no_sys, /* 13 */ + no_sys, /* 14 */ + fs_unmount, /* 15 */ + fs_sync, /* 16 */ + no_sys, /* 17 */ + no_sys, /* 18 */ + fs_readwrite, /* 19 */ + fs_readwrite, /* 20 */ + no_sys, /* 21 */ + no_sys, /* 22 */ + no_sys, /* 23 */ + no_sys, /* 24 */ + no_sys, /* 25 */ + no_sys, /* 26 */ + no_sys, /* 27 */ + no_sys, /* 28 */ + fs_newnode, /* 29 */ + no_sys, /* 30 */ + no_sys, /* 31 */ + no_sys, /* 32 */ +}; + +/* Device Handlers (/dev/uds) */ +PUBLIC _PROTOTYPE (int (*dev_call_vec[]), + (message *dev_m_in, message *dev_m_out) ) = { + + uds_cancel, /* 0 CANCEL */ + no_sys, /* 1 */ + no_sys, /* 2 */ + no_sys, /* 3 */ + no_sys, /* 4 */ + no_sys, /* 5 */ + uds_open, /* 6 DEV_OPEN */ + uds_close, /* 7 DEV_CLOSE */ + no_sys, /* 8 */ + no_sys, /* 9 */ + no_sys, /* 10 TTY_SETPGRP */ + no_sys, /* 11 TTY_EXIT */ + uds_select, /* 12 DEV_SELECT */ + no_sys, /* 13 DEV_STATUS */ + uds_open, /* 14 DEV_REOPEN */ + no_sys, /* 15 */ + no_sys, /* 16 */ + no_sys, /* 17 */ + no_sys, /* 18 */ + no_sys, /* 19 */ + uds_read, /* 20 DEV_READ_S */ + uds_write, /* 21 DEV_WRITE_S */ + no_sys, /* 22 DEV_SCATTER_S */ + no_sys, /* 23 DEV_GATHER_S */ + uds_ioctl, /* 24 DEV_IOCTL_S */ + no_sys, /* 25 DEV_MMAP_S */ +}; diff --git a/servers/apfs/uds.c b/servers/apfs/uds.c new file mode 100644 index 000000000..fed57afdf --- /dev/null +++ b/servers/apfs/uds.c @@ -0,0 +1,1528 @@ +/* + * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL) + * This code handles ioctl(2) commands to implement the socket API. + * Some helper functions are also present. + * + * The entry points into this file are... + * + * uds_init: initialize the descriptor table. + * do_accept: handles the accept(2) syscall. + * do_connect: handles the connect(2) syscall. + * do_listen: handles the listen(2) syscall. + * do_socket: handles the socket(2) syscall. + * do_bind: handles the bind(2) syscall. + * do_getsockname: handles the getsockname(2) syscall. + * do_getpeername: handles the getpeername(2) syscall. + * do_shutdown: handles the shutdown(2) syscall. + * do_socketpair: handles the socketpair(2) syscall. + * do_getsockopt_sotype: handles the getsockopt(2) syscall. + * do_getsockopt_peercred: handles the getsockopt(2) syscall. + * do_getsockopt_sndbuf: handles the getsockopt(2) syscall. + * do_setsockopt_sndbuf: handles the setsockopt(2) syscall. + * do_getsockopt_rcvbuf: handles the getsockopt(2) syscall. + * do_setsockopt_rcvbuf: handles the setsockopt(2) syscall. + * do_sendto: handles the sendto(2) syscall. + * do_recvfrom: handles the recvfrom(2) syscall. + * do_sendmsg: handles the sendmsg(2) syscall. + * do_recvmsg: handles the recvmsg(2) syscall. + * perform_connection: performs the connection of two descriptors. + * clear_fds: calls put_filp for undelivered FDs. + * + * Also see... 
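
The two vectors above make the dispatchers table-driven: unused request numbers land on no_sys(), so the caller needs only a bounds check before indexing. A self-contained sketch of the pattern (handler names here are invented for the demo):

    #include <stdio.h>

    typedef int (*handler_t)(int arg);

    static int no_sys(int arg)  { (void)arg; printf("invalid call\n"); return -1; }
    static int do_stat(int arg) { printf("stat inode %d\n", arg); return 0; }
    static int do_sync(int arg) { (void)arg; printf("sync\n"); return 0; }

    static handler_t call_vec[] = {
        no_sys,     /* 0: unused slots default to no_sys */
        do_stat,    /* 1 */
        do_sync,    /* 2 */
    };
    #define NCALLS (int)(sizeof(call_vec) / sizeof(call_vec[0]))

    static int dispatch(int req, int arg)
    {
        if (req < 0 || req >= NCALLS) return no_sys(arg); /* bounds check */
        return call_vec[req](arg);
    }

    int main(void)
    {
        dispatch(1, 42);
        dispatch(9, 0);     /* out of range -> no_sys */
        return 0;
    }
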
+ * + * table.c, dev_uds.c, uds.h + */ + +#define DEBUG 0 + +#include "inc.h" +#include "const.h" +#include "glo.h" +#include "uds.h" + +/* File Descriptor Table */ +uds_fd_t uds_fd_table[NR_FDS]; + +/* initialize the descriptor table */ +PUBLIC void uds_init(void) +{ + /* + * Setting everything to NULL implicitly sets the + * state to UDS_FREE. + */ + memset(uds_fd_table, '\0', sizeof(uds_fd_t) * NR_FDS); +} + +/* check the permissions of a socket file */ +PRIVATE int check_perms(int minor, struct sockaddr_un *addr) +{ + int rc; + message vfs_m; + cp_grant_id_t grant_id; + + grant_id = cpf_grant_direct(VFS_PROC_NR, (vir_bytes) addr->sun_path, + UNIX_PATH_MAX, CPF_READ | CPF_WRITE); + + /* ask the VFS to verify the permissions */ + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_CHECK_PERMS; + vfs_m.USER_ENDPT = uds_fd_table[minor].owner; + vfs_m.IO_GRANT = (char *) grant_id; + vfs_m.COUNT = UNIX_PATH_MAX; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + cpf_revoke(grant_id); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); + printf("(uds) Canonical Path => %s\n", addr->sun_path); +#endif + + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PRIVATE filp_id_t verify_fd(endpoint_t ep, int fd) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) verify_fd(%d,%d) call_count=%d\n", ep, fd, + ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_VERIFY_FD; + vfs_m.USER_ENDPT = ep; + vfs_m.COUNT = fd; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return NULL; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + + return vfs_m.ADDRESS; +} + +PRIVATE int set_filp(filp_id_t sfilp) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) set_filp(0x%x) call_count=%d\n", sfilp, ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_SET_FILP; + vfs_m.ADDRESS = sfilp; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PRIVATE int copy_filp(endpoint_t to_ep, filp_id_t cfilp) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) copy_filp(%d, 0x%x) call_count=%d\n",to_ep, cfilp, + ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_COPY_FILP; + vfs_m.USER_ENDPT = to_ep; + vfs_m.ADDRESS = cfilp; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; +} + +PRIVATE int put_filp(filp_id_t pfilp) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) put_filp(0x%x) call_count=%d\n", pfilp, ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_PUT_FILP; + vfs_m.ADDRESS = pfilp; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... 
req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PRIVATE int cancel_fd(endpoint_t ep, int fd) +{ + int rc; + message vfs_m; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) cancel_fd(%d,%d) call_count=%d\n", ep, fd, ++call_count); +#endif + + memset(&vfs_m, '\0', sizeof(message)); + + vfs_m.m_type = PFS_REQ_CANCEL_FD; + vfs_m.USER_ENDPT = ep; + vfs_m.COUNT = fd; + + rc = sendrec(VFS_PROC_NR, &vfs_m); + if (OK != rc) { + printf("(uds) sendrec error... req_nr: %d err: %d\n", + vfs_m.m_type, rc); + return EIO; + } + +#if DEBUG == 1 + printf("(uds) VFS reply => %d\n", vfs_m.m_type); +#endif + return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */ +} + +PUBLIC int perform_connection(message *dev_m_in, message *dev_m_out, + struct sockaddr_un *addr, int minorx, int minory) +{ + /* there are several places were a connection is established. */ + /* accept(2), connect(2), uds_status(2), socketpair(2) */ + /* This is a helper function to make sure it is done in the */ + /* same way in each place with the same validation checks. */ + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] perform_connection() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + /* only connection oriented types are acceptable and only like + * types can connect to each other + */ + if ((uds_fd_table[minorx].type != SOCK_SEQPACKET && + uds_fd_table[minorx].type != SOCK_STREAM) || + uds_fd_table[minorx].type != uds_fd_table[minory].type) { + + /* sockets are not in a valid state */ + return EINVAL; + } + + /* connect the pair of sockets */ + uds_fd_table[minorx].peer = minory; + uds_fd_table[minory].peer = minorx; + + /* Set the address of both sockets */ + memcpy(&(uds_fd_table[minorx].addr), addr, sizeof(struct sockaddr_un)); + memcpy(&(uds_fd_table[minory].addr), addr, sizeof(struct sockaddr_un)); + + return OK; +} + + +PUBLIC int do_accept(message *dev_m_in, message *dev_m_out) +{ + int minor; + int minorparent; /* minor number of parent (server) */ + int minorpeer; + int rc, i; + struct sockaddr_un addr; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_accept() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + /* Somewhat weird logic is used in this function, so here's an + * overview... The minor number is the server's client socket + * (the socket to be returned by accept()). The data waiting + * for us in the IO Grant is the address that the server is + * listening on. This function uses the address to find the + * server's descriptor. From there we can perform the + * connection or suspend and wait for a connect(). + */ + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].type != -1) { + /* this IOCTL must be called on a 'fresh' socket */ + return EINVAL; + } + + /* Get the server's address */ + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un), + D); + + if (rc != OK) { + return EIO; + } + + /* locate server socket */ + rc = -1; /* to trap error */ + + for (i = 0; i < NR_FDS; i++) { + + if (uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(addr.sun_path, + uds_fd_table[i].addr.sun_path, + UNIX_PATH_MAX) && + uds_fd_table[i].listening == 1) { + + rc = 0; + break; + } + } + + if (rc == -1) { + /* there is no server listening on addr. Maybe someone + * screwed up the ioctl()? 
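
perform_connection() above boils down to symmetric pairing: two like-typed descriptors are joined by pointing each table slot's peer index at the other, with -1 meaning "no peer". A standalone sketch (sizes and the single SOCK_STREAM type are simplifications; the real check also admits SOCK_SEQPACKET):

    #include <stdio.h>

    #define NR_FDS      8
    #define SOCK_STREAM 1

    struct slot { int type; int peer; };
    static struct slot tab[NR_FDS];

    static int connect_pair(int x, int y)
    {
        if (tab[x].type != SOCK_STREAM ||   /* connection-oriented only... */
            tab[x].type != tab[y].type)     /* ...and like types only */
            return -1;
        tab[x].peer = y;                    /* join both directions */
        tab[y].peer = x;
        return 0;
    }

    int main(void)
    {
        for (int i = 0; i < NR_FDS; i++) tab[i] = (struct slot){ -1, -1 };
        tab[2].type = tab[5].type = SOCK_STREAM;
        printf("pair: %d\n", connect_pair(2, 5));   /* 0 */
        printf("peer of 2 is %d\n", tab[2].peer);   /* 5 */
        return 0;
    }
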
+ */ + return EINVAL; + } + + minorparent = i; /* parent */ + + /* we are the parent's child */ + uds_fd_table[minorparent].child = minor; + + /* the peer has the same type as the parent. we need to be that + * type too. + */ + uds_fd_table[minor].type = uds_fd_table[minorparent].type; + + /* locate peer to accept in the parent's backlog */ + minorpeer = -1; /* to trap error */ + for (i = 0; i < uds_fd_table[minorparent].backlog_size; i++) { + if (uds_fd_table[minorparent].backlog[i] != -1) { + minorpeer = uds_fd_table[minorparent].backlog[i]; + uds_fd_table[minorparent].backlog[i] = -1; + rc = 0; + break; + } + } + + if (minorpeer == -1) { + +#if DEBUG == 1 + printf("(uds) [%d] {do_accept} suspend\n", minor); +#endif + + /* there are no peers in the backlog, suspend and wait + * for some to show up + */ + uds_fd_table[minor].suspended = UDS_SUSPENDED_ACCEPT; + + return SUSPEND; + } + +#if DEBUG == 1 + printf("(uds) [%d] connecting to %d -- parent is %d\n", minor, + minorpeer, minorparent); +#endif + + rc = perform_connection(dev_m_in, dev_m_out, &addr, minor, minorpeer); + if (rc != OK) { +#if DEBUG == 1 + printf("(uds) [%d] {do_accept} connection not performed\n", + minor); +#endif + return rc; + } + + uds_fd_table[minorparent].child = -1; + + /* if peer is blocked on connect() revive peer */ + if (uds_fd_table[minorpeer].suspended) { +#if DEBUG == 1 + printf("(uds) [%d] {do_accept} revive %d\n", minor, + minorpeer); +#endif + uds_fd_table[minorpeer].ready_to_revive = 1; + uds_unsuspend(dev_m_in->m_source, minorpeer); + } + + return OK; +} + +PUBLIC int do_connect(message *dev_m_in, message *dev_m_out) +{ + int minor; + struct sockaddr_un addr; + int rc, i, j; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_connect() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* only connection oriented sockets can connect */ + if (uds_fd_table[minor].type != SOCK_STREAM && + uds_fd_table[minor].type != SOCK_SEQPACKET) { + return EINVAL; + } + + if (uds_fd_table[minor].peer != -1) { + /* socket is already connected */ + return EISCONN; + } + + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &addr, + sizeof(struct sockaddr_un), D); + + if (rc != OK) { + return EIO; + } + + rc = check_perms(minor, &addr); + if (rc != OK) { + /* permission denied, socket file doesn't exist, etc. 
*/ + return rc; + } + + /* look for a socket of the same type that is listening on the + * address we want to connect to + */ + for (i = 0; i < NR_FDS; i++) { + + if (uds_fd_table[minor].type == uds_fd_table[i].type && + uds_fd_table[i].listening && + uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(addr.sun_path, uds_fd_table[i].addr.sun_path, + UNIX_PATH_MAX)) { + + if (uds_fd_table[i].child != -1) { + + /* the server is blocked on accept(2) -- + * perform connection to the child + */ + + rc = perform_connection(dev_m_in, dev_m_out, + &addr, minor, uds_fd_table[i].child); + + if (rc == OK) { + + uds_fd_table[i].child = -1; + +#if DEBUG == 1 + printf("(uds) [%d] {do_connect} revive %d\n", minor, i); +#endif + + /* wake the parent (server) */ + uds_fd_table[i].ready_to_revive = 1; + uds_unsuspend(dev_m_in->m_source, i); + } + + return rc; + + } else { + +#if DEBUG == 1 + printf("(uds) [%d] adding to %d's backlog\n", + minor, i); +#endif + + /* tell the server were waiting to be served */ + + /* look for a free slot in the backlog */ + rc = -1; /* to trap error */ + for (j = 0; j < uds_fd_table[i].backlog_size; + j++) { + + if (uds_fd_table[i].backlog[j] == -1) { + + uds_fd_table[i].backlog[j] = + minor; + + rc = 0; + break; + } + } + + if (rc == -1) { + + /* backlog is full */ + break; + } + + /* see if the server is blocked on select() */ + if (uds_fd_table[i].selecting == 1) { + + /* if the server wants to know + * about data ready to read and + * it doesn't know about it + * already, then let the server + * know we have data for it. + */ + if ((uds_fd_table[i].sel_ops_in & + SEL_RD) && + !(uds_fd_table[i].sel_ops_out & + SEL_RD)) { + + uds_fd_table[i].sel_ops_out |= + SEL_RD; + uds_fd_table[i].status_updated + = 1; + + uds_unsuspend( + dev_m_in->m_source, i); + } + } + + /* we found our server */ + uds_fd_table[minor].peer = i; + + /* set the address */ + memcpy(&(uds_fd_table[minor].addr), &addr, + sizeof(struct sockaddr_un)); + + break; + } + } + } + + if (uds_fd_table[minor].peer == -1) { + /* could not find another open socket listening on the + * specified address with room in the backlog + */ + return ECONNREFUSED; + } + +#if DEBUG == 1 + printf("(uds) [%d] {do_connect} suspend\n", minor); +#endif + + /* suspend until the server side completes the connection with accept() + */ + + uds_fd_table[minor].suspended = UDS_SUSPENDED_CONNECT; + + return SUSPEND; +} + +PUBLIC int do_listen(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + int backlog_size; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_listen() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* ensure the socket has a type and is bound */ + if (uds_fd_table[minor].type == -1 || + uds_fd_table[minor].addr.sun_family != AF_UNIX) { + + /* probably trying to call listen() before bind() */ + return EINVAL; + } + + /* the two supported types for listen(2) are SOCK_STREAM and + * SOCK_SEQPACKET + */ + if (uds_fd_table[minor].type != SOCK_STREAM && + uds_fd_table[minor].type != SOCK_SEQPACKET) { + + /* probably trying to call listen() with a SOCK_DGRAM */ + return EOPNOTSUPP; + } + + /* The POSIX standard doesn't say what to do if listen() has + * already been called. Well, there isn't an errno. 
We silently
+ * let it happen, but if listen() has already been called, we
+ * don't allow the backlog to shrink
+ */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &backlog_size, sizeof(int), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    if (uds_fd_table[minor].listening == 0) {
+
+        /* See if backlog_size is between 0 and UDS_SOMAXCONN */
+        if (backlog_size >= 0 && backlog_size < UDS_SOMAXCONN) {
+
+            /* use the user provided backlog_size */
+            uds_fd_table[minor].backlog_size = backlog_size;
+
+        } else {
+
+            /* the user gave an invalid size, use
+             * UDS_SOMAXCONN instead
+             */
+            uds_fd_table[minor].backlog_size = UDS_SOMAXCONN;
+        }
+    } else {
+
+        /* See if the user is trying to expand the backlog_size */
+        if (backlog_size > uds_fd_table[minor].backlog_size &&
+            backlog_size < UDS_SOMAXCONN) {
+
+            /* expand backlog_size */
+            uds_fd_table[minor].backlog_size = backlog_size;
+        }
+
+        /* Don't let the user shrink the backlog_size (we might
+         * have clients waiting in those slots)
+         */
+    }
+
+    /* perform listen(2) */
+    uds_fd_table[minor].listening = 1;
+
+    return OK;
+}
+
+PUBLIC int do_socket(message *dev_m_in, message *dev_m_out)
+{
+    int rc;
+    int minor;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_socket() call_count=%d\n", uds_minor(dev_m_in),
+        ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    /* see if this socket already has a type */
+    if (uds_fd_table[minor].type != -1) {
+        /* socket type can only be set once */
+        return EINVAL;
+    }
+
+    /* get the requested type */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
+        sizeof(int), D);
+
+    if (rc != OK) {
+
+        /* something went wrong and we couldn't get the type */
+        return EIO;
+    }
+
+    /* validate the type */
+    switch (uds_fd_table[minor].type) {
+        case SOCK_STREAM:
+        case SOCK_DGRAM:
+        case SOCK_SEQPACKET:
+
+            /* the type is one of the 3 valid socket types */
+            return OK;
+
+        default:
+
+            /* if the type isn't one of the 3 valid socket
+             * types, then it must be invalid.
+             */
+
+            /* set the type back to '-1' (no type set) */
+            uds_fd_table[minor].type = -1;
+
+            return EINVAL;
+    }
+}
+
+PUBLIC int do_bind(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    struct sockaddr_un addr;
+    int rc, i;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_bind() call_count=%d\n", uds_minor(dev_m_in),
+        ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if ((uds_fd_table[minor].type == -1) ||
+        (uds_fd_table[minor].addr.sun_family == AF_UNIX &&
+        uds_fd_table[minor].type != SOCK_DGRAM)) {
+
+        /* the type hasn't been set by do_socket() yet OR attempting
+         * to re-bind() a non-SOCK_DGRAM socket
+         */
+        return EINVAL;
+    }
+
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
+        D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    /* do some basic sanity checks on the address */
+    if (addr.sun_family != AF_UNIX) {
+
+        /* bad family */
+        return EAFNOSUPPORT;
+    }
+
+    if (addr.sun_path[0] == '\0') {
+
+        /* bad address */
+        return ENOENT;
+    }
+
+    rc = check_perms(minor, &addr);
+    if (rc != OK) {
+        /* permission denied, socket file doesn't exist, etc. */
+        return rc;
+    }
+
+    /* make sure the address isn't already in use by another socket.
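
The backlog policy in do_listen() above, isolated into one decision function: a fresh listen() clamps out-of-range requests to UDS_SOMAXCONN, while a repeated listen() may grow the backlog but never shrink it, since occupied slots may hold waiting clients. A standalone sketch under those rules:

    #include <stdio.h>

    #define UDS_SOMAXCONN 64

    static int pick_backlog(int listening, int current, int requested)
    {
        if (!listening)
            return (requested >= 0 && requested < UDS_SOMAXCONN)
                ? requested : UDS_SOMAXCONN;  /* invalid: use the maximum */
        if (requested > current && requested < UDS_SOMAXCONN)
            return requested;                 /* grow only */
        return current;                       /* never shrink */
    }

    int main(void)
    {
        printf("%d\n", pick_backlog(0, 0, 16)); /* 16 */
        printf("%d\n", pick_backlog(0, 0, -3)); /* 64: clamped */
        printf("%d\n", pick_backlog(1, 16, 4)); /* 16: no shrinking */
        return 0;
    }
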
*/ + for (i = 0; i < NR_FDS; i++) { + if ((uds_fd_table[i].addr.sun_family == AF_UNIX) && + !strncmp(addr.sun_path, + uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) { + + /* another socket is bound to this sun_path */ + return EADDRINUSE; + } + } + + /* looks good, perform the bind() */ + memcpy(&(uds_fd_table[minor].addr), &addr, sizeof(struct sockaddr_un)); + + return OK; +} + +PUBLIC int do_getsockname(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_getsockname() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* Unconditionally send the address we have assigned to this socket. + * The POSIX standard doesn't say what to do if the address + * hasn't been set. If the address isn't currently set, then + * the user will get NULL bytes. Note: libc depends on this + * behavior. + */ + rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].addr), + sizeof(struct sockaddr_un), D); + + return rc ? EIO : OK; +} + +PUBLIC int do_getpeername(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_getpeername() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + /* check that the socket is connected with a valid peer */ + if (uds_fd_table[minor].peer != -1) { + int peer_minor; + + peer_minor = uds_fd_table[minor].peer; + + /* copy the address from the peer */ + rc = sys_safecopyto(VFS_PROC_NR, + (cp_grant_id_t) dev_m_in->IO_GRANT, (vir_bytes) 0, + (vir_bytes) &(uds_fd_table[peer_minor].addr), + sizeof(struct sockaddr_un), D); + + return rc ? 
EIO : OK;
+    } else {
+        if (uds_fd_table[minor].err == ECONNRESET) {
+            uds_fd_table[minor].err = 0;
+
+            return ECONNRESET;
+        } else {
+            return ENOTCONN;
+        }
+    }
+}
+
+PUBLIC int do_shutdown(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc, how;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_shutdown() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if (uds_fd_table[minor].type != SOCK_STREAM &&
+        uds_fd_table[minor].type != SOCK_SEQPACKET) {
+
+        /* socket must be a connection-oriented socket */
+        return EINVAL;
+    }
+
+    if (uds_fd_table[minor].peer == -1) {
+        /* shutdown(2) is only valid for connected sockets */
+        if (uds_fd_table[minor].err == ECONNRESET) {
+            return ECONNRESET;
+        } else {
+            return ENOTCONN;
+        }
+    }
+
+    /* get the 'how' parameter from the process */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &how, sizeof(int), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    switch (how) {
+        case SHUT_RD:
+            /* take away read permission */
+            uds_fd_table[minor].mode =
+                uds_fd_table[minor].mode & ~S_IRUSR;
+            break;
+
+        case SHUT_WR:
+            /* take away write permission */
+            uds_fd_table[minor].mode =
+                uds_fd_table[minor].mode & ~S_IWUSR;
+            break;
+
+        case SHUT_RDWR:
+            /* completely shut down */
+            uds_fd_table[minor].mode = 0;
+            break;
+
+        default:
+            /* the 'how' parameter is invalid */
+            return EINVAL;
+    }
+
+    return OK;
+}
+
+PUBLIC int do_socketpair(message *dev_m_in, message *dev_m_out)
+{
+    int rc;
+    dev_t minorin;
+    int minorx, minory;
+    struct sockaddr_un addr;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_socketpair() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    /* first ioctl param is the first socket */
+    minorx = uds_minor(dev_m_in);
+
+    /* third ioctl param is the minor number of the second socket */
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &minorin, sizeof(dev_t), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    minory = (minor(minorin) & BYTE);
+
+#if DEBUG == 1
+    printf("socketpair() %d - %d\n", minorx, minory);
+#endif
+
+    /* security check - both sockets must have the same endpoint (owner) */
+    if (uds_fd_table[minorx].owner != uds_fd_table[minory].owner) {
+
+        /* we won't allow you to magically connect your socket to
+         * someone else's socket
+         */
+        return EPERM;
+    }
+
+    addr.sun_family = AF_UNIX;
+    addr.sun_path[0] = 'X';
+    addr.sun_path[1] = '\0';
+
+    uds_fd_table[minorx].syscall_done = 1;
+    return perform_connection(dev_m_in, dev_m_out, &addr, minorx, minory);
+}
+
+PUBLIC int do_getsockopt_sotype(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_sotype() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if (uds_fd_table[minor].type == -1) {
+
+        /* the type hasn't been set yet. instead of returning an
+         * invalid type, we fail with EINVAL
+         */
+        return EINVAL;
+    }
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
+        sizeof(int), D);
+
+    return rc ?
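
A small aside on the bit-clearing idiom in do_shutdown() above: removing a permission with "mode & ~bit" is idempotent, while toggling with "^" would re-enable access if shutdown(2) were called twice with the same 'how'. A standalone check of the two idioms:

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
        mode_t mode = S_IRUSR | S_IWUSR;

        mode &= ~S_IRUSR;           /* SHUT_RD once  -> write-only */
        mode &= ~S_IRUSR;           /* SHUT_RD twice -> still write-only */
        printf("clear:  %o\n", (unsigned) mode);

        mode = S_IRUSR | S_IWUSR;
        mode ^= S_IRUSR;            /* toggle once  -> write-only */
        mode ^= S_IRUSR;            /* toggle twice -> read is back on */
        printf("toggle: %o\n", (unsigned) mode);
        return 0;
    }
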
EIO : OK;
+}
+
+PUBLIC int do_getsockopt_peercred(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int peer_minor;
+    int rc;
+    struct ucred cred;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_peercred() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    if (uds_fd_table[minor].peer == -1) {
+
+        if (uds_fd_table[minor].err == ECONNRESET) {
+            uds_fd_table[minor].err = 0;
+
+            return ECONNRESET;
+        } else {
+            return ENOTCONN;
+        }
+    }
+
+    peer_minor = uds_fd_table[minor].peer;
+
+    /* obtain the peer's credentials */
+    rc = getnucred(uds_fd_table[peer_minor].owner, &cred);
+    if (rc == -1) {
+        /* likely error: invalid endpoint / proc doesn't exist */
+        return errno;
+    }
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &cred, sizeof(struct ucred), D);
+
+    return rc ? EIO : OK;
+}
+
+int do_getsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t sndbuf = PIPE_BUF;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_sndbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(sndbuf),
+        sizeof(size_t), D);
+
+    return rc ? EIO : OK;
+}
+
+int do_setsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t sndbuf;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_setsockopt_sndbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &sndbuf,
+        sizeof(size_t), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    if (sndbuf > PIPE_BUF) {
+        /* The send buffer is limited to 32K at the moment. */
+        return ENOSYS;
+    }
+
+    /* There is no way to reduce the send buffer, do we have to
+     * let this call fail for smaller buffers?
+     */
+    return OK;
+}
+
+int do_getsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t rcvbuf = PIPE_BUF;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_getsockopt_rcvbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &(rcvbuf),
+        sizeof(size_t), D);
+
+    return rc ? EIO : OK;
+}
+
+int do_setsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
+{
+    int minor;
+    int rc;
+    size_t rcvbuf;
+
+#if DEBUG == 1
+    static int call_count = 0;
+    printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n",
+        uds_minor(dev_m_in), ++call_count);
+#endif
+
+    minor = uds_minor(dev_m_in);
+
+    rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
+        (vir_bytes) 0, (vir_bytes) &rcvbuf,
+        sizeof(size_t), D);
+
+    if (rc != OK) {
+        return EIO;
+    }
+
+    if (rcvbuf > PIPE_BUF) {
+        /* The receive buffer is limited to 32K at the moment. */
+        return ENOSYS;
+    }
+
+    /* There is no way to reduce the receive buffer, do we have to
+     * let this call fail for smaller buffers?
+ */ + return OK; +} + + +PUBLIC int do_sendto(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + struct sockaddr_un addr; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_sendto() call_count=%d\n", uds_minor(dev_m_in), + ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + if (uds_fd_table[minor].type != SOCK_DGRAM) { + /* This IOCTL is only for SOCK_DGRAM sockets */ + return EINVAL; + } + + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un), + D); + + if (rc != OK) { + return EIO; + } + + /* do some basic sanity checks on the address */ + if (addr.sun_family != AF_UNIX || addr.sun_path[0] == '\0') { + /* bad address */ + return EINVAL; + } + + rc = check_perms(minor, &addr); + if (rc != OK) { + return rc; + } + + memcpy(&(uds_fd_table[minor].target), &addr, + sizeof(struct sockaddr_un)); + + return OK; +} + +PUBLIC int do_recvfrom(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_recvfrom() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].source), + sizeof(struct sockaddr_un), D); + + return rc ? EIO : OK; +} + +int msg_control_read(struct msg_control *msg_ctrl, struct ancillary *data, + int minor) +{ + int rc; + struct msghdr msghdr; + struct cmsghdr *cmsg = NULL; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] msg_control_read() call_count=%d\n", minor, + ++call_count); +#endif + + data->nfiledes = 0; + + memset(&msghdr, '\0', sizeof(struct msghdr)); + msghdr.msg_control = msg_ctrl->msg_control; + msghdr.msg_controllen = msg_ctrl->msg_controllen; + + for(cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msghdr, cmsg)) { + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + + int i; + int nfds = + MIN((cmsg->cmsg_len-CMSG_LEN(0))/sizeof(int), + OPEN_MAX); + + for (i = 0; i < nfds; i++) { + if (data->nfiledes == OPEN_MAX) { + return EOVERFLOW; + } + + data->fds[data->nfiledes] = + ((int *) CMSG_DATA(cmsg))[i]; +#if DEBUG == 1 + printf("(uds) [%d] fd[%d]=%d\n", minor, + data->nfiledes, data->fds[data->nfiledes]); +#endif + data->nfiledes++; + } + } + } + + /* obtain this socket's credentials */ + rc = getnucred(uds_fd_table[minor].owner, &(data->cred)); + if (rc == -1) { + return errno; + } +#if DEBUG == 1 + printf("(uds) [%d] cred={%d,%d,%d}\n", minor, + data->cred.pid, data->cred.uid, + data->cred.gid); +#endif + return OK; +} + +PRIVATE int send_fds(int minor, struct ancillary *data) +{ + int rc, i, j; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] send_fds() call_count=%d\n", minor, ++call_count); +#endif + + /* verify the file descriptors and get their filps. */ + for (i = 0; i < data->nfiledes; i++) { + data->filps[i] = verify_fd(uds_fd_table[minor].owner, + data->fds[i]); + + if (data->filps[i] == NULL) { + return EINVAL; + } + } + + /* set them as in-flight */ + for (i = 0; i < data->nfiledes; i++) { + rc = set_filp(data->filps[i]); + if (rc != OK) { + /* revert set_filp() calls */ + for (j = i; j >= 0; j--) { + put_filp(data->filps[j]); + } + return rc; + } + } + + return OK; +} + +PUBLIC int clear_fds(int minor, struct ancillary *data) +{ +/* This function calls put_filp() for all of the FDs in data. 
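
What msg_control_read() and the filp helpers above implement is, from user space, ordinary SCM_RIGHTS file-descriptor passing. A runnable plain-POSIX demo of the same ancillary-data layout (error handling omitted for brevity; not MINIX-specific code):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
        int sv[2], fd = STDOUT_FILENO, newfd = -1;
        char iobyte = 'x';
        struct iovec iov = { &iobyte, 1 };
        union { struct cmsghdr c; char buf[CMSG_SPACE(sizeof(int))]; } u;
        struct msghdr mh;

        socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

        /* sender: one SCM_RIGHTS header carrying one fd */
        memset(&mh, 0, sizeof mh);
        mh.msg_iov = &iov; mh.msg_iovlen = 1;
        mh.msg_control = u.buf; mh.msg_controllen = sizeof u.buf;
        struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_RIGHTS;
        cm->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cm), &fd, sizeof(int));
        sendmsg(sv[0], &mh, 0);

        /* receiver: walk the headers the way msg_control_read() does */
        memset(&mh, 0, sizeof mh);
        mh.msg_iov = &iov; mh.msg_iovlen = 1;
        mh.msg_control = u.buf; mh.msg_controllen = sizeof u.buf;
        recvmsg(sv[1], &mh, 0);
        for (cm = CMSG_FIRSTHDR(&mh); cm != NULL; cm = CMSG_NXTHDR(&mh, cm))
            if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS)
                memcpy(&newfd, CMSG_DATA(cm), sizeof(int));

        printf("received fd %d\n", newfd);
        if (newfd != -1) write(newfd, "hello via passed fd\n", 20);
        return 0;
    }
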
+ * This is used when a Unix Domain Socket is closed and there + * exists references to file descriptors that haven't been received + * with recvmsg(). + */ + int i; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] recv_fds() call_count=%d\n", minor, + ++call_count); +#endif + + for (i = 0; i < data->nfiledes; i++) { + put_filp(data->filps[i]); +#if DEBUG == 1 + printf("(uds) clear_fds() => %d\n", data->fds[i]); +#endif + data->fds[i] = -1; + data->filps[i] = NULL; + } + + data->nfiledes = 0; + + return OK; +} + +PRIVATE int recv_fds(int minor, struct ancillary *data, + struct msg_control *msg_ctrl) +{ + int rc, i, j; + struct msghdr msghdr; + struct cmsghdr *cmsg; + endpoint_t to_ep; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] recv_fds() call_count=%d\n", minor, + ++call_count); +#endif + + msghdr.msg_control = msg_ctrl->msg_control; + msghdr.msg_controllen = msg_ctrl->msg_controllen; + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * data->nfiledes); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + to_ep = uds_fd_table[minor].owner; + + /* copy to the target endpoint */ + for (i = 0; i < data->nfiledes; i++) { + rc = copy_filp(to_ep, data->filps[i]); + if (rc < 0) { + /* revert set_filp() calls */ + for (j = 0; j < data->nfiledes; j++) { + put_filp(data->filps[j]); + } + /* revert copy_filp() calls */ + for (j = i; j >= 0; j--) { + cancel_fd(to_ep, data->fds[j]); + } + return rc; + } + data->fds[i] = rc; /* data->fds[i] now has the new FD */ + } + + for (i = 0; i < data->nfiledes; i++) { + put_filp(data->filps[i]); +#if DEBUG == 1 + printf("(uds) recv_fds() => %d\n", data->fds[i]); +#endif + ((int *)CMSG_DATA(cmsg))[i] = data->fds[i]; + data->fds[i] = -1; + data->filps[i] = NULL; + } + + data->nfiledes = 0; + + return OK; +} + +PRIVATE int recv_cred(int minor, struct ancillary *data, + struct msg_control *msg_ctrl) +{ + struct msghdr msghdr; + struct cmsghdr *cmsg; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] recv_cred() call_count=%d\n", minor, + ++call_count); +#endif + + msghdr.msg_control = msg_ctrl->msg_control; + msghdr.msg_controllen = msg_ctrl->msg_controllen; + + cmsg = CMSG_FIRSTHDR(&msghdr); + if (cmsg->cmsg_len > 0) { + cmsg = CMSG_NXTHDR(&msghdr, cmsg); + } + + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + memcpy(CMSG_DATA(cmsg), &(data->cred), sizeof(struct ucred)); + + return OK; +} + +PUBLIC int do_sendmsg(message *dev_m_in, message *dev_m_out) +{ + int minor, peer, rc, i; + struct msg_control msg_ctrl; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_sendmsg() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + memset(&msg_ctrl, '\0', sizeof(struct msg_control)); + + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &msg_ctrl, + sizeof(struct msg_control), D); + + if (rc != OK) { + return EIO; + } + + /* locate peer */ + peer = -1; + if (uds_fd_table[minor].type == SOCK_DGRAM) { + if (uds_fd_table[minor].target.sun_path[0] == '\0' || + uds_fd_table[minor].target.sun_family != AF_UNIX) { + + return EDESTADDRREQ; + } + + for (i = 0; i < NR_FDS; i++) { + + /* look for a SOCK_DGRAM socket that is bound on + * the target address + */ + if (uds_fd_table[i].type == SOCK_DGRAM && + uds_fd_table[i].addr.sun_family == AF_UNIX && + !strncmp(uds_fd_table[minor].target.sun_path, + 
uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)){ + + peer = i; + break; + } + } + + if (peer == -1) { + return ENOENT; + } + } else { + peer = uds_fd_table[minor].peer; + if (peer == -1) { + return ENOTCONN; + } + } + +#if DEBUG == 1 + printf("(uds) [%d] sendmsg() -- peer=%d\n", minor, peer); +#endif + /* note: it's possible that there is already some file + * descriptors in ancillary_data if the peer didn't call + * recvmsg() yet. That's okay. The receiver will + * get the current file descriptors plus the new ones. + */ + rc = msg_control_read(&msg_ctrl, &uds_fd_table[peer].ancillary_data, + minor); + if (rc != OK) { + return rc; + } + + return send_fds(minor, &uds_fd_table[peer].ancillary_data); +} + +PUBLIC int do_recvmsg(message *dev_m_in, message *dev_m_out) +{ + int minor; + int rc; + struct msg_control msg_ctrl; + socklen_t controllen_avail = 0; + socklen_t controllen_needed = 0; + socklen_t controllen_desired = 0; + +#if DEBUG == 1 + static int call_count = 0; + printf("(uds) [%d] do_sendmsg() call_count=%d\n", + uds_minor(dev_m_in), ++call_count); +#endif + + minor = uds_minor(dev_m_in); + + +#if DEBUG == 1 + printf("(uds) [%d] CREDENTIALS {pid:%d,uid:%d,gid:%d}\n", minor, + uds_fd_table[minor].ancillary_data.cred.pid, + uds_fd_table[minor].ancillary_data.cred.uid, + uds_fd_table[minor].ancillary_data.cred.gid); +#endif + + memset(&msg_ctrl, '\0', sizeof(struct msg_control)); + + /* get the msg_control from the user, it will include the + * amount of space the user has allocated for control data. + */ + rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &msg_ctrl, + sizeof(struct msg_control), D); + + if (rc != OK) { + return EIO; + } + + controllen_avail = MIN(msg_ctrl.msg_controllen, MSG_CONTROL_MAX); + + if (uds_fd_table[minor].ancillary_data.nfiledes > 0) { + controllen_needed = CMSG_LEN(sizeof(int) * + (uds_fd_table[minor].ancillary_data.nfiledes)); + } + + /* if there is room we also include credentials */ + controllen_desired = controllen_needed + + CMSG_LEN(sizeof(struct ucred)); + + if (controllen_needed > controllen_avail) { + return EOVERFLOW; + } + + rc = recv_fds(minor, &uds_fd_table[minor].ancillary_data, &msg_ctrl); + if (rc != OK) { + return rc; + } + + if (controllen_desired <= controllen_avail) { + rc = recv_cred(minor, &uds_fd_table[minor].ancillary_data, + &msg_ctrl); + if (rc != OK) { + return rc; + } + } + + /* send the user the control data */ + rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT, + (vir_bytes) 0, (vir_bytes) &msg_ctrl, + sizeof(struct msg_control), D); + + return rc ? EIO : OK; +} diff --git a/servers/apfs/uds.h b/servers/apfs/uds.h new file mode 100644 index 000000000..2c3d85520 --- /dev/null +++ b/servers/apfs/uds.h @@ -0,0 +1,250 @@ +#ifndef __PFS_UDS_H__ +#define __PFS_UDS_H__ + +/* + * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL) + * + * Also See... + * + * dev_uds.c, table.c, uds.c + */ + +#include +#include +#include +#include + +#include + +/* max connection backlog for incoming connections */ +#define UDS_SOMAXCONN 64 + +typedef void* filp_id_t; + +/* ancillary data to be sent */ +struct ancillary { + filp_id_t filps[OPEN_MAX]; + int fds[OPEN_MAX]; + int nfiledes; + struct ucred cred; +}; + +/* + * Internal State Information for a socket descriptor. + */ +struct uds_fd { + +/* Flags */ + + enum UDS_STATE { + /* This file descriptor is UDS_FREE and can be allocated. */ + UDS_FREE = 0, + + /* OR it is UDS_INUSE and can't be allocated. 
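
The control-buffer sizing in do_recvmsg() above follows a needed/desired split: the caller's buffer must at least hold the pending file descriptors (else EOVERFLOW), and credentials are appended only if there is room for them as well. The arithmetic in isolation (CMSG_LEN comes from sys/socket.h; the ucred stand-in and counts are illustrative):

    #include <stdio.h>
    #include <sys/socket.h>

    struct ucred_demo { int pid, uid, gid; };  /* stand-in for struct ucred */

    int main(void)
    {
        int nfds = 2;
        size_t avail = 64;                              /* caller's buffer */
        size_t needed = CMSG_LEN(sizeof(int) * nfds);   /* fds only */
        size_t desired = needed + CMSG_LEN(sizeof(struct ucred_demo));

        if (needed > avail)
            printf("EOVERFLOW: need %zu, have %zu\n", needed, avail);
        else
            printf("send fds%s\n", desired <= avail ? " + credentials" : "");
        return 0;
    }
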
*/ + UDS_INUSE = 1 + + /* state is set to UDS_INUSE in uds_open(). state is Set to + * UDS_FREE in uds_init() and uds_close(). state should be + * checked prior to all operations. + */ + } state; + +/* Owner Info */ + + /* Socket Owner */ + endpoint_t owner; + + /* endpoint for suspend/resume */ + endpoint_t endpoint; + +/* Pipe Housekeeping */ + + /* inode number on PFS -- each descriptor is backed by 1 + * PIPE which is allocated in uds_open() and freed in + * uds_close(). Data is sent/written to a peer's PIPE. + * Data is recv/read from this PIPE. + */ + ino_t inode_nr; + + + /* position in the PIPE where the data starts */ + off_t pos; + + /* size of data in the PIPE */ + size_t size; + + /* control read/write, set by uds_open() and shutdown(2). + * Can be set to S_IRUSR|S_IWUSR, S_IRUSR, S_IWUSR, or 0 + * for read and write, read only, write only, or neither. + * default is S_IRUSR|S_IWUSR. + */ + mode_t mode; + +/* Socket Info */ + + + /* socket type - SOCK_STREAM, SOCK_DGRAM, or SOCK_SEQPACKET + * Set by uds_ioctl(NWIOSUDSTYPE). It defaults to -1 in + * uds_open(). Any action on a socket with type -1 besides + * uds_ioctl(NWIOSUDSTYPE) and uds_close() will result in + * an error. + */ + int type; + + /* queue of pending connections for server sockets. + * connect(2) inserts and accept(2) removes from the queue + */ + int backlog[UDS_SOMAXCONN]; + + /* requested connection backlog size. Set by listen(2) + * Bounds (0 <= backlog_size <= UDS_SOMAXCONN) + * Defaults to UDS_SOMAXCONN which is defined above. + */ + unsigned char backlog_size; + + /* index of peer in uds_fd_table for connected sockets. + * -1 is used to mean no peer. Assumptions: peer != -1 means + * connected. + */ + int peer; + + /* index of child (client sd returned by accept(2)) + * -1 is used to mean no child. + */ + int child; + + /* address -- the address the socket is bound to. + * Assumptions: addr.sun_family == AF_UNIX means its bound. + */ + struct sockaddr_un addr; + + /* target -- where DGRAMs are sent to on the next uds_write(). */ + struct sockaddr_un target; + + /* source -- address where DGRAMs are from. used to fill in the + * from address in recvfrom(2) and recvmsg(2). + */ + struct sockaddr_un source; + + /* Flag (1 or 0) - listening for incoming connections. + * Default to 0. Set to 1 by do_listen() + */ + int listening; + + /* stores file pointers and credentials being sent between + * processes with sendmsg(2) and recvmsg(2). + */ + struct ancillary ancillary_data; + + /* Holds an errno. This is set when a connected socket is + * closed and we need to pass ECONNRESET on to a suspended + * peer. + */ + int err; + +/* Suspend/Revive Housekeeping */ + + + /* SUSPEND State Flags */ + enum UDS_SUSPENDED { + + /* Socket isn't blocked. */ + UDS_NOT_SUSPENDED = 0, + + /* Socket is blocked on read(2) waiting for data to read. */ + UDS_SUSPENDED_READ = 1, + + /* Socket is blocked on write(2) for space to write data. */ + UDS_SUSPENDED_WRITE = 2, + + /* Socket is blocked on connect(2) waiting for the server. */ + UDS_SUSPENDED_CONNECT = 4, + + /* Socket is blocked on accept(2) waiting for clients. */ + UDS_SUSPENDED_ACCEPT = 8 + } suspended; + + /* Flag (1 or 0) - thing socket was waiting for is ready. + * If 1, then uds_status() will attempt the operation that + * the socket was blocked on. 
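
The suspend/revive fields documented around here follow one pattern: a blocked call records why it is suspended, and when the awaited event arrives, ready_to_revive is raised so the server knows which operation to retry. A standalone sketch of that bookkeeping (names and types simplified):

    #include <stdio.h>

    enum suspended { NOT_SUSP = 0, SUSP_READ = 1, SUSP_WRITE = 2,
                     SUSP_CONNECT = 4, SUSP_ACCEPT = 8 };

    struct sock { enum suspended suspended; int ready_to_revive; };

    static void block_on(struct sock *s, enum suspended why)
    {
        s->suspended = why;             /* remember the pending operation */
    }

    static void event_arrived(struct sock *s)
    {
        if (s->suspended != NOT_SUSP)
            s->ready_to_revive = 1;     /* uds_status()-style revive mark */
    }

    int main(void)
    {
        struct sock s = { NOT_SUSP, 0 };
        block_on(&s, SUSP_ACCEPT);      /* accept(2) on an empty backlog */
        event_arrived(&s);              /* a connect(2) showed up */
        printf("revive op %d? %s\n", s.suspended,
            s.ready_to_revive ? "yes" : "no");
        return 0;
    }
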
+ */ + int ready_to_revive; + + /* i/o grant, saved for later use by suspended procs */ + cp_grant_id_t io_gr; + + /* is of i/o grant, saved for later use by suspended procs */ + size_t io_gr_size; + + /* Save the call number so that uds_cancel() can unwind the + * call properly. + */ + int call_nr; + + /* Save the IOCTL so uds_cancel() knows what got cancelled. */ + int ioctl; + + /* Flag (1 or 0) - the system call completed. + * A doc I read said DEV_CANCEL might be called even though + * the operation is finished. We use this variable to + * determine if we should rollback the changes or not. + */ + int syscall_done; + +/* select() */ + + /* Flag (1 or 0) - the process blocked on select(2). When + * selecting is 1 and I/O happens on this socket, then + * select_proc should be notified. + */ + int selecting; + + /* when a select is in progress, we notify() this endpoint + * of new data. + */ + endpoint_t select_proc; + + /* Options (SEL_RD, SEL_WR, SEL_ERR) that are requested. */ + int sel_ops_in; + + /* Options that are available for this socket. */ + int sel_ops_out; + + /* Flag (1 or 0) to be set to one before calling notify(). + * uds_status() will use the flag to locate this descriptor. + */ + int status_updated; +}; + +typedef struct uds_fd uds_fd_t; + +/* File Descriptor Table -- Defined in uds.c */ +EXTERN uds_fd_t uds_fd_table[NR_FDS]; + +/* + * Take message m and get the index in uds_fd_table. + */ +#define uds_minor(m) (minor((dev_t) m->DEVICE) & BYTE) + +/* + * Fill in a reply message. + */ +#define uds_set_reply(msg,type,endpoint,io_gr,status) \ + do { \ + (msg)->m_type = type; \ + (msg)->REP_ENDPT = endpoint; \ + (msg)->REP_IO_GRANT = io_gr; \ + (msg)->REP_STATUS = status; \ + } while (0) + +#define uds_sel_reply(msg,type,minor,ops) \ + do { \ + (msg)->m_type = type; \ + (msg)->DEV_MINOR = minor; \ + (msg)->DEV_SEL_OPS = ops; \ + } while (0) + + + + +#endif diff --git a/servers/apfs/utility.c b/servers/apfs/utility.c new file mode 100644 index 000000000..fac9ec625 --- /dev/null +++ b/servers/apfs/utility.c @@ -0,0 +1,33 @@ +#include "fs.h" + + +/*===========================================================================* + * no_sys * + *===========================================================================*/ +PUBLIC int no_sys(message *pfs_m_in, message *pfs_m_out) +{ +/* Somebody has used an illegal system call number */ + printf("no_sys: invalid call 0x%x to pfs\n", req_nr); + return(EINVAL); +} + + +/*===========================================================================* + * clock_time * + *===========================================================================*/ +PUBLIC time_t clock_time() +{ +/* This routine returns the time in seconds since 1.1.1970. MINIX is an + * astrophysically naive system that assumes the earth rotates at a constant + * rate and that such things as leap seconds do not exist. 
+ */ + + int r; + clock_t uptime; /* Uptime in ticks */ + time_t boottime; + + if ((r = getuptime2(&uptime, &boottime)) != OK) + panic("clock_time: getuptme2 failed: %d", r); + + return( (time_t) (boottime + (uptime/sys_hz()))); +} diff --git a/servers/avfs/Makefile b/servers/avfs/Makefile new file mode 100644 index 000000000..1fc72128e --- /dev/null +++ b/servers/avfs/Makefile @@ -0,0 +1,25 @@ +# Makefile for Virtual File System (VFS) +.include + +PROG= vfs +SRCS= main.c open.c read.c write.c pipe.c dmap.c \ + path.c device.c mount.c link.c exec.c \ + filedes.c stadir.c protect.c time.c \ + lock.c misc.c utility.c select.c table.c \ + vnode.c vmnt.c request.c fscall.c \ + tll.c comm.c worker.c + +.if ${MKCOVERAGE} != "no" +SRCS+= gcov.c +CPPFLAGS+= -DUSE_COVERAGE +.endif + +DPADD+= ${LIBSYS} ${LIBTIMERS} ${LIBEXEC} +LDADD+= -lsys -ltimers -lexec -lmthread + +MAN= + +BINDIR?= /usr/sbin +INSTALLFLAGS+= -S 16k + +.include diff --git a/servers/avfs/comm.c b/servers/avfs/comm.c new file mode 100644 index 000000000..b7254463a --- /dev/null +++ b/servers/avfs/comm.c @@ -0,0 +1,163 @@ +#include "fs.h" +#include "glo.h" +#include "vmnt.h" +#include "fproc.h" +#include +#include + +FORWARD _PROTOTYPE( int sendmsg, (struct vmnt *vmp, struct fproc *rfp) ); +FORWARD _PROTOTYPE( int queuemsg, (struct vmnt *vmp) ); + +/*===========================================================================* + * sendmsg * + *===========================================================================*/ +PRIVATE int sendmsg(vmp, rfp) +struct vmnt *vmp; +struct fproc *rfp; +{ +/* This is the low level function that sends requests to FS processes. + */ + int r, transid; + + if (vmp->m_fs_e == rfp->fp_endpoint) return(EDEADLK); + vmp->m_comm.c_cur_reqs++; /* One more request awaiting a reply */ + + transid = rfp->fp_wtid + VFS_TRANSID; + rfp->fp_sendrec->m_type = TRNS_ADD_ID(rfp->fp_sendrec->m_type, transid); + if ((r = asynsend3(vmp->m_fs_e, rfp->fp_sendrec, AMF_NOREPLY)) != OK) { + printf("VFS: sendmsg: error sending message. " + "FS_e: %d req_nr: %d err: %d\n", vmp->m_fs_e, + rfp->fp_sendrec->m_type, r); + util_stacktrace(); + return(r); + } + + return(r); +} + +/*===========================================================================* + * send_work * + *===========================================================================*/ +PUBLIC void send_work(void) +{ +/* Try to send out as many requests as possible */ + struct vmnt *vmp; + + if (sending == 0) return; + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) + fs_sendmore(vmp); +} + +/*===========================================================================* + * fs_sendmore * + *===========================================================================*/ +PUBLIC void fs_sendmore(struct vmnt *vmp) +{ + struct worker_thread *worker; + + /* Can we send more requests? 
*/ + if (vmp->m_fs_e == NONE) return; + if ((worker = vmp->m_comm.c_req_queue) == NULL) /* No process is queued */ + return; + if (vmp->m_comm.c_cur_reqs >= vmp->m_comm.c_max_reqs)/*No room to send more*/ + return; + if (vmp->m_flags & VMNT_BACKCALL) /* Hold off for now */ + return; + + vmp->m_comm.c_req_queue = worker->w_next; /* Remove head */ + worker->w_next = NULL; + sending--; + assert(sending >= 0); + sendmsg(vmp, worker->w_job.j_fp); +} + +/*===========================================================================* + * fs_sendrec * + *===========================================================================*/ +PUBLIC int fs_sendrec(endpoint_t fs_e, message *reqmp) +{ + struct vmnt *vmp; + int r; + + if ((vmp = find_vmnt(fs_e)) == NULL) + panic("Trying to talk to non-existent FS"); + + if (!force_sync) { + fp->fp_sendrec = reqmp; /* Where to store request and reply */ + + /* Find out whether we can send right away or have to enqueue */ + if ( !(vmp->m_flags & VMNT_BACKCALL) && + vmp->m_comm.c_cur_reqs < vmp->m_comm.c_max_reqs) { + /* There's still room to send more and no proc is queued */ + r = sendmsg(vmp, fp); + } else { + r = queuemsg(vmp); + } + self->w_next = NULL; /* End of list */ + + if (r != OK) return(r); + + worker_wait(); /* Yield execution until we've received the reply. */ + } else if (force_sync == 1) { + int r; + if (OK != (r = sendrec(fs_e, reqmp))) { + printf("VFS: sendrec failed: %d\n", r); + util_stacktrace(); + return(r); + } + } else if (force_sync == 2) { + int r, status; + if (OK != (r = asynsend(fs_e, reqmp)) || + OK != (r = receive(fs_e, reqmp, &status))) { + printf("VFS: asynrec failed: %d\n", r); + util_stacktrace(); + return(r); + } + } else if (force_sync == 3) { + int r, status; + if (OK != (r = send(fs_e, reqmp)) || + OK != (r = receive(fs_e, reqmp, &status))) { + printf("VFS: sendreceive failed: %d\n", r); + util_stacktrace(); + return(r); + } + } + + if (reqmp->m_type == -EENTERMOUNT || reqmp->m_type == -ELEAVEMOUNT || + reqmp->m_type == -ESYMLINK) { + reqmp->m_type = -reqmp->m_type; + } else if (force_sync != 0 && reqmp->m_type > 0) { + /* XXX: Keep this as long as we're interested in having support + * for synchronous communication. */ + nested_fs_call(reqmp); + return fs_sendrec(fs_e, reqmp); + } + + return(reqmp->m_type); +} + +/*===========================================================================* + * queuemsg * + *===========================================================================*/ +PRIVATE int queuemsg(struct vmnt *vmp) +{ +/* Put request on queue for vmnt */ + + struct worker_thread *queue; + + if (vmp->m_comm.c_req_queue == NULL) { + vmp->m_comm.c_req_queue = self; + } else { + /* Walk the list ... */ + queue = vmp->m_comm.c_req_queue; + while (queue->w_next != NULL) queue = queue->w_next; + + /* ... 
and append this worker */
+	queue->w_next = self;
+  }
+
+  self->w_next = NULL;	/* End of list */
+  sending++;
+
+  return(OK);
+}
diff --git a/servers/avfs/comm.h b/servers/avfs/comm.h
new file mode 100644
index 000000000..4e0d00cc3
--- /dev/null
+++ b/servers/avfs/comm.h
@@ -0,0 +1,12 @@
+#ifndef __VFS_COMM_H__
+#define __VFS_COMM_H__
+
+/* VFS<->FS communication */
+
+typedef struct {
+  int c_max_reqs;	/* Max requests an FS can handle simultaneously */
+  int c_cur_reqs;	/* Number of requests the FS is currently handling */
+  struct worker_thread *c_req_queue;/* Queue of procs waiting to send a message */
+} comm_t;
+
+#endif
diff --git a/servers/avfs/const.h b/servers/avfs/const.h
new file mode 100644
index 000000000..44a339d7c
--- /dev/null
+++ b/servers/avfs/const.h
@@ -0,0 +1,50 @@
+#ifndef __VFS_CONST_H__
+#define __VFS_CONST_H__
+
+/* Table sizes */
+#define NR_FILPS	512	/* # slots in filp table */
+#define NR_LOCKS	8	/* # slots in the file locking table */
+#define NR_MNTS		16	/* # slots in mount table */
+#define NR_VNODES	512	/* # slots in vnode table */
+#define NR_WTHREADS	8	/* # slots in worker thread table */
+
+#define NR_NONEDEVS	NR_MNTS	/* # slots in nonedev bitmap */
+
+/* Miscellaneous constants */
+#define SU_UID		((uid_t) 0)	/* super_user's uid_t */
+#define SYS_UID		((uid_t) 0)	/* uid_t for system processes and INIT */
+#define SYS_GID		((gid_t) 0)	/* gid_t for system processes and INIT */
+
+#define FP_BLOCKED_ON_NONE	0 /* not blocked */
+#define FP_BLOCKED_ON_PIPE	1 /* susp'd on pipe */
+#define FP_BLOCKED_ON_LOCK	2 /* susp'd on lock */
+#define FP_BLOCKED_ON_POPEN	3 /* susp'd on pipe open */
+#define FP_BLOCKED_ON_SELECT	4 /* susp'd on select */
+#define FP_BLOCKED_ON_DOPEN	5 /* susp'd on device open */
+#define FP_BLOCKED_ON_OTHER	6 /* blocked on other process, check
+				     fp_task to find out */
+
+/* test if the process is blocked on something */
+#define fp_is_blocked(fp)	((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE)
+
+#define DUP_MASK	0100	/* mask to distinguish dup2 from dup */
+
+#define LOOK_UP		0 /* tells search_dir to lookup string */
+#define ENTER		1 /* tells search_dir to make dir entry */
+#define DELETE		2 /* tells search_dir to delete entry */
+#define IS_EMPTY	3 /* tells search_dir to ret. OK or ENOTEMPTY */
+
+#define SYMLOOP		16
+
+#define LABEL_MAX	16	/* maximum label size (including '\0'). Should
+				 * not be smaller than 16 or bigger than
+				 * M3_LONG_STRING.
+				 */
+
+/* Args to dev_io */
+#define VFS_DEV_READ	2001
+#define VFS_DEV_WRITE	2002
+#define VFS_DEV_IOCTL	2005
+#define VFS_DEV_SELECT	2006
+
+#endif
diff --git a/servers/avfs/device.c b/servers/avfs/device.c
new file mode 100644
index 000000000..4fc25ab95
--- /dev/null
+++ b/servers/avfs/device.c
@@ -0,0 +1,1060 @@
+/* When a needed block is not in the cache, it must be fetched from the disk.
+ * Special character files also require I/O.  The routines for these are here.
+ *
+ * The entry points in this file are:
+ *   dev_open:	 FS opens a device
+ *   dev_close:	 FS closes a device
+ *   dev_io:	 FS does a read or write on a device
+ *   dev_status: FS processes callback request alert
+ *   gen_opcl:	 generic call to a task to perform an open/close
+ *   gen_io:	 generic call to a task to perform an I/O operation
+ *   no_dev:	 open/close processing for devices that don't exist
+ *   no_dev_io:	 i/o processing for devices that don't exist
+ *   tty_opcl:	 perform tty-specific processing for open/close
+ *   ctty_opcl:	 perform controlling-tty-specific processing for open/close
+ *   ctty_io:	 perform controlling-tty-specific processing for I/O
+ *   pm_setsid:	 perform VFS's side of setsid system call
+ *   do_ioctl:	 perform the IOCTL system call
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include
+#include "vnode.h"
+#include "vmnt.h"
+#include "param.h"
+
+FORWARD _PROTOTYPE( void restart_reopen, (int major)			);
+FORWARD _PROTOTYPE( int safe_io_conversion, (endpoint_t, cp_grant_id_t *,
+					     int *,
+					     endpoint_t *, void **,
+					     size_t, u32_t *)		);
+
+PRIVATE int dummyproc;
+
+
+/*===========================================================================*
+ *				dev_open				     *
+ *===========================================================================*/
+PUBLIC int dev_open(
+  dev_t dev,			/* device to open */
+  endpoint_t proc_e,		/* process to open for */
+  int flags			/* mode bits and flags */
+)
+{
+  int major, r;
+
+  /* Determine the major device number and call the device class specific
+   * open/close routine.  (This is the only routine that must check the
+   * device number for being in range.  All others can trust this check.)
+   */
+  major = major(dev);
+  if (major < 0 || major >= NR_DEVICES) major = 0;
+  if (dmap[major].dmap_driver == NONE) return(ENXIO);
+  r = (*dmap[major].dmap_opcl)(DEV_OPEN, dev, proc_e, flags);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				dev_reopen				     *
+ *===========================================================================*/
+PUBLIC int dev_reopen(
+  dev_t dev,			/* device to open */
+  int filp_no,			/* filp to reopen for */
+  int flags			/* mode bits and flags */
+)
+{
+/* Reopen a device after a failing device driver */
+
+  int major, r;
+  struct dmap *dp;
+
+  /* Determine the major device number and call the device class specific
+   * open/close routine.  (This is the only routine that must check the device
+   * number for being in range.  All others can trust this check.)
+   */
+
+  major = major(dev);
+  if (major < 0 || major >= NR_DEVICES) major = 0;
+  dp = &dmap[major];
+  if (dp->dmap_driver == NONE) return(ENXIO);
+  r = (*dp->dmap_opcl)(DEV_REOPEN, dev, filp_no, flags);
+  if (r == SUSPEND) r = OK;
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				dev_close				     *
+ *===========================================================================*/
+PUBLIC int dev_close(
+  dev_t dev,			/* device to close */
+  int filp_no
+)
+{
+/* Close a device */
+  int r, major;
+
+  /* See if driver is roughly valid.
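+   * "Roughly valid" here means only that major(dev) is in range and that
+   * a driver endpoint is currently mapped for it: e.g. (hypothetical
+   * numbers) for a node created as makedev(4, 1), major(dev) is 4, so
+   * dmap[4].dmap_driver must not be NONE before the close is forwarded.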
*/ + major = major(dev); + if (major < 0 || major >= NR_DEVICES) return(ENXIO); + if (dmap[major].dmap_driver == NONE) return(ENXIO); + r = (*dmap[major].dmap_opcl)(DEV_CLOSE, dev, filp_no, 0); + return(r); +} + + +/*===========================================================================* + * find_suspended_ep * + *===========================================================================*/ +endpoint_t find_suspended_ep(endpoint_t driver, cp_grant_id_t g) +{ +/* A process is suspended on a driver for which VFS issued a grant. Find out + * which process it was. + */ + struct fproc *rfp; + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if(rfp->fp_pid == PID_FREE) + continue; + + if(rfp->fp_blocked_on == FP_BLOCKED_ON_OTHER && + rfp->fp_task == driver && rfp->fp_grant == g) + return(rfp->fp_endpoint); + } + + return(NONE); +} + + +/*===========================================================================* + * dev_status * + *===========================================================================*/ +PUBLIC void dev_status(message *m) +{ +/* A device sent us a notification it has something for us. Retrieve it. */ + + message st; + int major, get_more = 1; + endpoint_t endpt; + + for (major = 0; major < NR_DEVICES; major++) + if (dmap_driver_match(m->m_source, major)) + break; /* 'major' is the device that sent the message */ + + if (major >= NR_DEVICES) /* Device endpoint not found; nothing to do */ + return; + + if (dmap[major].dmap_style == STYLE_DEVA || + dmap[major].dmap_style == STYLE_CLONE_A) { + printf("VFS: not doing dev_status for async driver %d\n", m->m_source); + return; + } + + /* Continuously send DEV_STATUS messages until the device has nothing to + * say to us anymore. */ + do { + int r; + st.m_type = DEV_STATUS; + r = sendrec(m->m_source, &st); + if (r == OK && st.REP_STATUS == ERESTART) r = EDEADEPT; + if (r != OK) { + printf("VFS: DEV_STATUS failed to %d: %d\n", m->m_source, r); + if (r == EDEADSRCDST || r == EDEADEPT) return; + panic("VFS: couldn't sendrec for DEV_STATUS: %d", r); + } + + switch(st.m_type) { + case DEV_REVIVE: + /* We've got results for a read/write/ioctl call to a + * synchronous character driver */ + endpt = st.REP_ENDPT; + if (endpt == VFS_PROC_NR) { + endpt = find_suspended_ep(m->m_source,st.REP_IO_GRANT); + if(endpt == NONE) { + printf("VFS: proc with grant %d from %d not found\n", + st.REP_IO_GRANT, st.m_source); + continue; + } + } + revive(endpt, st.REP_STATUS); + break; + case DEV_IO_READY: + /* Reply to a select request: driver is ready for I/O */ + select_reply2(st.m_source, st.DEV_MINOR, st.DEV_SEL_OPS); + break; + default: + printf("VFS: unrecognized reply %d to DEV_STATUS\n",st.m_type); + /* Fall through. */ + case DEV_NO_STATUS: + get_more = 0; + break; + } + } while(get_more); +} + +/*===========================================================================* + * safe_io_conversion * + *===========================================================================*/ +PRIVATE int safe_io_conversion(driver, gid, op, io_ept, buf, bytes, pos_lo) +endpoint_t driver; +cp_grant_id_t *gid; +int *op; +endpoint_t *io_ept; +void **buf; +size_t bytes; +u32_t *pos_lo; +{ +/* Convert operation to the 'safe' variant (i.e., grant based) if applicable. + * If no copying of data is involved, there is also no need to convert. */ + + int access = 0; + size_t size; + + *gid = GRANT_INVALID; /* Grant to buffer */ + + switch(*op) { + case VFS_DEV_READ: + case VFS_DEV_WRITE: + /* Change to safe op. */ + *op = (*op == VFS_DEV_READ) ? 
DEV_READ_S : DEV_WRITE_S; + *gid = cpf_grant_magic(driver, *io_ept, (vir_bytes) *buf, bytes, + *op == DEV_READ_S ? CPF_WRITE : CPF_READ); + if (*gid < 0) + panic("VFS: cpf_grant_magic of READ/WRITE buffer failed"); + break; + case VFS_DEV_IOCTL: + *pos_lo = *io_ept; /* Old endpoint in POSITION field. */ + *op = DEV_IOCTL_S; + if(_MINIX_IOCTL_IOR(m_in.REQUEST)) access |= CPF_WRITE; + if(_MINIX_IOCTL_IOW(m_in.REQUEST)) access |= CPF_READ; + if(_MINIX_IOCTL_BIG(m_in.REQUEST)) + size = _MINIX_IOCTL_SIZE_BIG(m_in.REQUEST); + else + size = _MINIX_IOCTL_SIZE(m_in.REQUEST); + + /* Grant access to the buffer even if no I/O happens with the ioctl, in + * order to disambiguate requests with DEV_IOCTL_S. + */ + *gid = cpf_grant_magic(driver, *io_ept, (vir_bytes) *buf, size, access); + if (*gid < 0) + panic("VFS: cpf_grant_magic IOCTL buffer failed"); + + break; + case VFS_DEV_SELECT: + *op = DEV_SELECT; + break; + default: + panic("VFS: unknown operation %d for safe I/O conversion", *op); + } + + /* If we have converted to a safe operation, I/O endpoint becomes VFS if it + * wasn't already. + */ + if(GRANT_VALID(*gid)) { + *io_ept = VFS_PROC_NR; + return(1); + } + + /* Not converted to a safe operation (because there is no copying involved in + * this operation). + */ + return(0); +} + +/*===========================================================================* + * dev_io * + *===========================================================================*/ +PUBLIC int dev_io( + int op, /* DEV_READ, DEV_WRITE, DEV_IOCTL, etc. */ + dev_t dev, /* major-minor device number */ + int proc_e, /* in whose address space is buf? */ + void *buf, /* virtual address of the buffer */ + u64_t pos, /* byte position */ + size_t bytes, /* how many bytes to transfer */ + int flags, /* special flags, like O_NONBLOCK */ + int suspend_reopen /* Just suspend the process */ +) +{ +/* Read from or write to a device. The parameter 'dev' tells which one. */ + struct dmap *dp; + u32_t pos_lo, pos_high; + message dev_mess; + cp_grant_id_t gid = GRANT_INVALID; + int safe, minor_dev, major_dev; + void *buf_used; + endpoint_t ioproc; + + pos_lo = ex64lo(pos); + pos_high = ex64hi(pos); + major_dev = major(dev); + minor_dev = minor(dev); + + /* Determine task dmap. */ + dp = &dmap[major_dev]; + + /* See if driver is roughly valid. */ + if (dp->dmap_driver == NONE) { + printf("VFS: dev_io: no driver for major %d\n", major_dev); + return(ENXIO); + } + + if (suspend_reopen) { + /* Suspend user. */ + fp->fp_grant = GRANT_INVALID; + fp->fp_ioproc = NONE; + wait_for(dp->dmap_driver); + fp->fp_flags |= FP_SUSP_REOPEN; + return(SUSPEND); + } + + if(isokendpt(dp->dmap_driver, &dummyproc) != OK) { + printf("VFS: dev_io: old driver for major %x (%d)\n", major_dev, + dp->dmap_driver); + return(ENXIO); + } + + /* By default, these are right. */ + dev_mess.USER_ENDPT = proc_e; + dev_mess.ADDRESS = buf; + + /* Convert DEV_* to DEV_*_S variants. */ + buf_used = buf; + safe = safe_io_conversion(dp->dmap_driver, &gid, &op, + (endpoint_t *) &dev_mess.USER_ENDPT, &buf_used, + bytes, &pos_lo); + + /* If the safe conversion was done, set the IO_GRANT to + * the grant id. + */ + if(safe) dev_mess.IO_GRANT = (char *) gid; + + /* Set up the rest of the message passed to task. */ + dev_mess.m_type = op; + dev_mess.DEVICE = minor_dev; + dev_mess.POSITION = pos_lo; + dev_mess.COUNT = bytes; + dev_mess.HIGHPOS = pos_high; + + /* This will be used if the i/o is suspended. */ + ioproc = dev_mess.USER_ENDPT; + + /* Call the task. 
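+   * The single call below dispatches through the per-major function
+   * pointer installed by map_driver(): for a STYLE_DEV driver, dmap_io
+   * points to gen_io() (a blocking sendrec), while for STYLE_DEVA or
+   * STYLE_CLONE_A it points to asyn_io() (asynsend3 plus a faked
+   * SUSPEND reply), so the suspension decision is made on the reply.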
*/ + (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + + if(dp->dmap_driver == NONE) { + /* Driver has vanished. */ + printf("VFS: driver gone?!\n"); + if(safe) cpf_revoke(gid); + return(EIO); + } + + /* Task has completed. See if call completed. */ + if (dev_mess.REP_STATUS == SUSPEND) { + if ((flags & O_NONBLOCK) && !(dp->dmap_style == STYLE_DEVA || + dp->dmap_style == STYLE_CLONE_A)) { + /* Not supposed to block. */ + dev_mess.m_type = CANCEL; + dev_mess.USER_ENDPT = ioproc; + dev_mess.IO_GRANT = (char *) gid; + + /* This R_BIT/W_BIT check taken from suspend()/unpause() + * logic. Mode is expected in the COUNT field. + */ + dev_mess.COUNT = 0; + if (call_nr == READ) dev_mess.COUNT = R_BIT; + else if (call_nr == WRITE) dev_mess.COUNT = W_BIT; + dev_mess.DEVICE = minor_dev; + (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + if (dev_mess.REP_STATUS == EINTR) dev_mess.REP_STATUS = EAGAIN; + } else { + /* select() will do suspending itself. */ + if(op != DEV_SELECT) { + /* Suspend user. */ + wait_for(dp->dmap_driver); + } + assert(!GRANT_VALID(fp->fp_grant)); + fp->fp_grant = gid; /* revoke this when unsuspended. */ + fp->fp_ioproc = ioproc; + + if (flags & O_NONBLOCK) { + /* Not supposed to block, send cancel message */ + dev_mess.m_type = CANCEL; + dev_mess.USER_ENDPT = ioproc; + dev_mess.IO_GRANT = (char *) gid; + + /* This R_BIT/W_BIT check taken from suspend()/unpause() + * logic. Mode is expected in the COUNT field. + */ + dev_mess.COUNT = 0; + if(call_nr == READ) dev_mess.COUNT = R_BIT; + else if(call_nr == WRITE) dev_mess.COUNT = W_BIT; + dev_mess.DEVICE = minor_dev; + (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + + /* Should do something about EINTR -> EAGAIN mapping */ + } + return(SUSPEND); + } + } + + /* No suspend, or cancelled suspend, so I/O is over and can be cleaned up. */ + if(safe) cpf_revoke(gid); + + return(dev_mess.REP_STATUS); +} + +/*===========================================================================* + * gen_opcl * + *===========================================================================*/ +PUBLIC int gen_opcl( + int op, /* operation, DEV_OPEN or DEV_CLOSE */ + dev_t dev, /* device to open or close */ + endpoint_t proc_e, /* process to open/close for */ + int flags /* mode bits and flags */ +) +{ +/* Called from the dmap struct on opens & closes of special files.*/ + int r, minor_dev, major_dev; + struct dmap *dp; + message dev_mess; + + /* Determine task dmap. */ + major_dev = major(dev); + minor_dev = minor(dev); + if (major_dev < 0 || major_dev >= NR_DEVICES) return(ENXIO); + dp = &dmap[major_dev]; + if (dp->dmap_driver == NONE) { + printf("VFS: gen_opcl: no driver for major %d\n", major_dev); + return(ENXIO); + } + + dev_mess.m_type = op; + dev_mess.DEVICE = minor_dev; + dev_mess.USER_ENDPT = proc_e; + dev_mess.COUNT = flags; + + /* Call the task. */ + r = (*dp->dmap_io)(dp->dmap_driver, &dev_mess); + if (r != OK) return(r); + + return(dev_mess.REP_STATUS); +} + +/*===========================================================================* + * tty_opcl * + *===========================================================================*/ +PUBLIC int tty_opcl( + int op, /* operation, DEV_OPEN or DEV_CLOSE */ + dev_t dev, /* device to open or close */ + endpoint_t proc_e, /* process to open/close for */ + int flags /* mode bits and flags */ +) +{ +/* This procedure is called from the dmap struct on tty open/close. 
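+ *
+ * The rules applied here mirror controlling-tty acquisition: as a made-up
+ * example, a session-leader shell with no controlling tty yet that opens
+ * /dev/ttyc0 acquires it (the driver reports this by returning 1, turned
+ * into OK below), while any other process opening that same device gets
+ * O_NOCTTY added and uses the terminal without acquiring it.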
*/
+
+  int r;
+  register struct fproc *rfp;
+
+  /* Add O_NOCTTY to the flags if this process is not a session leader, or
+   * if it already has a controlling tty, or if it is someone else's
+   * controlling tty.
+   */
+  if (!(fp->fp_flags & FP_SESLDR) || fp->fp_tty != 0) {
+	flags |= O_NOCTTY;
+  } else {
+	for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+		if(rfp->fp_pid == PID_FREE) continue;
+		if (rfp->fp_tty == dev) flags |= O_NOCTTY;
+	}
+  }
+
+  r = gen_opcl(op, dev, proc_e, flags);
+
+  /* Did this call make the tty the controlling tty? */
+  if (r == 1) {
+	fp->fp_tty = dev;
+	r = OK;
+  }
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				ctty_opcl				     *
+ *===========================================================================*/
+PUBLIC int ctty_opcl(
+  int op,			/* operation, DEV_OPEN or DEV_CLOSE */
+  dev_t dev,			/* device to open or close */
+  endpoint_t proc_e,		/* process to open/close for */
+  int flags			/* mode bits and flags */
+)
+{
+/* This procedure is called from the dmap struct on opening or closing
+ * /dev/tty, the magic device that translates to the controlling tty.
+ */
+
+  return(fp->fp_tty == 0 ? ENXIO : OK);
+}
+
+
+/*===========================================================================*
+ *				pm_setsid				     *
+ *===========================================================================*/
+PUBLIC void pm_setsid(proc_e)
+int proc_e;
+{
+/* Perform the VFS side of the SETSID call, i.e. get rid of the controlling
+ * terminal of a process, and make the process a session leader.
+ */
+  register struct fproc *rfp;
+  int slot;
+
+  /* Make the process a session leader with no controlling tty. */
+  okendpt(proc_e, &slot);
+  rfp = &fproc[slot];
+  rfp->fp_flags |= FP_SESLDR;
+  rfp->fp_tty = 0;
+}
+
+
+/*===========================================================================*
+ *				do_ioctl				     *
+ *===========================================================================*/
+PUBLIC int do_ioctl()
+{
+/* Perform the ioctl(ls_fd, request, argx) system call (uses m2 fmt). */
+
+  int r = OK, suspend_reopen;
+  struct filp *f;
+  register struct vnode *vp;
+  dev_t dev;
+
+  if ((f = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+  vp = f->filp_vno;	/* get vnode pointer */
+  if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL &&
+      (vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) {
+	r = ENOTTY;
+  }
+
+  if (r == OK) {
+	suspend_reopen = (f->filp_state != FS_NORMAL);
+	dev = (dev_t) vp->v_sdev;
+
+	r = dev_io(VFS_DEV_IOCTL, dev, who_e, m_in.ADDRESS, cvu64(0),
+		   m_in.REQUEST, f->filp_flags, suspend_reopen);
+  }
+
+  unlock_filp(f);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				gen_io					     *
+ *===========================================================================*/
+PUBLIC int gen_io(task_nr, mess_ptr)
+endpoint_t task_nr;		/* which task to call */
+message *mess_ptr;		/* pointer to message for task */
+{
+/* All file system I/O ultimately comes down to I/O on major/minor device
+ * pairs.  These lead to calls on the following routines via the dmap table.
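+ *
+ * A sketch of the round trip done here (driver endpoint made up):
+ *
+ *	message m;
+ *	m.m_type = DEV_READ_S;		// filled in by dev_io()
+ *	r = sendrec(driver_e, &m);	// block until the driver replies
+ *	// a good reply has m.REP_ENDPT equal to the process we did I/O for
+ *
+ * The REP_ENDPT check below rejects stale or misdirected replies with EIO.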
+ */ + + int r, proc_e; + + proc_e = mess_ptr->USER_ENDPT; + + r = sendrec(task_nr, mess_ptr); + if (r == OK && mess_ptr->REP_STATUS == ERESTART) r = EDEADEPT; + if (r != OK) { + if (r == EDEADSRCDST || r == EDEADEPT) { + printf("VFS: dead driver %d\n", task_nr); + dmap_unmap_by_endpt(task_nr); + return(r); + } else if (r == ELOCKED) { + printf("VFS: ELOCKED talking to %d\n", task_nr); + return(r); + } + panic("call_task: can't send/receive: %d", r); + } + + /* Did the process we did the sendrec() for get a result? */ + if (mess_ptr->REP_ENDPT != proc_e) { + printf("VFS: strange device reply from %d, type = %d, " + "proc = %d (not %d) (2) ignored\n", mess_ptr->m_source, + mess_ptr->m_type, proc_e, mess_ptr->REP_ENDPT); + + return(EIO); + } + + return(OK); +} + + +/*===========================================================================* + * asyn_io * + *===========================================================================*/ +PUBLIC int asyn_io(task_nr, mess_ptr) +int task_nr; /* which task to call */ +message *mess_ptr; /* pointer to message for task */ +{ +/* All file system I/O ultimately comes down to I/O on major/minor device + * pairs. These lead to calls on the following routines via the dmap table. + */ + + int r; + + fp->fp_sendrec = mess_ptr; /* Remember where result should be stored */ + r = asynsend3(task_nr, mess_ptr, AMF_NOREPLY); + + if (r != OK) panic("VFS: asynsend in asyn_io failed: %d", r); + + /* Fake a SUSPEND */ + mess_ptr->REP_STATUS = SUSPEND; + return(OK); +} + + +/*===========================================================================* + * ctty_io * + *===========================================================================*/ +PUBLIC int ctty_io(task_nr, mess_ptr) +int task_nr; /* not used - for compatibility with dmap_t */ +message *mess_ptr; /* pointer to message for task */ +{ +/* This routine is only called for one device, namely /dev/tty. Its job + * is to change the message to use the controlling terminal, instead of the + * major/minor pair for /dev/tty itself. + */ + + struct dmap *dp; + + if (fp->fp_tty == 0) { + /* No controlling tty present anymore, return an I/O error. */ + mess_ptr->REP_STATUS = EIO; + } else { + /* Substitute the controlling terminal device. */ + dp = &dmap[major(fp->fp_tty)]; + mess_ptr->DEVICE = minor(fp->fp_tty); + + if (dp->dmap_driver == NONE) { + printf("FS: ctty_io: no driver for dev\n"); + return(EIO); + } + + if (isokendpt(dp->dmap_driver, &dummyproc) != OK) { + printf("VFS: ctty_io: old driver %d\n", dp->dmap_driver); + return(EIO); + } + + (*dp->dmap_io)(dp->dmap_driver, mess_ptr); + } + + return(OK); +} + + +/*===========================================================================* + * no_dev * + *===========================================================================*/ +PUBLIC int no_dev( + int UNUSED(op), /* operation, DEV_OPEN or DEV_CLOSE */ + dev_t UNUSED(dev), /* device to open or close */ + int UNUSED(proc), /* process to open/close for */ + int UNUSED(flags) /* mode bits and flags */ +) +{ +/* Called when opening a nonexistent device. */ + return(ENODEV); +} + +/*===========================================================================* + * no_dev_io * + *===========================================================================*/ +PUBLIC int no_dev_io(int proc, message *m) +{ +/* Called when doing i/o on a nonexistent device. 
*/
+  printf("VFS: I/O on unmapped device number\n");
+  return(EIO);
+}
+
+
+/*===========================================================================*
+ *				clone_opcl				     *
+ *===========================================================================*/
+PUBLIC int clone_opcl(
+  int op,			/* operation, DEV_OPEN or DEV_CLOSE */
+  dev_t dev,			/* device to open or close */
+  int proc_e,			/* process to open/close for */
+  int flags			/* mode bits and flags */
+)
+{
+/* Some devices need special processing upon open.  Such a device is "cloned",
+ * i.e. on a successful open it is replaced by a new device with a new unique
+ * minor device number.  This new device number identifies a new object (such
+ * as a new network connection) that has been allocated within a task.
+ */
+  struct dmap *dp;
+  int r, minor_dev, major_dev;
+  message dev_mess;
+
+  /* Determine task dmap. */
+  minor_dev = minor(dev);
+  major_dev = major(dev);
+  if (major_dev < 0 || major_dev >= NR_DEVICES) return(ENXIO);
+  dp = &dmap[major_dev];
+  if (dp->dmap_driver == NONE) {
+	printf("VFS clone_opcl: no driver for major %d\n", major_dev);
+	return(ENXIO);
+  }
+
+  dev_mess.m_type = op;
+  dev_mess.DEVICE = minor_dev;
+  dev_mess.USER_ENDPT = proc_e;
+  dev_mess.COUNT = flags;
+
+  if(isokendpt(dp->dmap_driver, &dummyproc) != OK) {
+	printf("VFS clone_opcl: bad driver endpoint for major %d (%d)\n",
+	       major_dev, dp->dmap_driver);
+	return(ENXIO);
+  }
+
+  /* Call the task. */
+  r = (*dp->dmap_io)(dp->dmap_driver, &dev_mess);
+  if (r != OK) return(r);
+
+  if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) {
+	/* Wait for reply when driver is asynchronous */
+	worker_wait();
+  }
+
+  if (op == DEV_OPEN && dev_mess.REP_STATUS >= 0) {
+	if (dev_mess.REP_STATUS != minor_dev) {
+		struct vnode *vp;
+		struct node_details res;
+
+		/* A new minor device number has been returned.
+		 * Request PFS to create a temporary device file to hold it.
+		 */
+
+		/* Device number of the new device. */
+		dev = (dev & ~(BYTE << MINOR)) | (dev_mess.REP_STATUS << MINOR);
+
+		/* Issue request */
+		r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid,
+				ALL_MODES | I_CHAR_SPECIAL, dev, &res);
+		if (r != OK) {
+			(void) clone_opcl(DEV_CLOSE, dev, proc_e, 0);
+			return r;
+		}
+
+		/* Drop old node and use the new values */
+		vp = fp->fp_filp[m_in.fd]->filp_vno;
+
+		unlock_vnode(vp);
+		put_vnode(vp);
+		if ((vp = get_free_vnode()) == NULL)
+			return(err_code);
+
+		lock_vnode(vp, VNODE_OPCL);
+
+		vp->v_fs_e = res.fs_e;
+		vp->v_vmnt = NULL;
+		vp->v_dev = NO_DEV;
+		vp->v_fs_e = res.fs_e;
+		vp->v_inode_nr = res.inode_nr;
+		vp->v_mode = res.fmode;
+		vp->v_sdev = dev;
+		vp->v_fs_count = 1;
+		vp->v_ref_count = 1;
+		fp->fp_filp[m_in.fd]->filp_vno = vp;
+	}
+	dev_mess.REP_STATUS = OK;
+  }
+  return(dev_mess.REP_STATUS);
+}
+
+
+/*===========================================================================*
+ *				dev_up					     *
+ *===========================================================================*/
+PUBLIC void dev_up(int maj)
+{
+  /* A new device driver has been mapped in. This function
+   * checks if any filesystems are mounted on it, and if so,
+   * dev_open()s them so the filesystem can be reused.
+   */
+  int r, new_driver_e, needs_reopen, fd_nr, found;
+  struct filp *rfilp;
+  struct vmnt *vmp;
+  struct fproc *rfp;
+  struct vnode *vp;
+
+  /* First deal with block devices. We need to consider both mounted file
+   * systems and open block-special files.
+ */
+  if (maj < 0 || maj >= NR_DEVICES) panic("VFS: out-of-bound major");
+  new_driver_e = dmap[maj].dmap_driver;
+
+  /* Tell each affected mounted file system about the new endpoint. This code
+   * is currently useless, as driver endpoints do not change across restarts.
+   */
+  for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
+	int minor_dev, major_dev;
+	major_dev = major(vmp->m_dev);
+	minor_dev = minor(vmp->m_dev);
+	if (major_dev != maj) continue;
+
+	/* Send the new driver endpoint to the mounted file system. */
+	if (OK != req_newdriver(vmp->m_fs_e, vmp->m_dev, new_driver_e))
+		printf("VFS: dev_up: error sending new driver endpoint."
+		       " FS_e: %d req_nr: %d\n", vmp->m_fs_e, REQ_NEW_DRIVER);
+  }
+
+  /* For each block-special file that was previously opened on the affected
+   * device, we need to reopen it on the new driver.
+   */
+  found = 0;
+  for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) {
+	if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue;
+	if (major(vp->v_sdev) != maj) continue;
+	if (!S_ISBLK(vp->v_mode)) continue;
+
+	/* Reopen the device on the driver, once per filp. */
+	if ((r = dev_open(vp->v_sdev, VFS_PROC_NR, rfilp->filp_mode)) != OK)
+		printf("VFS: mounted dev %d/%d re-open failed: %d.\n",
+		       maj, minor(vp->v_sdev), r);
+
+	found = 1;
+  }
+
+  /* If any block-special file was open for this major at all, also inform the
+   * root file system about the new endpoint of the driver. We do this even if
+   * the block-special file is linked to another mounted file system, merely
+   * because it is more work to check for that case.
+   */
+  if (found) {
+	if (OK != req_newdriver(ROOT_FS_E, makedev(maj, 0), new_driver_e))
+		printf("VFS: dev_up: error sending new driver endpoint."
+		       " FS_e: %d req_nr: %d\n", ROOT_FS_E, REQ_NEW_DRIVER);
+  }
+
+  /* The rest of the code deals with character-special files. To start with,
+   * look for processes that are suspended in an OPEN call. Set FP_SUSP_REOPEN
+   * to indicate that this process was suspended before the call to dev_up.
+ */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if(rfp->fp_pid == PID_FREE) continue; + if(rfp->fp_blocked_on != FP_BLOCKED_ON_DOPEN) continue; + + printf("VFS: dev_up: found process in FP_BLOCKED_ON_DOPEN, fd %d\n", + rfp->fp_block_fd); + fd_nr = rfp->fp_block_fd; + rfilp = rfp->fp_filp[fd_nr]; + vp = rfilp->filp_vno; + if (!vp) panic("VFS: restart_reopen: no vp"); + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + if (major(vp->v_sdev) != maj) continue; + + rfp->fp_flags |= FP_SUSP_REOPEN; + } + + needs_reopen= FALSE; + for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) { + if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue; + if (major(vp->v_sdev) != maj) continue; + if (!S_ISCHR(vp->v_mode)) continue; + + rfilp->filp_state = FS_NEEDS_REOPEN; + needs_reopen = TRUE; + } + + if (needs_reopen) + restart_reopen(maj); + +} + +/*===========================================================================* + * open_reply * + *===========================================================================*/ +PUBLIC void open_reply(void) +{ + struct fproc *rfp; + endpoint_t proc_e; + int slot; + + proc_e = m_in.REP_ENDPT; + if (isokendpt(proc_e, &slot) != OK) return; + rfp = &fproc[slot]; + *rfp->fp_sendrec = m_in; + worker_signal(worker_get(rfp->fp_wtid)); /* Continue open */ +} + +/*===========================================================================* + * restart_reopen * + *===========================================================================*/ +PRIVATE void restart_reopen(maj) +int maj; +{ + int n, r, minor_dev, major_dev, fd_nr; + endpoint_t driver_e; + struct vnode *vp; + struct filp *rfilp; + struct fproc *rfp; + + if (maj < 0 || maj >= NR_DEVICES) panic("VFS: out-of-bound major"); + for (rfilp = filp; rfilp < &filp[NR_FILPS]; rfilp++) { + if (rfilp->filp_count < 1 || !(vp = rfilp->filp_vno)) continue; + if (rfilp->filp_state != FS_NEEDS_REOPEN) continue; + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + + major_dev = major(vp->v_sdev); + minor_dev = minor(vp->v_sdev); + if (major_dev != maj) continue; + + if (!(rfilp->filp_flags & O_REOPEN)) { + /* File descriptor is to be closed when driver restarts. */ + n = invalidate(rfilp); + if (n != rfilp->filp_count) { + printf("VFS: warning: invalidate/count " + "discrepancy (%d, %d)\n", n, rfilp->filp_count); + } + rfilp->filp_count = 0; + continue; + } + + r = dev_reopen(vp->v_sdev, rfilp-filp, vp->v_mode & (R_BIT|W_BIT)); + if (r == OK) return; + + /* Device could not be reopened. Invalidate all filps on that device.*/ + n = invalidate(rfilp); + if (n != rfilp->filp_count) { + printf("VFS: warning: invalidate/count " + "discrepancy (%d, %d)\n", n, rfilp->filp_count); + } + rfilp->filp_count = 0; + printf("VFS: file on dev %d/%d re-open failed: %d; " + "invalidated %d fd's.\n", major_dev, minor_dev, r, n); + } + + /* Nothing more to re-open. 
Restart suspended processes */
+  driver_e = dmap[maj].dmap_driver;
+
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	if(rfp->fp_pid == PID_FREE) continue;
+	if(rfp->fp_blocked_on == FP_BLOCKED_ON_OTHER &&
+	   rfp->fp_task == driver_e && (rfp->fp_flags & FP_SUSP_REOPEN)) {
+		rfp->fp_flags &= ~FP_SUSP_REOPEN;
+		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+		reply(rfp->fp_endpoint, ERESTART);
+	}
+  }
+
+  /* Look for processes that are suspended in an OPEN call */
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	if (rfp->fp_pid == PID_FREE) continue;
+	if (rfp->fp_blocked_on != FP_BLOCKED_ON_DOPEN ||
+	    !(rfp->fp_flags & FP_SUSP_REOPEN)) continue;
+
+	printf("VFS: restart_reopen: found process in FP_BLOCKED_ON_DOPEN, fd %d\n",
+	       rfp->fp_block_fd);
+	fd_nr = rfp->fp_block_fd;
+	rfilp = rfp->fp_filp[fd_nr];
+
+	if (!rfilp) {
+		/* Open failed, and automatic reopen was not requested */
+		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+		FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+		reply(rfp->fp_endpoint, EIO);
+		continue;
+	}
+
+	vp = rfilp->filp_vno;
+	if (!vp) panic("VFS: restart_reopen: no vp");
+	if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
+	if (major(vp->v_sdev) != maj) continue;
+
+	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+	reply(rfp->fp_endpoint, fd_nr);
+  }
+}
+
+
+/*===========================================================================*
+ *				reopen_reply				     *
+ *===========================================================================*/
+PUBLIC void reopen_reply()
+{
+  endpoint_t driver_e;
+  int filp_no, status, maj;
+  struct filp *rfilp;
+  struct vnode *vp;
+  struct dmap *dp;
+
+  driver_e = m_in.m_source;
+  filp_no = m_in.REP_ENDPT;
+  status = m_in.REP_STATUS;
+
+  if (filp_no < 0 || filp_no >= NR_FILPS) {
+	printf("VFS: reopen_reply: bad filp number %d from driver %d\n",
+	       filp_no, driver_e);
+	return;
+  }
+
+  rfilp = &filp[filp_no];
+  if (rfilp->filp_count < 1) {
+	printf("VFS: reopen_reply: filp number %d not in use (from driver %d)\n",
+	       filp_no, driver_e);
+	return;
+  }
+
+  vp = rfilp->filp_vno;
+  if (!vp) {
+	printf("VFS: reopen_reply: no vnode for filp number %d (from driver "
+	       "%d)\n", filp_no, driver_e);
+	return;
+  }
+
+  if (rfilp->filp_state != FS_NEEDS_REOPEN) {
+	printf("VFS: reopen_reply: bad state %d for filp number %d"
+	       " (from driver %d)\n", rfilp->filp_state, filp_no, driver_e);
+	return;
+  }
+
+  if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) {
+	printf("VFS: reopen_reply: bad mode 0%o for filp number %d"
+	       " (from driver %d)\n", vp->v_mode, filp_no, driver_e);
+	return;
+  }
+
+  maj = major(vp->v_sdev);
+  dp = &dmap[maj];
+  if (dp->dmap_driver != driver_e) {
+	printf("VFS: reopen_reply: bad major %d for filp number %d "
+	       "(from driver %d, current driver is %d)\n", maj, filp_no,
+	       driver_e, dp->dmap_driver);
+	return;
+  }
+
+  if (status == OK) {
+	rfilp->filp_state = FS_NORMAL;
+  } else {
+	printf("VFS: reopen_reply: should handle error status\n");
+	return;
+  }
+
+  restart_reopen(maj);
+}
diff --git a/servers/avfs/dmap.c b/servers/avfs/dmap.c
new file mode 100644
index 000000000..d73aa1fac
--- /dev/null
+++ b/servers/avfs/dmap.c
@@ -0,0 +1,257 @@
+/* This file contains the table with device <-> driver mappings. It also
+ * contains some routines to dynamically add and/or remove device drivers
+ * or change mappings.
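+ *
+ * As a worked example (label, endpoint and major number invented): once RS
+ * reports a tty driver, the call
+ *
+ *	map_driver("tty", 4, tty_ep, STYLE_TTY, 0);
+ *
+ * points dmap[4].dmap_opcl at tty_opcl and dmap[4].dmap_io at gen_io, so
+ * every later open of a major-4 device node is routed to that endpoint.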
+ */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include "fproc.h" +#include "dmap.h" +#include "param.h" + +/* The order of the entries in the table determines the mapping between major + * device numbers and device drivers. Character and block devices + * can be intermixed at random. The ordering determines the device numbers in + * /dev. Note that the major device numbers used in /dev are NOT the same as + * the process numbers of the device drivers. See for mappings. + */ + +struct dmap dmap[NR_DEVICES]; + +#define DT_EMPTY { no_dev, no_dev_io, NONE, "", 0, STYLE_NDEV, NULL } + +/*===========================================================================* + * do_mapdriver * + *===========================================================================*/ +PUBLIC int do_mapdriver() +{ +/* Create a device->driver mapping. RS will tell us which major is driven by + * this driver, what type of device it is (regular, TTY, asynchronous, clone, + * etc), and its label. This label is registered with DS, and allows us to + * retrieve the driver's endpoint. + */ + int r, flags, major; + endpoint_t endpoint; + vir_bytes label_vir; + size_t label_len; + char label[LABEL_MAX]; + + /* Only RS can map drivers. */ + if (who_e != RS_PROC_NR) return(EPERM); + + /* Get the label */ + label_vir = (vir_bytes) m_in.md_label; + label_len = (size_t) m_in.md_label_len; + + if (label_len+1 > sizeof(label)) { /* Can we store this label? */ + printf("VFS: do_mapdriver: label too long\n"); + return(EINVAL); + } + r = sys_vircopy(who_e, D, label_vir, SELF, D, (vir_bytes) label, label_len); + if (r != OK) { + printf("VFS: do_mapdriver: sys_vircopy failed: %d\n", r); + return(EINVAL); + } + label[label_len] = '\0'; /* Terminate label */ + + /* Now we know how the driver is called, fetch its endpoint */ + r = ds_retrieve_label_endpt(label, &endpoint); + if (r != OK) { + printf("VFS: do_mapdriver: label '%s' unknown\n", label); + return(EINVAL); + } + + /* Try to update device mapping. */ + major = m_in.md_major; + flags = m_in.md_flags; + + return map_driver(label, major, endpoint, m_in.md_style, flags); +} + +/*===========================================================================* + * map_driver * + *===========================================================================*/ +PUBLIC int map_driver(label, major, proc_nr_e, style, flags) +const char *label; /* name of the driver */ +int major; /* major number of the device */ +endpoint_t proc_nr_e; /* process number of the driver */ +int style; /* style of the device */ +int flags; /* device flags */ +{ +/* Add a new device driver mapping in the dmap table. If the proc_nr is set to + * NONE, we're supposed to unmap it. + */ + + int slot; + size_t len; + struct dmap *dp; + + /* Get pointer to device entry in the dmap table. */ + if (major < 0 || major >= NR_DEVICES) return(ENODEV); + dp = &dmap[major]; + + /* Check if we're supposed to unmap it. */ + if(proc_nr_e == NONE) { + dp->dmap_opcl = no_dev; + dp->dmap_io = no_dev_io; + dp->dmap_driver = NONE; + dp->dmap_flags = flags; + return(OK); + } + + /* Check process number of new driver if it was alive before mapping */ + if (! 
(flags & DRV_FORCED)) { + if (isokendpt(proc_nr_e, &slot) != OK) + return(EINVAL); + } + + if (label != NULL) { + len = strlen(label); + if (len+1 > sizeof(dp->dmap_label)) + panic("VFS: map_driver: label too long: %d", len); + strcpy(dp->dmap_label, label); + } + + /* Store driver I/O routines based on type of device */ + switch (style) { + case STYLE_DEV: + dp->dmap_opcl = gen_opcl; + dp->dmap_io = gen_io; + break; + case STYLE_DEVA: + dp->dmap_opcl = gen_opcl; + dp->dmap_io = asyn_io; + break; + case STYLE_TTY: + dp->dmap_opcl = tty_opcl; + dp->dmap_io = gen_io; + break; + case STYLE_CTTY: + dp->dmap_opcl = ctty_opcl; + dp->dmap_io = ctty_io; + break; + case STYLE_CLONE: + dp->dmap_opcl = clone_opcl; + dp->dmap_io = gen_io; + break; + case STYLE_CLONE_A: + dp->dmap_opcl = clone_opcl; + dp->dmap_io = asyn_io; + break; + default: + return(EINVAL); + } + + dp->dmap_driver = proc_nr_e; + dp->dmap_flags = flags; + dp->dmap_style = style; + + return(OK); +} + +/*===========================================================================* + * dmap_unmap_by_endpt * + *===========================================================================*/ +PUBLIC void dmap_unmap_by_endpt(endpoint_t proc_e) +{ +/* Lookup driver in dmap table by endpoint and unmap it */ + int major, r; + + for (major = 0; major < NR_DEVICES; major++) { + if (dmap_driver_match(proc_e, major)) { + /* Found driver; overwrite it with a NULL entry */ + if ((r = map_driver(NULL, major, NONE, 0, 0)) != OK) { + printf("VFS: unmapping driver %d for major %d failed:" + " %d\n", proc_e, major, r); + } + } + } +} + +/*===========================================================================* + * map_service * + *===========================================================================*/ +PUBLIC int map_service(struct rprocpub *rpub) +{ +/* Map a new service by storing its device driver properties. */ + int r; + + /* Not a driver, nothing more to do. */ + if(rpub->dev_nr == NO_DEV) return(OK); + + /* Map driver. */ + r = map_driver(rpub->label, rpub->dev_nr, rpub->endpoint, rpub->dev_style, + rpub->dev_flags); + if(r != OK) return(r); + + /* If driver has two major numbers associated, also map the other one. */ + if(rpub->dev_style2 != STYLE_NDEV) { + r = map_driver(rpub->label, rpub->dev_nr+1, rpub->endpoint, + rpub->dev_style2, rpub->dev_flags); + if(r != OK) return(r); + } + + return(OK); +} + +/*===========================================================================* + * init_dmap * + *===========================================================================*/ +PUBLIC void init_dmap() +{ +/* Initialize the table with empty device <-> driver mappings. */ + int i; + struct dmap dmap_default = DT_EMPTY; + + for (i = 0; i < NR_DEVICES; i++) + dmap[i] = dmap_default; +} + +/*===========================================================================* + * dmap_driver_match * + *===========================================================================*/ +PUBLIC int dmap_driver_match(endpoint_t proc, int major) +{ + if (major < 0 || major >= NR_DEVICES) return(0); + if (dmap[major].dmap_driver != NONE && dmap[major].dmap_driver == proc) + return(1); + + return(0); +} + +/*===========================================================================* + * dmap_endpt_up * + *===========================================================================*/ +PUBLIC void dmap_endpt_up(endpoint_t proc_e) +{ +/* A device driver with endpoint proc_e has been restarted. Go tell everyone + * that might be blocking on it that this device is 'up'. 
+ */
+
+  int major;
+  for (major = 0; major < NR_DEVICES; major++)
+	if (dmap_driver_match(proc_e, major))
+		dev_up(major);
+
+}
+
+/*===========================================================================*
+ *				get_dmap				     *
+ *===========================================================================*/
+PUBLIC struct dmap *get_dmap(endpoint_t proc_e)
+{
+/* See if 'proc_e' endpoint belongs to a valid dmap entry. If so, return a
+ * pointer */
+
+  int major;
+  for (major = 0; major < NR_DEVICES; major++)
+	if (dmap_driver_match(proc_e, major))
+		return(&dmap[major]);
+
+  return(NULL);
+}
diff --git a/servers/avfs/dmap.h b/servers/avfs/dmap.h
new file mode 100644
index 000000000..6a83bf551
--- /dev/null
+++ b/servers/avfs/dmap.h
@@ -0,0 +1,28 @@
+#ifndef __VFS_DMAP_H__
+#define __VFS_DMAP_H__
+
+/*
+dmap.h
+*/
+
+/*===========================================================================*
+ *			Device <-> Driver Table				     *
+ *===========================================================================*/
+
+/* Device table.  This table is indexed by major device number.  It provides
+ * the link between major device numbers and the routines that process them.
+ * The table can be updated dynamically.  The field 'dmap_flags' describes an
+ * entry's current status and determines what control options are possible.
+ */
+
+extern struct dmap {
+  int _PROTOTYPE ((*dmap_opcl), (int, dev_t, int, int)	);
+  int _PROTOTYPE ((*dmap_io),   (int, message *)	);
+  endpoint_t dmap_driver;
+  char dmap_label[LABEL_MAX];
+  int dmap_flags;
+  int dmap_style;
+  struct filp *dmap_sel_filp;
+} dmap[];
+
+#endif
diff --git a/servers/avfs/exec.c b/servers/avfs/exec.c
new file mode 100644
index 000000000..dc29c6875
--- /dev/null
+++ b/servers/avfs/exec.c
@@ -0,0 +1,707 @@
+/* This file handles the EXEC system call.
It performs the work as follows: + * - see if the permissions allow the file to be executed + * - read the header and extract the sizes + * - fetch the initial args and environment from the user space + * - allocate the memory for the new process + * - copy the initial stack from PM to the process + * - read in the text and data segments and copy to the process + * - take care of setuid and setgid bits + * - fix up 'mproc' table + * - tell kernel about EXEC + * - save offset to initial argc (for ps) + * + * The entry points into this file are: + * pm_exec: perform the EXEC system call + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fproc.h" +#include "path.h" +#include "param.h" +#include "vnode.h" +#include +#include +#include +#include "exec.h" + +FORWARD _PROTOTYPE( void lock_exec, (void) ); +FORWARD _PROTOTYPE( void unlock_exec, (void) ); +FORWARD _PROTOTYPE( int exec_newmem, (int proc_e, vir_bytes text_addr, vir_bytes text_bytes, + vir_bytes data_addr, vir_bytes data_bytes, + vir_bytes tot_bytes, vir_bytes frame_len, int sep_id, + int is_elf, dev_t st_dev, ino_t st_ino, time_t ctime, + char *progname, int new_uid, int new_gid, + vir_bytes *stack_topp, int *load_textp, + int *allow_setuidp) ); +FORWARD _PROTOTYPE( int is_script, (const char *exec_hdr, size_t exec_len)); +FORWARD _PROTOTYPE( int patch_stack, (struct vnode *vp, char stack[ARG_MAX], + vir_bytes *stk_bytes, char path[PATH_MAX+1]) ); +FORWARD _PROTOTYPE( int insert_arg, (char stack[ARG_MAX], vir_bytes *stk_bytes, + char *arg, int replace) ); +FORWARD _PROTOTYPE( void patch_ptr, (char stack[ARG_MAX], vir_bytes base)); +FORWARD _PROTOTYPE( void clo_exec, (struct fproc *rfp) ); +FORWARD _PROTOTYPE( int read_seg, (struct vnode *vp, off_t off, int proc_e, + int seg, vir_bytes seg_addr, + phys_bytes seg_bytes) ); +FORWARD _PROTOTYPE( int load_aout, (struct exec_info *execi) ); +FORWARD _PROTOTYPE( int load_elf, (struct exec_info *execi) ); +FORWARD _PROTOTYPE( int map_header, (char **exec_hdr, + const struct vnode *vp) ); + +#define PTRSIZE sizeof(char *) /* Size of pointers in argv[] and envp[]. 
*/ + +/* Array of loaders for different object file formats */ +struct exec_loaders { + int (*load_object)(struct exec_info *); +}; + +PRIVATE const struct exec_loaders exec_loaders[] = { + { load_aout }, + { load_elf }, + { NULL } +}; + +PRIVATE char hdr[PAGE_SIZE]; /* Assume that header is not larger than a page */ + +/*===========================================================================* + * lock_exec * + *===========================================================================*/ +PRIVATE void lock_exec(void) +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + /* First try to get it right off the bat */ + if (mutex_trylock(&exec_lock) == 0) + return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + + if (mutex_lock(&exec_lock) != 0) + panic("Could not obtain lock on exec"); + + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_exec * + *===========================================================================*/ +PRIVATE void unlock_exec(void) +{ + if (mutex_unlock(&exec_lock) != 0) + panic("Could not release lock on exec"); +} + +/*===========================================================================* + * pm_exec * + *===========================================================================*/ +PUBLIC int pm_exec(int proc_e, char *path, vir_bytes path_len, char *frame, + vir_bytes frame_len, vir_bytes *pc) +{ +/* Perform the execve(name, argv, envp) call. The user library builds a + * complete stack image, including pointers, args, environ, etc. The stack + * is copied to a buffer inside VFS, and then to the new core image. + */ + int r, r1, round, slot; + vir_bytes vsp; + struct fproc *rfp; + struct vnode *vp; + struct vmnt *vmp; + char *cp; + static char mbuf[ARG_MAX]; /* buffer for stack and zeroes */ + struct exec_info execi; + int i; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lock_exec(); + + okendpt(proc_e, &slot); + rfp = fp = &fproc[slot]; + vp = NULL; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Get the exec file name. */ + if ((r = fetch_name(path, path_len, 0, fullpath)) != OK) + goto pm_execfinal; + + /* Fetch the stack from the user before destroying the old core image. */ + if (frame_len > ARG_MAX) { + printf("VFS: pm_exec: stack too big\n"); + r = ENOMEM; /* stack too big */ + goto pm_execfinal; + } + r = sys_datacopy(proc_e, (vir_bytes) frame, SELF, (vir_bytes) mbuf, + (phys_bytes) frame_len); + if (r != OK) { /* can't fetch stack (e.g. bad virtual addr) */ + printf("VFS: pm_exec: sys_datacopy failed\n"); + goto pm_execfinal; + } + + /* The default is to keep the original user and group IDs */ + execi.new_uid = rfp->fp_effuid; + execi.new_gid = rfp->fp_effgid; + + for (round = 0; round < 2; round++) { + /* round = 0 (first attempt), or 1 (interpreted script) */ + /* Save the name of the program */ + (cp = strrchr(fullpath, '/')) ? 
cp++ : (cp = fullpath); + + strncpy(execi.progname, cp, PROC_NAME_LEN-1); + execi.progname[PROC_NAME_LEN-1] = '\0'; + + /* Open executable */ + if ((vp = eat_path(&resolve, fp)) == NULL) { + r = err_code; + goto pm_execfinal; + } + execi.vp = vp; + unlock_vmnt(vmp); + + if ((vp->v_mode & I_TYPE) != I_REGULAR) + r = ENOEXEC; + else if ((r1 = forbidden(vp, X_BIT)) != OK) + r = r1; + else + r = req_stat(vp->v_fs_e, vp->v_inode_nr, VFS_PROC_NR, + (char *) &(execi.sb), 0, 0); + if (r != OK) goto pm_execfinal; + + if (round == 0) { + /* Deal with setuid/setgid executables */ + if (vp->v_mode & I_SET_UID_BIT) execi.new_uid = vp->v_uid; + if (vp->v_mode & I_SET_GID_BIT) execi.new_gid = vp->v_gid; + } + + r = map_header(&execi.hdr, execi.vp); + if (r != OK) goto pm_execfinal; + + if (!is_script(execi.hdr, execi.vp->v_size) || round != 0) + break; + + /* Get fresh copy of the file name. */ + if ((r = fetch_name(path, path_len, 0, fullpath)) != OK) + printf("VFS pm_exec: 2nd fetch_name failed\n"); + else if ((r = patch_stack(vp, mbuf, &frame_len, fullpath)) != OK) + printf("VFS pm_exec: patch_stack failed\n"); + + unlock_vnode(vp); + put_vnode(vp); + vp = NULL; + if (r != OK) goto pm_execfinal; + } + + execi.proc_e = proc_e; + execi.frame_len = frame_len; + + for (i = 0; exec_loaders[i].load_object != NULL; i++) { + r = (*exec_loaders[i].load_object)(&execi); + /* Loaded successfully, so no need to try other loaders */ + if (r == OK) break; + } + + if (r != OK) { /* No exec loader could load the object */ + r = ENOEXEC; + goto pm_execfinal; + } + + /* Save off PC */ + *pc = execi.pc; + + /* Patch up stack and copy it from VFS to new core image. */ + vsp = execi.stack_top; + vsp -= frame_len; + patch_ptr(mbuf, vsp); + if ((r = sys_datacopy(SELF, (vir_bytes) mbuf, proc_e, (vir_bytes) vsp, + (phys_bytes)frame_len)) != OK) { + printf("VFS: datacopy failed (%d) trying to copy to %lu\n", r, vsp); + goto pm_execfinal; + } + + if (r != OK) goto pm_execfinal; + clo_exec(rfp); + + if (execi.allow_setuid) { + rfp->fp_effuid = execi.new_uid; + rfp->fp_effgid = execi.new_gid; + } + +pm_execfinal: + if (vp != NULL) { + unlock_vnode(vp); + put_vnode(vp); + } + unlock_exec(); + return(r); +} + +/*===========================================================================* + * load_aout * + *===========================================================================*/ +PRIVATE int load_aout(struct exec_info *execi) +{ + int r; + struct vnode *vp; + int proc_e; + off_t off; + int hdrlen; + int sep_id; + vir_bytes text_bytes, data_bytes, bss_bytes; + phys_bytes tot_bytes; /* total space for program, including gap */ + + assert(execi != NULL); + assert(execi->hdr != NULL); + assert(execi->vp != NULL); + + proc_e = execi->proc_e; + vp = execi->vp; + + /* Read the file header and extract the segment sizes. */ + r = read_header_aout(execi->hdr, execi->vp->v_size, &sep_id, + &text_bytes, &data_bytes, &bss_bytes, + &tot_bytes, &execi->pc, &hdrlen); + if (r != OK) return(r); + + r = exec_newmem(proc_e, 0 /* text_addr */, text_bytes, + 0 /* data_addr */, data_bytes + bss_bytes, tot_bytes, + execi->frame_len, sep_id, 0 /* is_elf */, vp->v_dev, vp->v_inode_nr, + execi->sb.st_ctime, + execi->progname, execi->new_uid, execi->new_gid, + &execi->stack_top, &execi->load_text, &execi->allow_setuid); + + if (r != OK) { + printf("VFS: load_aout: exec_newmem failed: %d\n", r); + return(r); + } + + off = hdrlen; + + /* Read in text and data segments. 
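+   * They sit back to back behind the header in the a.out image: with a
+   * hypothetical 32-byte header and 0x2000 bytes of text, text is read
+   * from file offset 32 and data from offset 32 + 0x2000, which is the
+   * off / off + text_bytes arithmetic below.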
*/ + if (execi->load_text) + r = read_seg(vp, off, proc_e, T, 0, text_bytes); + off += text_bytes; + if (r == OK) + r = read_seg(vp, off, proc_e, D, 0, data_bytes); + + return(r); +} + +/*===========================================================================* + * load_elf * + *===========================================================================*/ +PRIVATE int load_elf(struct exec_info *execi) +{ + int r; + struct vnode *vp; + int proc_e; + phys_bytes tot_bytes; /* total space for program, including gap */ + vir_bytes text_vaddr, text_paddr, text_filebytes, text_membytes; + vir_bytes data_vaddr, data_paddr, data_filebytes, data_membytes; + off_t text_offset, data_offset; + int sep_id, is_elf; + + assert(execi != NULL); + assert(execi->hdr != NULL); + assert(execi->vp != NULL); + + proc_e = execi->proc_e; + vp = execi->vp; + + /* Read the file header and extract the segment sizes. */ + r = read_header_elf(execi->hdr, &text_vaddr, &text_paddr, + &text_filebytes, &text_membytes, + &data_vaddr, &data_paddr, + &data_filebytes, &data_membytes, + &execi->pc, &text_offset, &data_offset); + if (r != OK) return(r); + + sep_id = 0; + is_elf = 1; + tot_bytes = 0; /* Use default stack size */ + r = exec_newmem(proc_e, + trunc_page(text_vaddr), text_membytes, + trunc_page(data_vaddr), data_membytes, + tot_bytes, execi->frame_len, sep_id, is_elf, + vp->v_dev, vp->v_inode_nr, execi->sb.st_ctime, + execi->progname, execi->new_uid, execi->new_gid, + &execi->stack_top, &execi->load_text, &execi->allow_setuid); + + if (r != OK) { + printf("VFS: load_elf: exec_newmem failed: %d\n", r); + return(r); + } + + /* Read in text and data segments. */ + if (execi->load_text) + r = read_seg(vp, text_offset, proc_e, T, text_vaddr, text_filebytes); + + if (r == OK) + r = read_seg(vp, data_offset, proc_e, D, data_vaddr, data_filebytes); + + return(r); +} + +/*===========================================================================* + * exec_newmem * + *===========================================================================*/ +PRIVATE int exec_newmem( + int proc_e, + vir_bytes text_addr, + vir_bytes text_bytes, + vir_bytes data_addr, + vir_bytes data_bytes, + vir_bytes tot_bytes, + vir_bytes frame_len, + int sep_id, + int is_elf, + dev_t st_dev, + ino_t st_ino, + time_t ctime, + char *progname, + int new_uid, + int new_gid, + vir_bytes *stack_topp, + int *load_textp, + int *allow_setuidp +) +{ +/* Allocate a new memory map for a process that tries to exec */ + int r; + struct exec_newmem e; + message m; + + e.text_addr = text_addr; + e.text_bytes = text_bytes; + e.data_addr = data_addr; + e.data_bytes = data_bytes; + e.tot_bytes = tot_bytes; + e.args_bytes = frame_len; + e.sep_id = sep_id; + e.is_elf = is_elf; + e.st_dev = st_dev; + e.st_ino = st_ino; + e.enst_ctime = ctime; + e.new_uid = new_uid; + e.new_gid = new_gid; + strncpy(e.progname, progname, sizeof(e.progname)-1); + e.progname[sizeof(e.progname)-1] = '\0'; + + m.m_type = EXEC_NEWMEM; + m.EXC_NM_PROC = proc_e; + m.EXC_NM_PTR = (char *)&e; + if ((r = sendrec(PM_PROC_NR, &m)) != OK) return(r); + + *stack_topp = m.m1_i1; + *load_textp = !!(m.m1_i2 & EXC_NM_RF_LOAD_TEXT); + *allow_setuidp = !!(m.m1_i2 & EXC_NM_RF_ALLOW_SETUID); + + return(m.m_type); +} + +/*===========================================================================* + * is_script * + *===========================================================================*/ +PRIVATE int is_script(const char *exec_hdr, size_t exec_len) +{ +/* Is Interpreted script? 
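+ * That is: does the image start with the two bytes "#!", as in a file
+ * beginning "#!/bin/sh"? If so, pm_exec() runs a second round in which
+ * patch_stack() substitutes the interpreter named on that line.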
*/ + assert(exec_hdr != NULL); + + return(exec_hdr[0] == '#' && exec_hdr[1] == '!' && exec_len >= 2); +} + +/*===========================================================================* + * patch_stack * + *===========================================================================*/ +PRIVATE int patch_stack(vp, stack, stk_bytes, path) +struct vnode *vp; /* pointer for open script file */ +char stack[ARG_MAX]; /* pointer to stack image within VFS */ +vir_bytes *stk_bytes; /* size of initial stack */ +char path[PATH_MAX+1]; /* path to script file */ +{ +/* Patch the argument vector to include the path name of the script to be + * interpreted, and all strings on the #! line. Returns the path name of + * the interpreter. + */ + enum { INSERT=FALSE, REPLACE=TRUE }; + int n, r; + off_t pos; + char *sp, *interp = NULL; + u64_t new_pos; + unsigned int cum_io; + char buf[_MAX_BLOCK_SIZE]; + + /* Make 'path' the new argv[0]. */ + if (!insert_arg(stack, stk_bytes, path, REPLACE)) return(ENOMEM); + + pos = 0; /* Read from the start of the file */ + + /* Issue request */ + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING, + VFS_PROC_NR, buf, _MAX_BLOCK_SIZE, &new_pos, &cum_io); + if (r != OK) return(r); + + n = vp->v_size; + if (n > _MAX_BLOCK_SIZE) + n = _MAX_BLOCK_SIZE; + if (n < 2) return ENOEXEC; + + sp = &(buf[2]); /* just behind the #! */ + n -= 2; + if (n > PATH_MAX) n = PATH_MAX; + + /* Use the 'path' variable for temporary storage */ + memcpy(path, sp, n); + + if ((sp = memchr(path, '\n', n)) == NULL) /* must be a proper line */ + return(ENOEXEC); + + /* Move sp backwards through script[], prepending each string to stack. */ + for (;;) { + /* skip spaces behind argument. */ + while (sp > path && (*--sp == ' ' || *sp == '\t')) {} + if (sp == path) break; + + sp[1] = 0; + /* Move to the start of the argument. */ + while (sp > path && sp[-1] != ' ' && sp[-1] != '\t') --sp; + + interp = sp; + if (!insert_arg(stack, stk_bytes, sp, INSERT)) { + printf("VFS: patch_stack: insert_arg failed\n"); + return(ENOMEM); + } + } + + /* Round *stk_bytes up to the size of a pointer for alignment contraints. */ + *stk_bytes= ((*stk_bytes + PTRSIZE - 1) / PTRSIZE) * PTRSIZE; + + if (interp != path) + memmove(path, interp, strlen(interp)+1); + return(OK); +} + +/*===========================================================================* + * insert_arg * + *===========================================================================*/ +PRIVATE int insert_arg( +char stack[ARG_MAX], /* pointer to stack image within PM */ +vir_bytes *stk_bytes, /* size of initial stack */ +char *arg, /* argument to prepend/replace as new argv[0] */ +int replace +) +{ +/* Patch the stack so that arg will become argv[0]. Be careful, the stack may + * be filled with garbage, although it normally looks like this: + * nargs argv[0] ... argv[nargs-1] NULL envp[0] ... NULL + * followed by the strings "pointed" to by the argv[i] and the envp[i]. The + * pointers are really offsets from the start of stack. + * Return true iff the operation succeeded. + */ + int offset, a0, a1, old_bytes = *stk_bytes; + + /* Prepending arg adds at least one string and a zero byte. */ + offset = strlen(arg) + 1; + + a0 = (int) ((char **) stack)[1]; /* argv[0] */ + if (a0 < 4 * PTRSIZE || a0 >= old_bytes) return(FALSE); + + a1 = a0; /* a1 will point to the strings to be moved */ + if (replace) { + /* Move a1 to the end of argv[0][] (argv[1] if nargs > 1). 
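+ * (Worked example, not in the original commit: in REPLACE mode the loop
+ * below walks over the old argv[0] string and hands its bytes back to
+ * 'offset'. Replacing an old argv[0] "sh\0" (3 bytes) with arg
+ * "/bin/sh\0" (offset starts at 8) leaves offset = 8 - 3 = 5, so the
+ * string area only has to grow by 5 bytes.)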
*/ + do { + if (a1 == old_bytes) return(FALSE); + --offset; + } while (stack[a1++] != 0); + } else { + offset += PTRSIZE; /* new argv[0] needs new pointer in argv[] */ + a0 += PTRSIZE; /* location of new argv[0][]. */ + } + + /* stack will grow by offset bytes (or shrink by -offset bytes) */ + if ((*stk_bytes += offset) > ARG_MAX) return(FALSE); + + /* Reposition the strings by offset bytes */ + memmove(stack + a1 + offset, stack + a1, old_bytes - a1); + + strcpy(stack + a0, arg); /* Put arg in the new space. */ + + if (!replace) { + /* Make space for a new argv[0]. */ + memmove(stack + 2 * PTRSIZE, stack + 1 * PTRSIZE, a0 - 2 * PTRSIZE); + + ((char **) stack)[0]++; /* nargs++; */ + } + /* Now patch up argv[] and envp[] by offset. */ + patch_ptr(stack, (vir_bytes) offset); + ((char **) stack)[1] = (char *) a0; /* set argv[0] correctly */ + return(TRUE); +} + + +/*===========================================================================* + * patch_ptr * + *===========================================================================*/ +PRIVATE void patch_ptr( +char stack[ARG_MAX], /* pointer to stack image within PM */ +vir_bytes base /* virtual address of stack base inside user */ +) +{ +/* When doing an exec(name, argv, envp) call, the user builds up a stack + * image with arg and env pointers relative to the start of the stack. Now + * these pointers must be relocated, since the stack is not positioned at + * address 0 in the user's address space. + */ + + char **ap, flag; + vir_bytes v; + + flag = 0; /* counts number of 0-pointers seen */ + ap = (char **) stack; /* points initially to 'nargs' */ + ap++; /* now points to argv[0] */ + while (flag < 2) { + if (ap >= (char **) &stack[ARG_MAX]) return; /* too bad */ + if (*ap != NULL) { + v = (vir_bytes) *ap; /* v is relative pointer */ + v += base; /* relocate it */ + *ap = (char *) v; /* put it back */ + } else { + flag++; + } + ap++; + } +} + +/*===========================================================================* + * read_seg * + *===========================================================================*/ +PRIVATE int read_seg( +struct vnode *vp, /* inode descriptor to read from */ +off_t off, /* offset in file */ +int proc_e, /* process number (endpoint) */ +int seg, /* T, D, or S */ +vir_bytes seg_addr, /* address to load segment */ +phys_bytes seg_bytes /* how much is to be transferred? */ +) +{ +/* + * The byte count on read is usually smaller than the segment count, because + * a segment is padded out to a click multiple, and the data segment is only + * partially initialized. 
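+ * (Example added for clarity, not in the original commit: assuming a
+ * 4 KiB click, a data segment with 5000 initialized bytes and a large
+ * bss occupies at least 8192 bytes of memory, yet only the 5000
+ * initialized bytes are transferred from the file; the rest stays as
+ * the zero-filled memory set up by exec_newmem().)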
+ */ + int r; + unsigned n, o; + u64_t new_pos; + unsigned int cum_io; + static char buf[128 * 1024]; + + assert((seg == T)||(seg == D)); + + /* Make sure that the file is big enough */ + if (vp->v_size < off+seg_bytes) return(EIO); + + if (seg == T) { + /* We have to use a copy loop until safecopies support segments */ + o = 0; + while (o < seg_bytes) { + n = seg_bytes - o; + if (n > sizeof(buf)) + n = sizeof(buf); + + if ((r = req_readwrite(vp->v_fs_e,vp->v_inode_nr,cvul64(off+o), + READING, VFS_PROC_NR, buf, + n, &new_pos, &cum_io)) != OK) { + printf("VFS: read_seg: req_readwrite failed (text)\n"); + return(r); + } + + if (cum_io != n) { + printf( + "VFSread_seg segment has not been read properly by exec() \n"); + return(EIO); + } + + if ((r = sys_vircopy(VFS_PROC_NR, D, (vir_bytes)buf, proc_e, + seg, seg_addr + o, n)) != OK) { + printf("VFS: read_seg: copy failed (text)\n"); + return(r); + } + + o += n; + } + return(OK); + } else if (seg == D) { + + if ((r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(off), READING, + proc_e, (char*)seg_addr, seg_bytes, + &new_pos, &cum_io)) != OK) { + printf("VFS: read_seg: req_readwrite failed (data)\n"); + return(r); + } + + if (r == OK && cum_io != seg_bytes) + printf("VFS: read_seg segment has not been read properly by exec()\n"); + + return(r); + } + + return(OK); +} + + +/*===========================================================================* + * clo_exec * + *===========================================================================*/ +PRIVATE void clo_exec(struct fproc *rfp) +{ +/* Files can be marked with the FD_CLOEXEC bit (in fp->fp_cloexec). + */ + int i; + + /* Check the file desriptors one by one for presence of FD_CLOEXEC. */ + for (i = 0; i < OPEN_MAX; i++) + if ( FD_ISSET(i, &rfp->fp_cloexec_set)) + (void) close_fd(rfp, i); +} + +/*===========================================================================* + * map_header * + *===========================================================================*/ +PRIVATE int map_header(char **exec_hdr, const struct vnode *vp) +{ + int r; + u64_t new_pos; + unsigned int cum_io; + off_t pos; + + pos = 0; /* Read from the start of the file */ + + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING, + VFS_PROC_NR, hdr, MIN(vp->v_size, PAGE_SIZE), + &new_pos, &cum_io); + if (r != OK) { + printf("VFS: exec: map_header: req_readwrite failed\n"); + return(r); + } + + *exec_hdr = hdr; + return(OK); +} diff --git a/servers/avfs/exec.h b/servers/avfs/exec.h new file mode 100644 index 000000000..32114d6ab --- /dev/null +++ b/servers/avfs/exec.h @@ -0,0 +1,19 @@ +#ifndef _VFS_EXEC_H_ +#define _VFS_EXEC_H_ 1 + +struct exec_info { + int proc_e; /* Process endpoint */ + char *hdr; /* Exec file's header */ + vir_bytes pc; /* Entry point of exec file */ + vir_bytes stack_top; /* Top of the stack */ + vir_bytes frame_len; /* Stack size */ + uid_t new_uid; /* Process UID after exec */ + gid_t new_gid; /* Process GID after exec */ + int load_text; /* Load text section? */ + int allow_setuid; /* Allow setuid execution? */ + struct vnode *vp; /* Exec file's vnode */ + struct stat sb; /* Exec file's stat structure */ + char progname[PROC_NAME_LEN]; /* Program name */ +}; + +#endif /* !_VFS_EXEC_H_ */ diff --git a/servers/avfs/file.h b/servers/avfs/file.h new file mode 100644 index 000000000..52a5773c6 --- /dev/null +++ b/servers/avfs/file.h @@ -0,0 +1,48 @@ +#ifndef __VFS_FILE_H__ +#define __VFS_FILE_H__ + +/* This is the filp table. 
It is an intermediary between file descriptors and + * inodes. A slot is free if filp_count == 0. + */ + +EXTERN struct filp { + mode_t filp_mode; /* RW bits, telling how file is opened */ + int filp_flags; /* flags from open and fcntl */ + int filp_state; /* state for crash recovery */ + int filp_count; /* how many file descriptors share this slot?*/ + struct vnode *filp_vno; /* vnode belonging to this file */ + u64_t filp_pos; /* file position */ + mutex_t filp_lock; /* lock to gain exclusive access */ + struct fproc *filp_softlock; /* if not NULL; this filp didn't lock the + * vnode. Another filp already holds a lock + * for this thread */ + + /* the following fields are for select() and are owned by the generic + * select() code (i.e., fd-type-specific select() code can't touch these). + */ + int filp_selectors; /* select()ing processes blocking on this fd */ + int filp_select_ops; /* interested in these SEL_* operations */ + int filp_select_flags; /* Select flags for the filp */ + + /* following are for fd-type-specific select() */ + int filp_pipe_select_ops; +} filp[NR_FILPS]; + +#define FILP_CLOSED 0 /* filp_mode: associated device closed */ + +#define FS_NORMAL 0 /* file descriptor can be used normally */ +#define FS_NEEDS_REOPEN 1 /* file descriptor needs to be re-opened */ + +#define FSF_UPDATE 001 /* The driver should be informed about new + * state. + */ +#define FSF_BUSY 002 /* Select operation sent to driver but no + * reply yet. + */ +#define FSF_RD_BLOCK 010 /* Read request is blocking, the driver should + * keep state. + */ +#define FSF_WR_BLOCK 020 /* Write request is blocking */ +#define FSF_ERR_BLOCK 040 /* Exception request is blocking */ +#define FSF_BLOCKED 070 +#endif diff --git a/servers/avfs/filedes.c b/servers/avfs/filedes.c new file mode 100644 index 000000000..dd788eaf9 --- /dev/null +++ b/servers/avfs/filedes.c @@ -0,0 +1,556 @@ +/* This file contains the procedures that manipulate file descriptors. + * + * The entry points into this file are + * get_fd: look for free file descriptor and free filp slots + * get_filp: look up the filp entry for a given file descriptor + * find_filp: find a filp slot that points to a given vnode + * inval_filp: invalidate a filp and associated fd's, only let close() + * happen on it + * do_verify_fd: verify whether the given file descriptor is valid for + * the given endpoint. + * do_set_filp: marks a filp as in-flight. + * do_copy_filp: copies a filp to another endpoint. + * do_put_filp: marks a filp as not in-flight anymore. + * do_cancel_fd: cancel the transaction when something goes wrong for + * the receiver. 
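+ *
+ * (Note added in editing: do_set_filp, do_copy_filp, do_put_filp and
+ * do_cancel_fd together implement passing an open file descriptor from
+ * one process to another, presumably in support of PFS's unix domain
+ * sockets: the filp gets an extra reference while it is "in flight"
+ * and is then installed in the receiving process's fp_filp[] table.)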
+ */ + +#include +#include +#include +#include +#include "fs.h" +#include "file.h" +#include "fproc.h" +#include "vnode.h" + + +FORWARD _PROTOTYPE( filp_id_t verify_fd, (endpoint_t ep, int fd) ); + +#if LOCK_DEBUG +/*===========================================================================* + * check_filp_locks * + *===========================================================================*/ +PUBLIC void check_filp_locks_by_me(void) +{ +/* Check whether this thread still has filp locks held */ + struct filp *f; + int r; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + r = mutex_trylock(&f->filp_lock); + if (r == -EDEADLK) + panic("Thread %d still holds filp lock on filp %p call_nr=%d\n", + mthread_self(), f, call_nr); + else if (r == 0) { + /* We just obtained the lock, release it */ + mutex_unlock(&f->filp_lock); + } + } +} +#endif + +/*===========================================================================* + * check_filp_locks * + *===========================================================================*/ +PUBLIC void check_filp_locks(void) +{ + struct filp *f; + int r, count = 0; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + r = mutex_trylock(&f->filp_lock); + if (r == -EBUSY) { + /* Mutex is still locked */ + count++; + } else if (r == 0) { + /* We just obtained a lock, don't want it */ + mutex_unlock(&f->filp_lock); + } else + panic("filp_lock weird state"); + } + if (count) panic("locked filps"); +#if 0 + else printf("check_filp_locks OK\n"); +#endif +} + +/*===========================================================================* + * init_filps * + *===========================================================================*/ +PUBLIC void init_filps(void) +{ +/* Initialize filps */ + struct filp *f; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + mutex_init(&f->filp_lock, NULL); + } + +} + +/*===========================================================================* + * get_fd * + *===========================================================================*/ +PUBLIC int get_fd(int start, mode_t bits, int *k, struct filp **fpt) +{ +/* Look for a free file descriptor and a free filp slot. Fill in the mode word + * in the latter, but don't claim either one yet, since the open() or creat() + * may yet fail. + */ + + register struct filp *f; + register int i; + + /* Search the fproc fp_filp table for a free file descriptor. */ + for (i = start; i < OPEN_MAX; i++) { + if (fp->fp_filp[i] == NULL && !FD_ISSET(i, &fp->fp_filp_inuse)) { + /* A file descriptor has been located. */ + *k = i; + break; + } + } + + /* Check to see if a file descriptor has been found. */ + if (i >= OPEN_MAX) return(EMFILE); + + /* If we don't care about a filp, return now */ + if (fpt == NULL) return(OK); + + /* Now that a file descriptor has been found, look for a free filp slot. */ + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + assert(f->filp_count >= 0); + if (f->filp_count == 0 && mutex_trylock(&f->filp_lock) == 0) { + if (verbose) printf("get_fd: locking filp=%p\n", f); + f->filp_mode = bits; + f->filp_pos = cvu64(0); + f->filp_selectors = 0; + f->filp_select_ops = 0; + f->filp_pipe_select_ops = 0; + f->filp_flags = 0; + f->filp_state = FS_NORMAL; + f->filp_select_flags = 0; + f->filp_softlock = NULL; + *fpt = f; + return(OK); + } + } + + /* If control passes here, the filp table must be full. Report that back. 
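+ * (Clarification, not in the original commit: get_fd() distinguishes
+ * two errors. EMFILE above means the calling process's own descriptor
+ * table is full; ENFILE here means the system-wide filp table has no
+ * free slots left.)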
*/ + return(ENFILE); +} + + +/*===========================================================================* + * get_filp * + *===========================================================================*/ +PUBLIC struct filp *get_filp(fild, locktype) +int fild; /* file descriptor */ +tll_access_t locktype; +{ +/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */ + + return get_filp2(fp, fild, locktype); +} + + +/*===========================================================================* + * get_filp2 * + *===========================================================================*/ +PUBLIC struct filp *get_filp2(rfp, fild, locktype) +register struct fproc *rfp; +int fild; /* file descriptor */ +tll_access_t locktype; +{ +/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */ + struct filp *filp; + + err_code = EBADF; + if (fild < 0 || fild >= OPEN_MAX ) return(NULL); + if (rfp->fp_filp[fild] == NULL && FD_ISSET(fild, &rfp->fp_filp_inuse)) + err_code = EIO; /* The filedes is not there, but is not closed either. + */ + if ((filp = rfp->fp_filp[fild]) != NULL) lock_filp(filp, locktype); + + return(filp); /* may also be NULL */ +} + + +/*===========================================================================* + * find_filp * + *===========================================================================*/ +PUBLIC struct filp *find_filp(struct vnode *vp, mode_t bits) +{ +/* Find a filp slot that refers to the vnode 'vp' in a way as described + * by the mode bit 'bits'. Used for determining whether somebody is still + * interested in either end of a pipe. Also used when opening a FIFO to + * find partners to share a filp field with (to shared the file position). + * Like 'get_fd' it performs its job by linear search through the filp table. + */ + + struct filp *f; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + if (f->filp_count != 0 && f->filp_vno == vp && (f->filp_mode & bits)) { + return(f); + } + } + + /* If control passes here, the filp wasn't there. Report that back. */ + return(NULL); +} + +/*===========================================================================* + * invalidate * + *===========================================================================*/ +PUBLIC int invalidate(struct filp *fp) +{ +/* Invalidate filp. fp_filp_inuse is not cleared, so filp can't be reused + until it is closed first. */ + + int f, fd, n = 0; + for(f = 0; f < NR_PROCS; f++) { + if(fproc[f].fp_pid == PID_FREE) continue; + for(fd = 0; fd < OPEN_MAX; fd++) { + if(fproc[f].fp_filp[fd] && fproc[f].fp_filp[fd] == fp) { + fproc[f].fp_filp[fd] = NULL; + n++; + } + } + } + + return(n); /* Report back how often this filp has been invalidated. */ +} + +/*===========================================================================* + * lock_filp * + *===========================================================================*/ +PUBLIC void lock_filp(filp, locktype) +struct filp *filp; +tll_access_t locktype; +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + struct vnode *vp; + + assert(filp->filp_count > 0); + vp = filp->filp_vno; + assert(vp != NULL); + + if (verbose) + printf("lock_filp: filp=%p locking vnode %p with locktype %d\n", filp, + vp, locktype); + + /* Lock vnode only if we haven't already locked it. If already locked by us, + * we're allowed to have one additional 'soft' lock. 
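+ * (Note added in editing: the "soft" lock covers one worker thread
+ * locking two filps that refer to the same vnode, e.g. both ends of a
+ * pipe. The vnode lock is taken only once; the second filp merely
+ * records the owner in filp_softlock, so unlock_filp() knows not to
+ * unlock the vnode twice. Compare unlock_filps() below.)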
*/ + if (tll_locked_by_me(&vp->v_lock)) { + assert(filp->filp_softlock == NULL); + filp->filp_softlock = fp; + } else { + lock_vnode(vp, locktype); + } + + assert(vp->v_ref_count > 0); /* vnode still in use? */ + assert(filp->filp_vno == vp); /* vnode still what we think it is? */ + assert(filp->filp_count > 0); /* filp still in use? */ + + /* First try to get filp lock right off the bat */ + if (mutex_trylock(&filp->filp_lock) != 0) { + + /* Already in use, let's wait for our turn */ + org_m_in = m_in; + org_fp = fp; + org_self = self; + assert(mutex_lock(&filp->filp_lock) == 0); + m_in = org_m_in; + fp = org_fp; + self = org_self; + } + + assert(filp->filp_count > 0); /* Yet again; filp still in use? */ +} + +/*===========================================================================* + * unlock_filp * + *===========================================================================*/ +PUBLIC void unlock_filp(filp) +struct filp *filp; +{ + /* If this filp holds a soft lock on the vnode, we must be the owner */ + if (filp->filp_softlock != NULL) + assert(filp->filp_softlock == fp); + + if (filp->filp_count > 0) { + /* Only unlock vnode if filp is still in use */ + + /* and if we don't hold a soft lock */ + if (filp->filp_softlock == NULL) { + assert(tll_islocked(&(filp->filp_vno->v_lock))); + unlock_vnode(filp->filp_vno); + } + } + + filp->filp_softlock = NULL; + assert(mutex_unlock(&filp->filp_lock) == 0); +} + +/*===========================================================================* + * unlock_filps * + *===========================================================================*/ +PUBLIC void unlock_filps(filp1, filp2) +struct filp *filp1; +struct filp *filp2; +{ +/* Unlock two filps that are tied to the same vnode. As a thread can lock a + * vnode only once, unlocking the vnode twice would result in an error. */ + + /* No NULL pointers and not equal */ + assert(filp1); + assert(filp2); + assert(filp1 != filp2); + + /* Must be tied to the same vnode and not NULL */ + assert(filp1->filp_vno == filp2->filp_vno); + assert(filp1->filp_vno != NULL); + + if (filp1->filp_count > 0 && filp2->filp_count > 0) { + /* Only unlock vnode if filps are still in use */ + unlock_vnode(filp1->filp_vno); + } + + filp1->filp_softlock = NULL; + filp2->filp_softlock = NULL; + assert(mutex_unlock(&filp2->filp_lock) == 0); + assert(mutex_unlock(&filp1->filp_lock) == 0); +} + +/*===========================================================================* + * verify_fd * + *===========================================================================*/ +PRIVATE filp_id_t verify_fd(ep, fd) +endpoint_t ep; +int fd; +{ +/* Verify whether the file descriptor 'fd' is valid for the endpoint 'ep'. When + * the file descriptor is valid, verify_fd returns a pointer to that filp, else + * it returns NULL. + */ + int slot; + struct filp *rfilp; + + if (isokendpt(ep, &slot) != OK) + return(NULL); + + rfilp = get_filp2(&fproc[slot], fd, VNODE_READ); + + return(rfilp); +} + +/*===========================================================================* + * do_verify_fd * + *===========================================================================*/ +PUBLIC int do_verify_fd(void) +{ + struct filp *rfilp; + rfilp = (struct filp *) verify_fd(m_in.USER_ENDPT, m_in.COUNT); + m_out.ADDRESS = (void *) rfilp; + if (rfilp != NULL) unlock_filp(rfilp); + return (rfilp != NULL) ? 
OK : EINVAL; +} + +/*===========================================================================* + * set_filp * + *===========================================================================*/ +PUBLIC int set_filp(sfilp) +filp_id_t sfilp; +{ + if (sfilp == NULL) return(EINVAL); + + lock_filp(sfilp, VNODE_READ); + sfilp->filp_count++; + unlock_filp(sfilp); + + return(OK); +} + +/*===========================================================================* + * do_set_filp * + *===========================================================================*/ +PUBLIC int do_set_filp(void) +{ + return set_filp((filp_id_t) m_in.ADDRESS); +} + +/*===========================================================================* + * copy_filp * + *===========================================================================*/ +PUBLIC int copy_filp(to_ep, cfilp) +endpoint_t to_ep; +filp_id_t cfilp; +{ + int fd; + int slot; + struct fproc *rfp; + + if (isokendpt(to_ep, &slot) != OK) return(EINVAL); + rfp = &fproc[slot]; + + /* Find an open slot in fp_filp */ + for (fd = 0; fd < OPEN_MAX; fd++) { + if (rfp->fp_filp[fd] == NULL && + !FD_ISSET(fd, &rfp->fp_filp_inuse)) { + + /* Found a free slot, add descriptor */ + FD_SET(fd, &rfp->fp_filp_inuse); + rfp->fp_filp[fd] = cfilp; + rfp->fp_filp[fd]->filp_count++; + return(fd); + } + } + + /* File descriptor table is full */ + return(EMFILE); +} + +/*===========================================================================* + * do_copy_filp * + *===========================================================================*/ +PUBLIC int do_copy_filp(void) +{ + return copy_filp(m_in.USER_ENDPT, (filp_id_t) m_in.ADDRESS); +} + +/*===========================================================================* + * put_filp * + *===========================================================================*/ +PUBLIC int put_filp(pfilp) +filp_id_t pfilp; +{ + if (pfilp == NULL) { + return EINVAL; + } else { + lock_filp(pfilp, VNODE_OPCL); + close_filp(pfilp); + return(OK); + } +} + +/*===========================================================================* + * do_put_filp * + *===========================================================================*/ +PUBLIC int do_put_filp(void) +{ + return put_filp((filp_id_t) m_in.ADDRESS); +} + +/*===========================================================================* + * cancel_fd * + *===========================================================================*/ +PUBLIC int cancel_fd(ep, fd) +endpoint_t ep; +int fd; +{ + int slot; + struct fproc *rfp; + struct filp *rfilp; + + if (isokendpt(ep, &slot) != OK) return(EINVAL); + rfp = &fproc[slot]; + + /* Check that the input 'fd' is valid */ + rfilp = (struct filp *) verify_fd(ep, fd); + if (rfilp != NULL) { + /* Found a valid descriptor, remove it */ + FD_CLR(fd, &rfp->fp_filp_inuse); + if (rfp->fp_filp[fd]->filp_count == 0) { + unlock_filp(rfilp); + printf("VFS: filp_count for slot %d fd %d already zero", slot, + fd); + return(EINVAL); + } + rfp->fp_filp[fd]->filp_count--; + rfp->fp_filp[fd] = NULL; + unlock_filp(rfilp); + return(fd); + } + + /* File descriptor is not valid for the endpoint. 
*/ + return(EINVAL); +} + +/*===========================================================================* + * do_cancel_fd * + *===========================================================================*/ +PUBLIC int do_cancel_fd(void) +{ + return cancel_fd(m_in.USER_ENDPT, m_in.COUNT); +} + +/*===========================================================================* + * close_filp * + *===========================================================================*/ +PUBLIC void close_filp(f) +struct filp *f; +{ +/* Close a file. Will also unlock filp when done */ + + int mode_word, rw; + dev_t dev; + struct vnode *vp; + + /* Must be locked */ + assert(mutex_trylock(&f->filp_lock) == -EDEADLK); + assert(tll_islocked(&f->filp_vno->v_lock)); + + vp = f->filp_vno; + + if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) { + /* Check to see if the file is special. */ + mode_word = vp->v_mode & I_TYPE; + if (mode_word == I_CHAR_SPECIAL || mode_word == I_BLOCK_SPECIAL) { + dev = (dev_t) vp->v_sdev; + if (mode_word == I_BLOCK_SPECIAL) { + lock_bsf(); + if (vp->v_bfs_e == ROOT_FS_E) { + /* Invalidate the cache unless the special is + * mounted. Assume that the root filesystem's + * is open only for fsck. + */ + req_flush(vp->v_bfs_e, dev); + } + unlock_bsf(); + } + /* Do any special processing on device close. */ + (void) dev_close(dev, f-filp); /* Ignore any errors, even + * SUSPEND. */ + f->filp_mode = FILP_CLOSED; + } + } + + /* If the inode being closed is a pipe, release everyone hanging on it. */ + if (vp->v_pipe == I_PIPE) { + rw = (f->filp_mode & R_BIT ? WRITE : READ); + release(vp, rw, NR_PROCS); + } + + /* If a write has been done, the inode is already marked as DIRTY. */ + if (--f->filp_count == 0) { + if (vp->v_pipe == I_PIPE) { + /* Last reader or writer is going. Tell PFS about latest + * pipe size. + */ + truncate_vnode(vp, vp->v_size); + } + + unlock_vnode(f->filp_vno); + put_vnode(f->filp_vno); + } else if (f->filp_count < 0) { + panic("VFS: invalid filp count: %d ino %d/%d", f->filp_count, + vp->v_dev, vp->v_inode_nr); + } else { + unlock_vnode(f->filp_vno); + } + + mutex_unlock(&f->filp_lock); +} diff --git a/servers/avfs/fproc.h b/servers/avfs/fproc.h new file mode 100644 index 000000000..465c8ed64 --- /dev/null +++ b/servers/avfs/fproc.h @@ -0,0 +1,72 @@ +#ifndef __VFS_FPROC_H__ +#define __VFS_FPROC_H__ + +#include "threads.h" + +#include +#include + +/* This is the per-process information. A slot is reserved for each potential + * process. Thus NR_PROCS must be the same as in the kernel. It is not + * possible or even necessary to tell when a slot is free here. + */ +#define LOCK_DEBUG 0 +EXTERN struct fproc { + unsigned fp_flags; + + pid_t fp_pid; /* process id */ + endpoint_t fp_endpoint; /* kernel endpoint number of this process */ + + struct vnode *fp_wd; /* working directory; NULL during reboot */ + struct vnode *fp_rd; /* root directory; NULL during reboot */ + + struct filp *fp_filp[OPEN_MAX];/* the file descriptor table */ + fd_set fp_filp_inuse; /* which fd's are in use? 
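+ * (a bit may be set while fp_filp[fd] is
+ * NULL: the slot is then reserved, in
+ * flight, or its filp was invalidated)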
*/ + fd_set fp_cloexec_set; /* bit map for POSIX Table 6-2 FD_CLOEXEC */ + + dev_t fp_tty; /* major/minor of controlling tty */ + int fp_block_fd; /* place to save fd if rd/wr can't finish */ + int fp_block_callnr; /* blocked call if rd/wr can't finish */ + char *fp_buffer; /* place to save buffer if rd/wr can't finish*/ + int fp_nbytes; /* place to save bytes if rd/wr can't finish */ + int fp_cum_io_partial; /* partial byte count if rd/wr can't finish */ + endpoint_t fp_task; /* which task is proc suspended on */ + int fp_blocked_on; /* what is it blocked on */ + endpoint_t fp_ioproc; /* proc no. in suspended-on i/o message */ + + cp_grant_id_t fp_grant; /* revoke this grant on unsuspend if > -1 */ + + uid_t fp_realuid; /* real user id */ + uid_t fp_effuid; /* effective user id */ + gid_t fp_realgid; /* real group id */ + gid_t fp_effgid; /* effective group id */ + int fp_ngroups; /* number of supplemental groups */ + gid_t fp_sgroups[NGROUPS_MAX];/* supplemental groups */ + mode_t fp_umask; /* mask set by umask system call */ + message *fp_sendrec; /* request/reply to/from FS/driver */ + mutex_t fp_lock; /* mutex to lock fproc object */ + struct job fp_job; /* pending job */ + thread_t fp_wtid; /* Thread ID of worker */ +#if LOCK_DEBUG + int fp_vp_rdlocks; /* number of read-only locks on vnodes */ + int fp_vmnt_rdlocks; /* number of read-only locks on vmnts */ +#endif +} fproc[NR_PROCS]; + +/* fp_flags */ +#define FP_NOFLAGS 00 +#define FP_SUSP_REOPEN 01 /* Process is suspended until the reopens are + * completed (after the restart of a driver). + */ +#define FP_REVIVED 02 /* Indicates process is being revived */ +#define FP_SESLDR 04 /* Set if process is session leader */ +#define FP_PENDING 010 /* Set if process has pending work */ +#define FP_EXITING 020 /* Set if process is exiting */ +#define FP_PM_PENDING 040 /* Set if process has pending PM request */ + +/* Field values. */ +#define NOT_REVIVING 0xC0FFEEE /* process is not being revived */ +#define REVIVING 0xDEEAD /* process is being revived from suspension */ +#define PID_FREE 0 /* process slot free */ + +#endif /* __VFS_FPROC_H__ */ diff --git a/servers/avfs/fs.h b/servers/avfs/fs.h new file mode 100644 index 000000000..9531c2207 --- /dev/null +++ b/servers/avfs/fs.h @@ -0,0 +1,51 @@ +#ifndef __VFS_FS_H__ +#define __VFS_FS_H__ + +/* This is the master header for fs. It includes some other files + * and defines the principal constants. + */ +#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ +#define _SYSTEM 1 /* tell headers that this is the kernel */ + +#define DO_SANITYCHECKS 0 + +#if DO_SANITYCHECKS +#define SANITYCHECK do { \ + if(!check_vrefs() || !check_pipe()) { \ + printf("VFS:%s:%d: call_nr %d who_e %d\n", \ + __FILE__, __LINE__, call_nr, who_e); \ + panic("sanity check failed"); \ + } \ +} while(0) +#else +#define SANITYCHECK +#endif + +/* The following are so basic, all the *.c files get them automatically. 
*/ +#include /* MUST be first */ +#include /* MUST be second */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "const.h" +#include "dmap.h" +#include "proto.h" +#include "threads.h" +#include "glo.h" +#include "comm.h" +#include "vmnt.h" + +#endif diff --git a/servers/avfs/fscall.c b/servers/avfs/fscall.c new file mode 100644 index 000000000..2ef4f62d7 --- /dev/null +++ b/servers/avfs/fscall.c @@ -0,0 +1,136 @@ +/* This file handles nested counter-request calls to VFS sent by file system + * (FS) servers in response to VFS requests. + * + * The entry points into this file are + * nested_fs_call perform a nested call from a file system server + * nested_dev_call perform a nested call from a device driver server + * + */ + +#include "fs.h" +#include "fproc.h" +#include +#include +#include +#include +#include + +/* maximum nested call stack depth */ +#define MAX_DEPTH 1 + +/* global variables stack */ +PRIVATE struct { + struct fproc *g_fp; /* pointer to caller process */ + message g_m_in; /* request message */ + message g_m_out; /* reply message */ + int g_who_e; /* endpoint of caller process */ + int g_who_p; /* slot number of caller process */ + int g_call_nr; /* call number */ + int g_super_user; /* is the caller root? */ + char g_user_fullpath[PATH_MAX+1]; /* path to look up */ +} globals[MAX_DEPTH]; + +PRIVATE int depth = 0; /* current globals stack level */ + +#if ENABLE_SYSCALL_STATS +EXTERN unsigned long calls_stats[NCALLS]; +#endif + +FORWARD _PROTOTYPE( int push_globals, (void) ); +FORWARD _PROTOTYPE( void pop_globals, (void) ); +FORWARD _PROTOTYPE( void set_globals, (message *m) ); + +/*===========================================================================* + * push_globals * + *===========================================================================*/ +PRIVATE int push_globals() +{ +/* Save the global variables of the current call onto the globals stack. + */ + + if (depth == MAX_DEPTH) + return(EPERM); + + globals[depth].g_fp = fp; + globals[depth].g_m_in = m_in; + globals[depth].g_m_out = m_out; + globals[depth].g_super_user = super_user; + + /* err_code is not used across blocking calls */ + depth++; + return(OK); +} + +/*===========================================================================* + * pop_globals * + *===========================================================================*/ +PRIVATE void pop_globals() +{ +/* Restore the global variables of a call from the globals stack. + */ + + if (depth == 0) + panic("Popping from empty globals stack!"); + + depth--; + + fp = globals[depth].g_fp; + m_in = globals[depth].g_m_in; + m_out = globals[depth].g_m_out; + +} + +/*===========================================================================* + * set_globals * + *===========================================================================*/ +PRIVATE void set_globals(m) +message *m; /* request message */ +{ +/* Initialize global variables based on a request message. + */ + int proc_p; + + m_in = *m; + + proc_p = _ENDPOINT_P(m_in.m_source); + fp = &fproc[proc_p]; + + /* the rest need not be initialized */ +} + +/*===========================================================================* + * nested_fs_call * + *===========================================================================*/ +PUBLIC void nested_fs_call(m) +message *m; /* request/reply message pointer */ +{ +/* Handle a nested call from a file system server. 
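+ * (Example, not in the original commit: an FS server that needs
+ * getsysinfo() while it is itself handling a VFS request sends that
+ * call here. The globals of the interrupted request are pushed, the
+ * nested call runs, and the globals are popped again before VFS
+ * resumes; only COMMON_GETSYSINFO is accepted, as checked below.)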
+ */ + int r; + + /* Save global variables of the current call */ + if ((r = push_globals()) != OK) { + printf("VFS: error saving global variables in call %d from FS %d\n", + m->m_type, m->m_source); + } else { + /* Initialize global variables for the nested call */ + set_globals(m); + + /* Perform the nested call - only getsysinfo() is allowed right now */ + if (call_nr == COMMON_GETSYSINFO) { + r = do_getsysinfo(); + } else { + printf("VFS: invalid nested call %d from FS %d\n", call_nr, + who_e); + + r = ENOSYS; + } + + /* Store the result, and restore original global variables */ + *m = m_out; + + pop_globals(); + } + + m->m_type = r; +} diff --git a/servers/avfs/gcov.c b/servers/avfs/gcov.c new file mode 100644 index 000000000..50bed6451 --- /dev/null +++ b/servers/avfs/gcov.c @@ -0,0 +1,66 @@ + +#include "fs.h" +#include "file.h" +#include "fproc.h" + +_PROTOTYPE( int gcov_flush, (cp_grant_id_t grantid, size_t size )); + +/*===========================================================================* + * do_gcov_flush * + *===========================================================================*/ +PUBLIC int do_gcov_flush() +{ +/* A userland tool has requested the gcov data from another + * process (possibly vfs itself). Grant the target process + * access to the supplied buffer, and perform the call that + * makes the target copy its buffer to the caller (incl vfs + * itself). + */ + struct fproc *rfp; + ssize_t size; + cp_grant_id_t grantid; + int r, n; + pid_t target; + message m; + + size = m_in.GCOV_BUFF_SZ; + target = m_in.GCOV_PID; + + /* If the wrong process is sent to, the system hangs; so make this root-only. + */ + + if (!super_user) return(EPERM); + + /* Find target gcov process. */ + for(n = 0; n < NR_PROCS; n++) { + if(fproc[n].fp_endpoint != NONE && fproc[n].fp_pid == target) + break; + } + if(n >= NR_PROCS) { + printf("VFS: gcov process %d not found\n", target); + return(ESRCH); + } + rfp = &fproc[n]; + + /* Grant target process to requestor's buffer. */ + if ((grantid = cpf_grant_magic(rfp->fp_endpoint, who_e, + (vir_bytes) m_in.GCOV_BUFF_P, size, + CPF_WRITE)) < 0) { + printf("VFS: gcov_flush: grant failed\n"); + return(ENOMEM); + } + + if(rfp->fp_endpoint == VFS_PROC_NR) { + /* Request is for VFS itself. */ + r = gcov_flush(grantid, size); + } else { + /* Perform generic GCOV request. */ + m.GCOV_GRANT = grantid; + m.GCOV_BUFF_SZ = size; + r = _taskcall(rfp->fp_endpoint, COMMON_REQ_GCOV_DATA, &m); + } + + cpf_revoke(grantid); + + return(r); +} diff --git a/servers/avfs/glo.h b/servers/avfs/glo.h new file mode 100644 index 000000000..ed50c1b5a --- /dev/null +++ b/servers/avfs/glo.h @@ -0,0 +1,57 @@ +#ifndef __VFS_GLO_H__ +#define __VFS_GLO_H__ + +/* EXTERN should be extern except for the table file */ +#ifdef _TABLE +#undef EXTERN +#define EXTERN +#endif + +/* File System global variables */ +EXTERN struct fproc *fp; /* pointer to caller's fproc struct */ +EXTERN int susp_count; /* number of procs suspended on pipe */ +EXTERN int nr_locks; /* number of locks currently in place */ +EXTERN int reviving; /* number of pipe processes to be revived */ +EXTERN int pending; +EXTERN int sending; + +EXTERN dev_t ROOT_DEV; /* device number of the root device */ +EXTERN int ROOT_FS_E; /* kernel endpoint of the root FS proc */ +EXTERN u32_t system_hz; /* system clock frequency. */ + +/* The parameters of the call are kept here. 
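+ * (Note added in editing: these globals describe the request currently
+ * being handled. They are exactly the state that fscall.c saves and
+ * restores (fp, m_in, m_out) when a nested call from an FS server
+ * interrupts the current request.)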
*/ +EXTERN message m_in; /* the input message itself */ +EXTERN message m_out; /* the output message used for reply */ +# define who_p ((int) (fp - fproc)) +# define isokslot(p) (p >= 0 && \ + p < (int)(sizeof(fproc) / sizeof(struct fproc))) +#if 0 +# define who_e (isokslot(who_p) ? fp->fp_endpoint : m_in.m_source) +#else +# define who_e (isokslot(who_p) && fp->fp_endpoint != NONE ? \ + fp->fp_endpoint : m_in.m_source) +#endif +# define call_nr (m_in.m_type) +# define super_user (fp->fp_effuid == SU_UID ? 1 : 0) +EXTERN struct worker_thread *self; +EXTERN endpoint_t receive_from;/* endpoint with pending reply */ +EXTERN int force_sync; /* toggle forced synchronous communication */ +EXTERN int verbose; +EXTERN int deadlock_resolving; +EXTERN mutex_t exec_lock; +EXTERN mutex_t bsf_lock;/* Global lock for access to block special files */ +EXTERN struct worker_thread workers[NR_WTHREADS]; +EXTERN struct worker_thread sys_worker; +EXTERN struct worker_thread dl_worker; +EXTERN char mount_label[LABEL_MAX]; /* label of file system to mount */ + +/* The following variables are used for returning results to the caller. */ +EXTERN int err_code; /* temporary storage for error number */ + +/* Data initialized elsewhere. */ +extern _PROTOTYPE (int (*call_vec[]), (void) ); /* sys call table */ +extern _PROTOTYPE (int (*pfs_call_vec[]), (void) ); /* pfs callback table */ +extern char dot1[2]; /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */ +extern char dot2[3]; /* meaning to search_dir: no access permission check. */ + +#endif diff --git a/servers/avfs/job.h b/servers/avfs/job.h new file mode 100644 index 000000000..bfcdc5b9e --- /dev/null +++ b/servers/avfs/job.h @@ -0,0 +1,11 @@ +#ifndef __VFS_WORK_H__ +#define __VFS_WORK_H__ + +struct job { + struct fproc *j_fp; + message j_m_in; + void *(*j_func)(void *arg); + struct job *j_next; +}; + +#endif diff --git a/servers/avfs/link.c b/servers/avfs/link.c new file mode 100644 index 000000000..90bf4a148 --- /dev/null +++ b/servers/avfs/link.c @@ -0,0 +1,455 @@ +/* This file handles the LINK and UNLINK system calls. It also deals with + * deallocating the storage used by a file when the last UNLINK is done to a + * file and the blocks must be returned to the free block pool. + * + * The entry points into this file are + * do_link: perform the LINK system call + * do_unlink: perform the UNLINK and RMDIR system calls + * do_rename: perform the RENAME system call + * do_truncate: perform the TRUNCATE system call + * do_ftruncate: perform the FTRUNCATE system call + * do_rdlink: perform the RDLNK system call + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "path.h" +#include "vnode.h" +#include "param.h" + +/*===========================================================================* + * do_link * + *===========================================================================*/ +PUBLIC int do_link() +{ +/* Perform the link(name1, name2) system call. */ + int r = OK; + struct vnode *vp = NULL, *dirp = NULL; + struct vmnt *vmp1 = NULL, *vmp2 = NULL; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp1, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* See if 'name1' (file to be linked to) exists. 
*/ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + + /* Does the final directory of 'name2' exist? */ + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp2, &dirp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + r = err_code; + else if ((dirp = last_dir(&resolve, fp)) == NULL) + r = err_code; + + if (r != OK) { + unlock_vnode(vp); + unlock_vmnt(vmp1); + put_vnode(vp); + return(r); + } + + /* Check for links across devices. */ + if (vp->v_fs_e != dirp->v_fs_e) + r = EXDEV; + else + r = forbidden(dirp, W_BIT | X_BIT); + + if (r == OK) + r = req_link(vp->v_fs_e, dirp->v_inode_nr, fullpath, + vp->v_inode_nr); + + unlock_vnode(vp); + unlock_vnode(dirp); + if (vmp2 != NULL) unlock_vmnt(vmp2); + unlock_vmnt(vmp1); + put_vnode(vp); + put_vnode(dirp); + return(r); +} + + +/*===========================================================================* + * do_unlink * + *===========================================================================*/ +PUBLIC int do_unlink() +{ +/* Perform the unlink(name) or rmdir(name) system call. The code for these two + * is almost the same. They differ only in some condition testing. Unlink() + * may be used by the superuser to do dangerous things; rmdir() may not. + */ + struct vnode *dirp, *vp; + struct vmnt *vmp, *vmp2; + int r; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &dirp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* Get the last directory in the path. */ + if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) + return(err_code); + + if ((dirp = last_dir(&resolve, fp)) == NULL) return(err_code); + + /* Make sure that the object is a directory */ + if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) { + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(ENOTDIR); + } + + /* The caller must have both search and execute permission */ + if ((r = forbidden(dirp, X_BIT | W_BIT)) != OK) { + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(r); + } + + /* Also, if the sticky bit is set, only the owner of the file or a privileged + user is allowed to unlink */ + if ((dirp->v_mode & S_ISVTX) == S_ISVTX) { + /* Look up inode of file to unlink to retrieve owner */ + resolve.l_flags = PATH_RET_SYMLINK; + resolve.l_vmp = &vmp2; /* Shouldn't actually get locked */ + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode = &vp; + resolve.l_vnode_lock = VNODE_READ; + vp = advance(dirp, &resolve, fp); + assert(vmp2 == NULL); + if (vp != NULL) { + if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) + r = EPERM; + unlock_vnode(vp); + put_vnode(vp); + } else + r = err_code; + if (r != OK) { + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(r); + } + } + + tll_upgrade(&vmp->m_lock); + + if(call_nr == UNLINK) + r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath); + else + r = req_rmdir(dirp->v_fs_e, dirp->v_inode_nr, fullpath); + unlock_vnode(dirp); + unlock_vmnt(vmp); + put_vnode(dirp); + return(r); +} + +/*===========================================================================* + * do_rename * + *===========================================================================*/ +PUBLIC int do_rename() +{ +/* Perform the rename(name1, name2) system call. 
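+ * (Note added in editing: the parent vmnt of name1 is locked with
+ * VMNT_WRITE rather than VMNT_EXCL and is upgraded with tll_upgrade()
+ * only right before req_rename(), once all checks have passed; as the
+ * comment below notes, taking the exclusive lock too early could
+ * deadlock while the second parent directory is being resolved.)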
*/ + int r = OK, r1; + struct vnode *old_dirp, *new_dirp = NULL, *vp; + struct vmnt *oldvmp, *newvmp, *vmp2; + char old_name[PATH_MAX+1]; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &oldvmp, &old_dirp); + /* Do not yet request exclusive lock on vmnt to prevent deadlocks later on */ + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* See if 'name1' (existing file) exists. Get dir and file inodes. */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((old_dirp = last_dir(&resolve, fp)) == NULL) + return(err_code); + + /* If the sticky bit is set, only the owner of the file or a privileged + user is allowed to rename */ + if ((old_dirp->v_mode & S_ISVTX) == S_ISVTX) { + /* Look up inode of file to unlink to retrieve owner */ + resolve.l_flags = PATH_RET_SYMLINK; + resolve.l_vmp = &vmp2; /* Shouldn't actually get locked */ + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode = &vp; + resolve.l_vnode_lock = VNODE_READ; + resolve.l_flags = PATH_RET_SYMLINK; + vp = advance(old_dirp, &resolve, fp); + assert(vmp2 == NULL); + if (vp != NULL) { + if(vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) + r = EPERM; + unlock_vnode(vp); + put_vnode(vp); + } else + r = err_code; + if (r != OK) { + unlock_vnode(old_dirp); + unlock_vmnt(oldvmp); + put_vnode(old_dirp); + return(r); + } + } + + /* Save the last component of the old name */ + if(strlen(fullpath) >= sizeof(old_name)) { + unlock_vnode(old_dirp); + unlock_vmnt(oldvmp); + put_vnode(old_dirp); + return(ENAMETOOLONG); + } + strcpy(old_name, fullpath); + + /* See if 'name2' (new name) exists. Get dir inode */ + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &newvmp, &new_dirp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + r = err_code; + else if ((new_dirp = last_dir(&resolve, fp)) == NULL) + r = err_code; + + if (r != OK) { + unlock_vnode(old_dirp); + unlock_vmnt(oldvmp); + put_vnode(old_dirp); + return(r); + } + + /* Both parent directories must be on the same device. */ + if (old_dirp->v_fs_e != new_dirp->v_fs_e) r = EXDEV; + + /* Parent dirs must be writable, searchable and on a writable device */ + if ((r1 = forbidden(old_dirp, W_BIT|X_BIT)) != OK || + (r1 = forbidden(new_dirp, W_BIT|X_BIT)) != OK) r = r1; + + if (r == OK) { + tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */ + r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, + new_dirp->v_inode_nr, fullpath); + } + unlock_vnode(old_dirp); + unlock_vnode(new_dirp); + unlock_vmnt(oldvmp); + if (newvmp) unlock_vmnt(newvmp); + + put_vnode(old_dirp); + put_vnode(new_dirp); + + return(r); +} + +/*===========================================================================* + * do_truncate * + *===========================================================================*/ +PUBLIC int do_truncate() +{ +/* truncate_vnode() does the actual work of do_truncate() and do_ftruncate(). + * do_truncate() and do_ftruncate() have to get hold of the inode, either + * by name or fd, do checks on it, and call truncate_inode() to do the + * work. 
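+ * (That is, truncate_vnode() below: it asks the FS server to truncate
+ * the file via req_ftrunc() and, on success, updates the vnode's
+ * cached v_size.)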
+ */ + struct vnode *vp; + struct vmnt *vmp; + int r; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_EXCL; + resolve.l_vnode_lock = VNODE_WRITE; + + if ((off_t) m_in.flength < 0) return(EINVAL); + + /* Temporarily open file */ + if (fetch_name(m_in.m2_p1, m_in.m2_i1, M1, fullpath) != OK) return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + + /* Ask FS to truncate the file */ + if ((r = forbidden(vp, W_BIT)) == OK) + r = truncate_vnode(vp, m_in.flength); + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_ftruncate * + *===========================================================================*/ +PUBLIC int do_ftruncate() +{ +/* As with do_truncate(), truncate_vnode() does the actual work. */ + struct filp *rfilp; + int r; + + if ((off_t) m_in.flength < 0) return(EINVAL); + + /* File is already opened; get a vnode pointer from filp */ + if ((rfilp = get_filp(m_in.m2_i1, VNODE_WRITE)) == NULL) return(err_code); + + if (!(rfilp->filp_mode & W_BIT)) + r = EBADF; + else + r = truncate_vnode(rfilp->filp_vno, m_in.flength); + + unlock_filp(rfilp); + return(r); +} + + +/*===========================================================================* + * truncate_vnode * + *===========================================================================*/ +PUBLIC int truncate_vnode(vp, newsize) +struct vnode *vp; +off_t newsize; +{ +/* Truncate a regular file or a pipe */ + int r, file_type; + + assert(tll_locked_by_me(&vp->v_lock)); + file_type = vp->v_mode & I_TYPE; + if (file_type != I_REGULAR && file_type != I_NAMED_PIPE) return(EINVAL); + if ((r = req_ftrunc(vp->v_fs_e, vp->v_inode_nr, newsize, 0)) == OK) + vp->v_size = newsize; + return(r); +} + + +/*===========================================================================* + * do_slink * + *===========================================================================*/ +PUBLIC int do_slink() +{ +/* Perform the symlink(name1, name2) system call. 
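+ * (Note: as with symlink(2), 'name1' is the string that becomes the
+ * contents of the new symlink and 'name2' is the path of the link
+ * itself. Hence only the parent directory of 'name2' is resolved
+ * below, while 'name1' is shipped to the FS server verbatim by
+ * req_slink().)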
*/ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + if (m_in.name1_length <= 1) return(ENOENT); + if (m_in.name1_length >= SYMLINK_MAX) return(ENAMETOOLONG); + + /* Get dir inode of 'name2' */ + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + return(err_code); + + if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code); + + if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) { + r = req_slink(vp->v_fs_e, vp->v_inode_nr, fullpath, who_e, + m_in.name1, m_in.name1_length - 1, fp->fp_effuid, + fp->fp_effgid); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + + return(r); +} + +/*===========================================================================* + * rdlink_direct * + *===========================================================================*/ +PUBLIC int rdlink_direct(orig_path, link_path, rfp) +char *orig_path; +char *link_path; /* should have length PATH_MAX+1 */ +struct fproc *rfp; +{ +/* Perform a readlink()-like call from within the VFS */ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Temporarily open the file containing the symbolic link */ + strncpy(fullpath, orig_path, PATH_MAX); + if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code); + + /* Make sure this is a symbolic link */ + if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK) + r = EINVAL; + else + r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, (endpoint_t) 0, + link_path, PATH_MAX+1, 1); + + if (r > 0) link_path[r] = '\0'; /* Terminate string when succesful */ + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + + return r; +} + +/*===========================================================================* + * do_rdlink * + *===========================================================================*/ +PUBLIC int do_rdlink() +{ +/* Perform the readlink(name, buf, bufsize) system call. */ + int r, copylen; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + copylen = m_in.nbytes; + if (copylen < 0) return(EINVAL); + + /* Temporarily open the file containing the symbolic link */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + + /* Make sure this is a symbolic link */ + if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK) + r = EINVAL; + else + r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, + copylen, 0); + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + + return(r); +} diff --git a/servers/avfs/lock.c b/servers/avfs/lock.c new file mode 100644 index 000000000..8b459e91f --- /dev/null +++ b/servers/avfs/lock.c @@ -0,0 +1,191 @@ +/* This file handles advisory file locking as required by POSIX. 
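+ * Locks are byte ranges: each held lock is kept in the file_lock table
+ * (see lock.h) as a [lock_first, lock_last] interval on a vnode.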
+ * + * The entry points into this file are + * lock_op: perform locking operations for FCNTL system call + * lock_revive: revive processes when a lock is released + */ + +#include "fs.h" +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "lock.h" +#include "vnode.h" +#include "param.h" + +/*===========================================================================* + * lock_op * + *===========================================================================*/ +PUBLIC int lock_op(f, req) +struct filp *f; +int req; /* either F_SETLK or F_SETLKW */ +{ +/* Perform the advisory locking required by POSIX. */ + + int r, ltype, i, conflict = 0, unlocking = 0; + mode_t mo; + off_t first, last; + struct flock flock; + vir_bytes user_flock; + struct file_lock *flp, *flp2, *empty; + + /* Fetch the flock structure from user space. */ + user_flock = (vir_bytes) m_in.name1; + r = sys_datacopy(who_e, (vir_bytes) user_flock, VFS_PROC_NR, + (vir_bytes) &flock, (phys_bytes) sizeof(flock)); + if (r != OK) return(EINVAL); + + /* Make some error checks. */ + ltype = flock.l_type; + mo = f->filp_mode; + if (ltype != F_UNLCK && ltype != F_RDLCK && ltype != F_WRLCK) return(EINVAL); + if (req == F_GETLK && ltype == F_UNLCK) return(EINVAL); + if ( (f->filp_vno->v_mode & I_TYPE) != I_REGULAR) return(EINVAL); + if (req != F_GETLK && ltype == F_RDLCK && (mo & R_BIT) == 0) return(EBADF); + if (req != F_GETLK && ltype == F_WRLCK && (mo & W_BIT) == 0) return(EBADF); + + /* Compute the first and last bytes in the lock region. */ + switch (flock.l_whence) { + case SEEK_SET: first = 0; break; + case SEEK_CUR: + if (ex64hi(f->filp_pos) != 0) + panic("lock_op: position in file too high"); + first = ex64lo(f->filp_pos); + break; + case SEEK_END: first = f->filp_vno->v_size; break; + default: return(EINVAL); + } + + /* Check for overflow. */ + if (((long) flock.l_start > 0) && ((first + flock.l_start) < first)) + return(EINVAL); + if (((long) flock.l_start < 0) && ((first + flock.l_start) > first)) + return(EINVAL); + first = first + flock.l_start; + last = first + flock.l_len - 1; + if (flock.l_len == 0) last = MAX_FILE_POS; + if (last < first) return(EINVAL); + + /* Check if this region conflicts with any existing lock. */ + empty = NULL; + for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) { + if (flp->lock_type == 0) { + if (empty == NULL) empty = flp; + continue; /* 0 means unused slot */ + } + if (flp->lock_vnode != f->filp_vno) continue; /* different file */ + if (last < flp->lock_first) continue; /* new one is in front */ + if (first > flp->lock_last) continue; /* new one is afterwards */ + if (ltype == F_RDLCK && flp->lock_type == F_RDLCK) continue; + if (ltype != F_UNLCK && flp->lock_pid == fp->fp_pid) continue; + + /* There might be a conflict. Process it. */ + conflict = 1; + if (req == F_GETLK) break; + + /* If we are trying to set a lock, it just failed. */ + if (ltype == F_RDLCK || ltype == F_WRLCK) { + if (req == F_SETLK) { + /* For F_SETLK, just report back failure. */ + return(EAGAIN); + } else { + /* For F_SETLKW, suspend the process. */ + suspend(FP_BLOCKED_ON_LOCK); + return(SUSPEND); + } + } + + /* We are clearing a lock and we found something that overlaps. */ + unlocking = 1; + if (first <= flp->lock_first && last >= flp->lock_last) { + flp->lock_type = 0; /* mark slot as unused */ + nr_locks--; /* number of locks is now 1 less */ + continue; + } + + /* Part of a locked region has been unlocked. 
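+ * (Worked example, not in the original commit: three cases follow.
+ * Unlocking [0..15] of a held lock [10..30] trims it to [16..30];
+ * unlocking [25..40] trims it to [10..24]; unlocking [15..20] splits
+ * it into [10..14] and [21..30], which needs a spare table slot,
+ * hence the ENOLCK check below.)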
*/
+	if (first <= flp->lock_first) {
+		flp->lock_first = last + 1;
+		continue;
+	}
+
+	if (last >= flp->lock_last) {
+		flp->lock_last = first - 1;
+		continue;
+	}
+
+	/* Bad luck. A lock has been split in two by unlocking the middle. */
+	if (nr_locks == NR_LOCKS) return(ENOLCK);
+	for (i = 0; i < NR_LOCKS; i++)
+		if (file_lock[i].lock_type == 0) break;
+	flp2 = &file_lock[i];
+	flp2->lock_type = flp->lock_type;
+	flp2->lock_pid = flp->lock_pid;
+	flp2->lock_vnode = flp->lock_vnode;
+	flp2->lock_first = last + 1;
+	flp2->lock_last = flp->lock_last;
+	flp->lock_last = first - 1;
+	nr_locks++;
+  }
+  if (unlocking) lock_revive();
+
+  if (req == F_GETLK) {
+	if (conflict) {
+		/* GETLK and conflict. Report on the conflicting lock. */
+		flock.l_type = flp->lock_type;
+		flock.l_whence = SEEK_SET;
+		flock.l_start = flp->lock_first;
+		flock.l_len = flp->lock_last - flp->lock_first + 1;
+		flock.l_pid = flp->lock_pid;
+	} else {
+		/* It is GETLK and there is no conflict. */
+		flock.l_type = F_UNLCK;
+	}
+
+	/* Copy the flock structure back to the caller. */
+	r = sys_datacopy(VFS_PROC_NR, (vir_bytes) &flock,
+		who_e, (vir_bytes) user_flock, (phys_bytes) sizeof(flock));
+	return(r);
+  }
+
+  if (ltype == F_UNLCK) return(OK);	/* unlocked a region with no locks */
+
+  /* There is no conflict. If space exists, store new lock in the table. */
+  if (empty == NULL) return(ENOLCK);	/* table full */
+  empty->lock_type = ltype;
+  empty->lock_pid = fp->fp_pid;
+  empty->lock_vnode = f->filp_vno;
+  empty->lock_first = first;
+  empty->lock_last = last;
+  nr_locks++;
+  return(OK);
+}
+
+
+/*===========================================================================*
+ *				lock_revive				     *
+ *===========================================================================*/
+PUBLIC void lock_revive()
+{
+/* Go find all the processes that are waiting for any kind of lock and
+ * revive them all. The ones that are still blocked will block again when
+ * they run. The others will complete. This strategy is a space-time
+ * tradeoff. Figuring out exactly which ones to unblock now would take
+ * extra code, and the only thing it would win would be some performance in
+ * extremely rare circumstances (namely, that somebody actually used
+ * locking).
+ */
+
+  struct fproc *fptr;
+
+  for (fptr = &fproc[0]; fptr < &fproc[NR_PROCS]; fptr++){
+	if (fptr->fp_pid == PID_FREE) continue;
+	if (fptr->fp_blocked_on == FP_BLOCKED_ON_LOCK) {
+		revive(fptr->fp_endpoint, 0);
+	}
+  }
+}
diff --git a/servers/avfs/lock.h b/servers/avfs/lock.h
new file mode 100644
index 000000000..c2baa651e
--- /dev/null
+++ b/servers/avfs/lock.h
@@ -0,0 +1,15 @@
+#ifndef __VFS_LOCK_H__
+#define __VFS_LOCK_H__
+
+/* This is the file locking table. Like the filp table, it points to the
+ * inode table, however, in this case to achieve advisory locking.
+ */
+EXTERN struct file_lock {
+  short lock_type;		/* F_RDLCK or F_WRLCK; 0 means unused slot */
+  pid_t lock_pid;		/* pid of the process holding the lock */
+  struct vnode *lock_vnode;
+  off_t lock_first;		/* offset of first byte locked */
+  off_t lock_last;		/* offset of last byte locked */
+} file_lock[NR_LOCKS];
+
+#endif
diff --git a/servers/avfs/main.c b/servers/avfs/main.c
new file mode 100644
index 000000000..2e548b80b
--- /dev/null
+++ b/servers/avfs/main.c
@@ -0,0 +1,967 @@
+/* This file contains the main program of the Virtual File System.  It
+ * consists of a loop that gets messages requesting work, carries out the
+ * work, and sends replies.
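+ *
+ * Replies from FS servers are matched to the worker thread that issued the
+ * request by a transaction id encoded in the message type; the main loop
+ * below recovers it with TRNS_GET_ID()/TRNS_DEL_ID() before dispatching.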
+ * + * The entry points into this file are: + * main: main program of the Virtual File System + * reply: send a reply to a process after the requested work is done + * + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "dmap.h" +#include "fproc.h" +#include "vmnt.h" +#include "vnode.h" +#include "job.h" +#include "param.h" + +#if ENABLE_SYSCALL_STATS +EXTERN unsigned long calls_stats[NCALLS]; +#endif + +/* Thread related prototypes */ +FORWARD _PROTOTYPE( void thread_cleanup_f, (struct fproc *rfp, char *f, + int l) ); +#define thread_cleanup(x) thread_cleanup_f(x, __FILE__, __LINE__) +FORWARD _PROTOTYPE( void *do_async_dev_result, (void *arg) ); +FORWARD _PROTOTYPE( void *do_control_msgs, (void *arg) ); +FORWARD _PROTOTYPE( void *do_fs_reply, (struct job *job) ); +FORWARD _PROTOTYPE( void *do_work, (void *arg) ); +FORWARD _PROTOTYPE( void *do_pm, (void *arg) ); +FORWARD _PROTOTYPE( void *do_init_root, (void *arg) ); +FORWARD _PROTOTYPE( void handle_work, (void *(*func)(void *arg)) ); + +FORWARD _PROTOTYPE( void get_work, (void) ); +FORWARD _PROTOTYPE( void lock_pm, (void) ); +FORWARD _PROTOTYPE( void unlock_pm, (void) ); +FORWARD _PROTOTYPE( void service_pm, (void) ); +FORWARD _PROTOTYPE( void service_pm_postponed, (void) ); +FORWARD _PROTOTYPE( int unblock, (struct fproc *rfp) ); + +/* SEF functions and variables. */ +FORWARD _PROTOTYPE( void sef_local_startup, (void) ); +FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) ); +PRIVATE mutex_t pm_lock; + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(void) +{ +/* This is the main program of the file system. The main loop consists of + * three major activities: getting new work, processing the work, and sending + * the reply. This loop never terminates as long as the file system runs. + */ + int transid, req; + struct job *job; + + /* SEF local startup. */ + sef_local_startup(); + + printf("Started AVFS\n"); + verbose = 0; + + /* This is the main loop that gets work, processes it, and sends replies. */ + while (TRUE) { + yield_all(); /* let other threads run */ + send_work(); + get_work(); + + transid = TRNS_GET_ID(m_in.m_type); + req = TRNS_DEL_ID(m_in.m_type); + job = worker_getjob( (thread_t) transid - VFS_TRANSID); + + /* Transaction encoding changes original m_type value; restore. */ + if (job == NULL) + m_in.m_type = transid; + else + m_in.m_type = req; + + if (job != NULL) { + do_fs_reply(job); + continue; + } else if (who_e == PM_PROC_NR) { /* Calls from PM */ + /* Special control messages from PM */ + sys_worker_start(do_pm); + continue; + } else if (is_notify(call_nr)) { + /* A task notify()ed us */ + sys_worker_start(do_control_msgs); + continue; + } else if (who_p < 0) { /* i.e., message comes from a task */ + /* We're going to ignore this message. Tasks should + * send notify()s only. + */ + printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr); + continue; + } + + /* At this point we either have results from an asynchronous device + * or a new system call. In both cases a new worker thread has to be + * started and there might not be one available from the pool. 
This is + * not a problem (requests/replies are simply queued), except when + * they're from an FS endpoint, because these can cause a deadlock. + * handle_work() takes care of the details. */ + if (IS_DEV_RS(call_nr)) { + /* We've got results for a device request */ + handle_work(do_async_dev_result); + continue; + } else { + /* Normal syscall. */ + handle_work(do_work); + } + } + return(OK); /* shouldn't come here */ +} + +/*===========================================================================* + * handle_work * + *===========================================================================*/ +PRIVATE void handle_work(void *(*func)(void *arg)) +{ +/* Handle asynchronous device replies and new system calls. If the originating + * endpoint is an FS endpoint, take extra care not to get in deadlock. */ + struct vmnt *vmp; + + if ((vmp = find_vmnt(who_e)) != NULL) { + /* A back call or dev result from an FS endpoint */ + if (worker_available() == 0) { + /* No worker threads available to handle call */ + if (deadlock_resolving) { + /* Already trying to resolve a deadlock, can't + * handle more, sorry */ + + reply(who_e, EAGAIN); + return; + } + deadlock_resolving = 1; + vmp->m_flags |= VMNT_BACKCALL; + dl_worker_start(func); + return; + } + } + + worker_start(func); +} + +/*===========================================================================* + * do_async_dev_result * + *===========================================================================*/ +PRIVATE void *do_async_dev_result(void *arg) +{ + endpoint_t endpt; + struct job my_job; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + /* An asynchronous character driver has results for us */ + if (call_nr == DEV_REVIVE) { + endpt = m_in.REP_ENDPT; + if (endpt == VFS_PROC_NR) + endpt = find_suspended_ep(m_in.m_source, m_in.REP_IO_GRANT); + + if (endpt == NONE) { + printf("VFS: proc with grant %d from %d not found\n", + m_in.REP_IO_GRANT, m_in.m_source); + } else if (m_in.REP_STATUS == SUSPEND) { + printf("VFS: got SUSPEND on DEV_REVIVE: not reviving proc\n"); + } else + revive(endpt, m_in.REP_STATUS); + } + else if (call_nr == DEV_OPEN_REPL) open_reply(); + else if (call_nr == DEV_REOPEN_REPL) reopen_reply(); + else if (call_nr == DEV_CLOSE_REPL) close_reply(); + else if (call_nr == DEV_SEL_REPL1) + select_reply1(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS); + else if (call_nr == DEV_SEL_REPL2) + select_reply2(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS); + + if (deadlock_resolving) { + struct vmnt *vmp; + if ((vmp = find_vmnt(who_e)) != NULL) + vmp->m_flags &= ~VMNT_BACKCALL; + + if (fp != NULL && fp->fp_wtid == dl_worker.w_tid) + deadlock_resolving = 0; + } + + thread_cleanup(NULL); + return(NULL); +} + +/*===========================================================================* + * do_control_msgs * + *===========================================================================*/ +PRIVATE void *do_control_msgs(void *arg) +{ + struct job my_job; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + /* Check for special control messages. */ + if (who_e == CLOCK) { + /* Alarm timer expired. Used only for select(). Check it. */ + expire_timers(m_in.NOTIFY_TIMESTAMP); + } else if (who_e == DS_PROC_NR) { + /* DS notifies us of an event. */ + ds_event(); + } else { + /* Device notifies us of an event. 
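+		 * (a status notification from a character device driver;
+		 * dev_status() queries the driver for the details)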
*/ + dev_status(&m_in); + } + + thread_cleanup(NULL); + return(NULL); +} + +/*===========================================================================* + * do_fs_reply * + *===========================================================================*/ +PRIVATE void *do_fs_reply(struct job *job) +{ + struct vmnt *vmp; + struct fproc *rfp; + + if (verbose) printf("VFS: reply to request!\n"); + if ((vmp = find_vmnt(who_e)) == NULL) + panic("Couldn't find vmnt for endpoint %d", who_e); + + rfp = job->j_fp; + + if (rfp == NULL || rfp->fp_endpoint == NONE) { + printf("VFS: spurious reply from %d\n", who_e); + return(NULL); + } + + *rfp->fp_sendrec = m_in; + vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */ + + worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */ + return(NULL); +} + +/*===========================================================================* + * lock_pm * + *===========================================================================*/ +PRIVATE void lock_pm(void) +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + /* First try to get it right off the bat */ + if (mutex_trylock(&pm_lock) == 0) + return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + + if (mutex_lock(&pm_lock) != 0) + panic("Could not obtain lock on pm\n"); + + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_pm * + *===========================================================================*/ +PRIVATE void unlock_pm(void) +{ + if (mutex_unlock(&pm_lock) != 0) + panic("Could not release lock on pm"); +} + +/*===========================================================================* + * do_pm * + *===========================================================================*/ +PRIVATE void *do_pm(void *arg) +{ + struct job my_job; + struct fproc *rfp; + + my_job = *((struct job *) arg); + rfp = fp = my_job.j_fp; + m_in = my_job.j_m_in; + + lock_pm(); + service_pm(); + unlock_pm(); + + thread_cleanup(NULL); + return(NULL); +} + +/*===========================================================================* + * do_pending_pipe * + *===========================================================================*/ +PRIVATE void *do_pending_pipe(void *arg) +{ + int r, fd_nr; + struct filp *f; + struct job my_job; + tll_access_t locktype; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + lock_proc(fp, 1 /* force lock */); + + fd_nr = fp->fp_block_fd; + locktype = (call_nr == READ) ? VNODE_READ : VNODE_WRITE; + f = get_filp(fd_nr, locktype); + assert(f != NULL); + + r = rw_pipe((call_nr == READ) ? READING : WRITING, who_e, fd_nr, f, + fp->fp_buffer, fp->fp_nbytes); + + if (r != SUSPEND) /* Do we have results to report? 
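+		 * (a SUSPEND result means the pipe I/O blocked again; the
+		 * caller then simply remains suspended)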
*/ + reply(who_e, r); + + unlock_filp(f); + + thread_cleanup(fp); + return(NULL); +} + +/*===========================================================================* + * do_dummy * + *===========================================================================*/ +PUBLIC void *do_dummy(void *arg) +{ + struct job my_job; + int r; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + if ((r = mutex_trylock(&fp->fp_lock)) == 0) { + thread_cleanup(fp); + } else { + /* Proc is busy, let that worker thread carry out the work */ + thread_cleanup(NULL); + } + return(NULL); +} + +/*===========================================================================* + * do_work * + *===========================================================================*/ +PRIVATE void *do_work(void *arg) +{ + int error; + struct job my_job; + + my_job = *((struct job *) arg); + fp = my_job.j_fp; + m_in = my_job.j_m_in; + + lock_proc(fp, 0); /* This proc is busy */ + + if (call_nr == MAPDRIVER) { + error = do_mapdriver(); + } else if (call_nr == COMMON_GETSYSINFO) { + error = do_getsysinfo(); + } else if (IS_PFS_VFS_RQ(call_nr)) { + if (who_e != PFS_PROC_NR) { + printf("VFS: only PFS is allowed to make nested VFS calls\n"); + error = ENOSYS; + } else if (call_nr <= PFS_BASE || call_nr >= PFS_BASE + PFS_NREQS) { + error = ENOSYS; + } else { + call_nr -= PFS_BASE; + error = (*pfs_call_vec[call_nr])(); + } + } else { + /* We're dealing with a POSIX system call from a normal + * process. Call the internal function that does the work. + */ + if (call_nr < 0 || call_nr >= NCALLS) { + error = ENOSYS; + } else if (fp->fp_flags & FP_EXITING) { + error = SUSPEND; + } else if (fp->fp_pid == PID_FREE) { + /* Process vanished before we were able to handle request. + * Replying has no use. Just drop it. */ + error = SUSPEND; + } else { +#if ENABLE_SYSCALL_STATS + calls_stats[call_nr]++; +#endif + error = (*call_vec[call_nr])(); + } + } + + /* Copy the results back to the user and send reply. */ + if (error != SUSPEND) { + if (deadlock_resolving) { + struct vmnt *vmp; + if ((vmp = find_vmnt(who_e)) != NULL) + vmp->m_flags &= ~VMNT_BACKCALL; + + if (fp->fp_wtid == dl_worker.w_tid) + deadlock_resolving = 0; + } + reply(who_e, error ); + } + + thread_cleanup(fp); + return(NULL); +} + +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +PRIVATE void sef_local_startup() +{ + /* Register init callbacks. */ + sef_setcb_init_fresh(sef_cb_init_fresh); + sef_setcb_init_restart(sef_cb_init_fail); + + /* No live update support for now. */ + + /* Let SEF perform startup. */ + sef_startup(); +} + +/*===========================================================================* + * sef_cb_init_fresh * + *===========================================================================*/ +PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) +{ +/* Initialize the virtual file server. */ + int s, i; + struct fproc *rfp; + message mess; + struct rprocpub rprocpub[NR_BOOT_PROCS]; + + force_sync = 0; + + /* Initialize proc endpoints to NONE */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + rfp->fp_endpoint = NONE; + rfp->fp_pid = PID_FREE; + } + + /* Initialize the process table with help of the process manager messages. + * Expect one message for each system process with its slot number and pid. + * When no more processes follow, the magic process number NONE is sent. 
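+ * Schematically, the handshake implemented below (field names as used in
+ * the loop; this is a summary of the code, not an additional protocol):
+ *
+ *	PM -> VFS: m_type = PM_INIT, with PM_SLOT, PM_PID, PM_PROC  (repeated)
+ *	PM -> VFS: m_type = PM_INIT, with PM_PROC = NONE            (last one)
+ *	VFS -> PM: m_type = OK                                      (sync)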
+ * Then, stop and synchronize with the PM.
+ */
+  do {
+	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
+		panic("VFS: couldn't receive from PM: %d", s);
+
+	if (mess.m_type != PM_INIT)
+		panic("unexpected message from PM: %d", mess.m_type);
+
+	if (NONE == mess.PM_PROC) break;
+
+	rfp = &fproc[mess.PM_SLOT];
+	rfp->fp_flags = FP_NOFLAGS;
+	rfp->fp_pid = mess.PM_PID;
+	rfp->fp_endpoint = mess.PM_PROC;
+	rfp->fp_grant = GRANT_INVALID;
+	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+	rfp->fp_realuid = (uid_t) SYS_UID;
+	rfp->fp_effuid = (uid_t) SYS_UID;
+	rfp->fp_realgid = (gid_t) SYS_GID;
+	rfp->fp_effgid = (gid_t) SYS_GID;
+	rfp->fp_umask = ~0;
+  } while (TRUE);			/* continue until process NONE */
+  mess.m_type = OK;			/* tell PM that we succeeded */
+  s = send(PM_PROC_NR, &mess);		/* send synchronization message */
+
+  /* All process table entries have been set. Continue with initialization. */
+  fp = &fproc[_ENDPOINT_P(VFS_PROC_NR)];/* During init all communication with
+					 * FSes is on behalf of myself */
+  init_dmap();				/* Initialize device table. */
+  system_hz = sys_hz();
+
+  /* Map all the services in the boot image. */
+  if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
+			    (vir_bytes) rprocpub, sizeof(rprocpub), S)) != OK){
+	panic("sys_safecopyfrom failed: %d", s);
+  }
+  for (i = 0; i < NR_BOOT_PROCS; i++) {
+	if (rprocpub[i].in_use) {
+		if ((s = map_service(&rprocpub[i])) != OK) {
+			panic("VFS: unable to map service: %d", s);
+		}
+	}
+  }
+
+  /* Subscribe to driver events for VFS drivers. */
+  if ((s = ds_subscribe("drv\\.vfs\\..*", DSF_INITIAL | DSF_OVERWRITE)) != OK){
+	panic("VFS: can't subscribe to driver events (%d)", s);
+  }
+
+#if DO_SANITYCHECKS
+  FIXME("VFS: DO_SANITYCHECKS is on");
+#endif
+
+  /* Initialize worker threads */
+  for (i = 0; i < NR_WTHREADS; i++) {
+	worker_init(&workers[i]);
+  }
+  worker_init(&sys_worker);	/* exclusive system worker thread */
+  worker_init(&dl_worker);	/* exclusive worker thread to resolve deadlocks */
+
+  /* Initialize global locks */
+  if (mthread_mutex_init(&pm_lock, NULL) != 0)
+	panic("VFS: couldn't initialize pm lock mutex");
+  if (mthread_mutex_init(&exec_lock, NULL) != 0)
+	panic("VFS: couldn't initialize exec lock");
+  if (mthread_mutex_init(&bsf_lock, NULL) != 0)
+	panic("VFS: couldn't initialize block special file lock");
+
+  /* Initialize event resources for boot procs and locks for all procs */
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	assert(mutex_init(&rfp->fp_lock, NULL) == 0);
+#if LOCK_DEBUG
+	rfp->fp_vp_rdlocks = 0;
+	rfp->fp_vmnt_rdlocks = 0;
+#endif
+  }
+
+  init_vnodes();		/* init vnodes */
+  init_vmnts();			/* init vmnt structures */
+  init_select();		/* init select() structures */
+  init_filps();			/* Init filp structures */
+  mount_pfs();			/* mount Pipe File Server */
+  worker_start(do_init_root);	/* mount initial ramdisk as file system root */
+
+  return(OK);
+}
+
+/*===========================================================================*
+ *				do_init_root				     *
+ *===========================================================================*/
+PRIVATE void *do_init_root(void *arg)
+{
+  struct fproc *rfp;
+  struct job my_job;
+  int r;
+  char *mount_label = "fs_imgrd";	/* FIXME: obtain this from RS */
+
+  my_job = *((struct job *) arg);
+  fp = my_job.j_fp;
+
+  lock_proc(fp, 1 /* force lock */);	/* This proc is busy */
+  lock_pm();
+
+  /* Initialize process directories.
mount_fs will set them to the correct + * values */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + FD_ZERO(&(rfp->fp_filp_inuse)); + rfp->fp_rd = NULL; + rfp->fp_wd = NULL; + } + + if ((r = mount_fs(DEV_IMGRD, "/", MFS_PROC_NR, 0, mount_label)) != OK) + panic("Failed to initialize root"); + + unlock_pm(); + thread_cleanup(fp); + return(NULL); +} + +/*===========================================================================* + * lock_proc * + *===========================================================================*/ +PUBLIC void lock_proc(struct fproc *rfp, int force_lock) +{ + int r; + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + r = mutex_trylock(&rfp->fp_lock); + + /* Were we supposed to obtain this lock immediately? */ + if (force_lock) { + assert(r == 0); + return; + } + + if (r == 0) return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + assert(mutex_lock(&rfp->fp_lock) == 0); + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_proc * + *===========================================================================*/ +PUBLIC void unlock_proc(struct fproc *rfp) +{ + int r; + + if ((r = mutex_unlock(&rfp->fp_lock)) != 0) + panic("Failed to unlock: %d", r); +} + +/*===========================================================================* + * thread_cleanup * + *===========================================================================*/ +PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l) +{ +/* Clean up worker thread. Skip parts if this thread is not associated + * with a particular process (i.e., rfp is NULL) */ + + if (verbose) printf("AVFS: thread %d is cleaning up for fp=%p (%s:%d)\n", + mthread_self(), rfp, f, l); + + assert(mthread_self() != -1); + +#if LOCK_DEBUG + if (rfp != NULL) { + check_filp_locks_by_me(); + check_vnode_locks_by_me(rfp); + check_vmnt_locks_by_me(rfp); + } +#endif + + if (rfp != NULL && rfp->fp_flags & FP_PM_PENDING) { /* Postponed PM call */ + m_in = rfp->fp_job.j_m_in; + rfp->fp_flags &= ~FP_PM_PENDING; + service_pm_postponed(); + } + +#if LOCK_DEBUG + if (rfp != NULL) { + check_filp_locks_by_me(); + check_vnode_locks_by_me(rfp); + check_vmnt_locks_by_me(rfp); + } +#endif + + if (rfp != NULL) unlock_proc(rfp); + +#if 0 + mthread_exit(NULL); +#endif +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work() +{ + /* Normally wait for new input. However, if 'reviving' is + * nonzero, a suspended process must be awakened. + */ + int r, found_one, proc_p; + register struct fproc *rp; + + if (verbose) printf("VFS: get_work looking for work\n"); + + while (reviving != 0) { + found_one = FALSE; + + /* Find a suspended process. */ + for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) + if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) { + found_one = TRUE; /* Found a suspended process */ + if (unblock(rp)) + return; /* So main loop can process job */ + send_work(); + } + + if (!found_one) /* Consistency error */ + panic("VFS: get_work couldn't revive anyone"); + } + + for(;;) { + /* Normal case. No one to revive. Get a useful request. 
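+	 * sef_receive(ANY, ...) blocks until a message arrives from any
+	 * endpoint; an EDEADSRCDST reply signals a failed sendrec to a dead
+	 * endpoint and is bounced back to the main loop (see below).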
*/ + if ((r = sef_receive(ANY, &m_in)) != OK) { + panic("VFS: sef_receive error: %d", r); + } + + proc_p = _ENDPOINT_P(m_in.m_source); + if (proc_p < 0) fp = NULL; + else fp = &fproc[proc_p]; + + if (m_in.m_type == EDEADSRCDST) return; /* Failed 'sendrec' */ + + if (verbose) printf("AVFS: got work from %d (fp=%p)\n", m_in.m_source, + fp); + + /* Negative who_p is never used to access the fproc array. Negative + * numbers (kernel tasks) are treated in a special way. + */ + if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc))) + panic("receive process out of range: %d", who_p); + if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) { + printf("VFS: ignoring request from %d, endpointless slot %d (%d)\n", + m_in.m_source, who_p, m_in.m_type); + continue; + } + + /* Internal consistency check; our mental image of process numbers and + * endpoints must match with how the rest of the system thinks of them. + */ + if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) { + if (fproc[who_p].fp_endpoint == NONE) + printf("slot unknown even\n"); + + printf("VFS: receive endpoint inconsistent (source %d, who_p " + "%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p, + fproc[who_p].fp_endpoint, who_e); + panic("VFS: inconsistent endpoint "); + } + + return; + } +} + + +/*===========================================================================* + * reply * + *===========================================================================*/ +PUBLIC void reply(whom, result) +int whom; /* process to reply to */ +int result; /* result of the call (usually OK or error #) */ +{ +/* Send a reply to a user process. If the send fails, just ignore it. */ + int r; + + m_out.reply_type = result; + r = sendnb(whom, &m_out); + if (r != OK) { + printf("VFS: couldn't send reply %d to %d: %d\n", result, whom, r); + panic("Yikes %d", call_nr); + } +} + +/*===========================================================================* + * service_pm_postponed * + *===========================================================================*/ +PRIVATE void service_pm_postponed(void) +{ + int r; + vir_bytes pc; + +#if 0 + printf("executing postponed: "); + if (call_nr == PM_EXEC) printf("PM_EXEC"); + if (call_nr == PM_EXIT) printf("PM_EXIT"); + if (call_nr == PM_DUMPCORE) printf("PM_DUMPCORE"); + printf("\n"); +#endif + + switch(call_nr) { + case PM_EXEC: + r = pm_exec(m_in.PM_PROC, m_in.PM_PATH, m_in.PM_PATH_LEN, + m_in.PM_FRAME, m_in.PM_FRAME_LEN, &pc); + + /* Reply status to PM */ + m_out.m_type = PM_EXEC_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + m_out.PM_PC = (void*)pc; + m_out.PM_STATUS = r; + + break; + + case PM_EXIT: + pm_exit(m_in.PM_PROC); + + /* Reply dummy status to PM for synchronization */ + m_out.m_type = PM_EXIT_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_DUMPCORE: + r = pm_dumpcore(m_in.PM_PROC, + NULL /* (struct mem_map *) m_in.PM_SEGPTR */); + + /* Reply status to PM */ + m_out.m_type = PM_CORE_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + m_out.PM_STATUS = r; + + break; + + default: + panic("Unhandled postponed PM call %d", m_in.m_type); + } + + r = send(PM_PROC_NR, &m_out); + if (r != OK) + panic("service_pm_postponed: send failed: %d", r); +} + +/*===========================================================================* + * service_pm * + *===========================================================================*/ +PRIVATE void service_pm() +{ + int r, slot; + + if (verbose) printf("service_pm: %d (%d)\n", call_nr, mthread_self()); + switch (call_nr) { + case PM_SETUID: + 
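+	/* Update the target process' real and effective uid, then break to
+	 * the common acknowledgement send() at the end of this function; the
+	 * simple PM requests below all follow this same pattern. */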
pm_setuid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID); + + m_out.m_type = PM_SETUID_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_SETGID: + pm_setgid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID); + + m_out.m_type = PM_SETGID_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_SETSID: + pm_setsid(m_in.PM_PROC); + + m_out.m_type = PM_SETSID_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_EXEC: + case PM_EXIT: + case PM_DUMPCORE: + okendpt(m_in.PM_PROC, &slot); + fp = &fproc[slot]; + + assert(!(fp->fp_flags & FP_PENDING)); + fp->fp_job.j_m_in = m_in; + fp->fp_flags |= FP_PM_PENDING; + +#if 0 + printf("Postponing: "); + if (call_nr == PM_EXEC) printf("PM_EXEC"); + if (call_nr == PM_EXIT) printf("PM_EXIT"); + if (call_nr == PM_DUMPCORE) printf("PM_DUMPCORE"); + printf("\n"); +#endif + + /* PM requests on behalf of a proc are handled after the system call + * that might be in progress for that proc has finished. If the proc + * is not busy, we start a dummy call */ + if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) { + mutex_unlock(&fp->fp_lock); + worker_start(do_dummy); + yield(); + } + + return; + + case PM_FORK: + case PM_SRV_FORK: + pm_fork(m_in.PM_PPROC, m_in.PM_PROC, m_in.PM_CPID); + + m_out.m_type = (call_nr == PM_FORK) ? PM_FORK_REPLY : PM_SRV_FORK_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + case PM_SETGROUPS: + pm_setgroups(m_in.PM_PROC, m_in.PM_GROUP_NO, m_in.PM_GROUP_ADDR); + + m_out.m_type = PM_SETGROUPS_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_UNPAUSE: + unpause(m_in.PM_PROC); + + m_out.m_type = PM_UNPAUSE_REPLY; + m_out.PM_PROC = m_in.PM_PROC; + + break; + + case PM_REBOOT: + pm_reboot(); + + /* Reply dummy status to PM for synchronization */ + m_out.m_type = PM_REBOOT_REPLY; + + break; + + default: + printf("VFS: don't know how to handle PM request %d\n", call_nr); + + return; + } + + r = send(PM_PROC_NR, &m_out); + if (r != OK) + panic("service_pm: send failed: %d", r); + +} + + +/*===========================================================================* + * unblock * + *===========================================================================*/ +PRIVATE int unblock(rfp) +struct fproc *rfp; +{ + int blocked_on; + + fp = rfp; + blocked_on = rfp->fp_blocked_on; + m_in.m_type = rfp->fp_block_callnr; + m_in.fd = rfp->fp_block_fd; + m_in.buffer = rfp->fp_buffer; + m_in.nbytes = rfp->fp_nbytes; + + rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* no longer blocked */ + rfp->fp_flags &= ~FP_REVIVED; + reviving--; + assert(reviving >= 0); + + /* This should be a pipe I/O, not a device I/O. If it is, it'll 'leak' + * grants. + */ + assert(!GRANT_VALID(rfp->fp_grant)); + + /* Pending pipe reads/writes can be handled directly */ + if (blocked_on == FP_BLOCKED_ON_PIPE) { + worker_start(do_pending_pipe); + yield(); /* Give thread a chance to run */ + return(0); /* Retrieve more work */ + } + + return(1); /* We've unblocked a process */ +} diff --git a/servers/avfs/misc.c b/servers/avfs/misc.c new file mode 100644 index 000000000..0f0cba7cd --- /dev/null +++ b/servers/avfs/misc.c @@ -0,0 +1,617 @@ +/* This file contains a collection of miscellaneous procedures. Some of them + * perform simple system calls. Some others do a little part of system calls + * that are mostly performed by the Memory Manager. 
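+ * ("Memory Manager" is the historical name of what is now the process
+ * manager, PM.)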
+ * + * The entry points into this file are + * do_dup: perform the DUP system call + * do_fcntl: perform the FCNTL system call + * do_sync: perform the SYNC system call + * do_fsync: perform the FSYNC system call + * pm_reboot: sync disks and prepare for shutdown + * pm_fork: adjust the tables after PM has performed a FORK system call + * do_exec: handle files with FD_CLOEXEC on after PM has done an EXEC + * do_exit: a process has exited; note that in the tables + * do_set: set uid or gid for some process + * do_revive: revive a process that was waiting for something (e.g. TTY) + * do_svrctl: file system control + * do_getsysinfo: request copy of FS data structure + * pm_dumpcore: create a core dump + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "dmap.h" +#include +#include "vnode.h" +#include "vmnt.h" +#include "param.h" + +#define CORE_NAME "core" +#define CORE_MODE 0777 /* mode to use on core image files */ + +#if ENABLE_SYSCALL_STATS +PUBLIC unsigned long calls_stats[NCALLS]; +#endif + +FORWARD _PROTOTYPE( void free_proc, (struct fproc *freed, int flags) ); +/* +FORWARD _PROTOTYPE( int dumpcore, (int proc_e, struct mem_map *seg_ptr) ); +FORWARD _PROTOTYPE( int write_bytes, (struct inode *rip, off_t off, + char *buf, size_t bytes) ); +FORWARD _PROTOTYPE( int write_seg, (struct inode *rip, off_t off, int proc_e, + int seg, off_t seg_off, phys_bytes seg_bytes) ); +*/ + +/*===========================================================================* + * do_getsysinfo * + *===========================================================================*/ +PUBLIC int do_getsysinfo() +{ + vir_bytes src_addr, dst_addr; + size_t len; + + /* Only su may call do_getsysinfo. This call may leak information (and is not + * stable enough to be part of the API/ABI). + */ + + if (!super_user) return(EPERM); + + /* This call should no longer be used by user applications. In the future, + * requests from non-system processes should be denied. For now, just warn. + */ + if (call_nr == GETSYSINFO) { + printf("VFS: obsolete call of do_getsysinfo() by proc %d\n", + fp->fp_endpoint); + } + + switch(m_in.info_what) { + case SI_PROC_TAB: + src_addr = (vir_bytes) fproc; + len = sizeof(struct fproc) * NR_PROCS; + break; + case SI_DMAP_TAB: + src_addr = (vir_bytes) dmap; + len = sizeof(struct dmap) * NR_DEVICES; + break; +#if ENABLE_SYSCALL_STATS + case SI_CALL_STATS: + src_addr = (vir_bytes) calls_stats; + len = sizeof(calls_stats); + break; +#endif + default: + return(EINVAL); + } + + dst_addr = (vir_bytes) m_in.info_where; + return sys_datacopy(SELF, src_addr, who_e, dst_addr, len); +} + +/*===========================================================================* + * do_dup * + *===========================================================================*/ +PUBLIC int do_dup() +{ +/* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are + * obsolete. In fact, it is not even possible to invoke them using the + * current library because the library routines call fcntl(). They are + * provided to permit old binary programs to continue to run. + */ + + register int rfd; + register struct filp *f; + int r = OK; + + /* Is the file descriptor valid? */ + rfd = m_in.fd & ~DUP_MASK; /* kill off dup2 bit, if on */ + if ((f = get_filp(rfd, VNODE_READ)) == NULL) return(err_code); + + /* Distinguish between dup and dup2. 
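+   * dup(fd) arrives with the descriptor as-is, while dup2(fd, fd2) arrives
+   * with the DUP_MASK bit or'ed into fd, so m_in.fd == rfd holds only for
+   * a plain dup() call.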
*/ + if (m_in.fd == rfd) { /* bit not on */ + /* dup(fd) */ + r = get_fd(0, 0, &m_in.fd2, NULL); + } else { + /* dup2(old_fd, new_fd) */ + if (m_in.fd2 < 0 || m_in.fd2 >= OPEN_MAX) { + r = EBADF; + } else if (rfd == m_in.fd2) { /* ignore the call: dup2(x, x) */ + r = m_in.fd2; + } else { + /* All is fine, close new_fd if necessary */ + m_in.fd = m_in.fd2; /* prepare to close fd2 */ + unlock_filp(f); /* or it might deadlock on do_close */ + (void) do_close(); /* cannot fail */ + f = get_filp(rfd, VNODE_READ); /* lock old_fd again */ + } + } + + if (r == OK) { + /* Success. Set up new file descriptors. */ + f->filp_count++; + fp->fp_filp[m_in.fd2] = f; + FD_SET(m_in.fd2, &fp->fp_filp_inuse); + r = m_in.fd2; + } + + unlock_filp(f); + return(r); +} + +/*===========================================================================* + * do_fcntl * + *===========================================================================*/ +PUBLIC int do_fcntl() +{ +/* Perform the fcntl(fd, request, ...) system call. */ + + register struct filp *f; + int new_fd, fl, r = OK; + tll_access_t locktype; + + /* Is the file descriptor valid? */ + locktype = (m_in.request == F_FREESP) ? VNODE_WRITE : VNODE_READ; + if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code); + + switch (m_in.request) { + case F_DUPFD: + /* This replaces the old dup() system call. */ + if (m_in.addr < 0 || m_in.addr >= OPEN_MAX) r = EINVAL; + else if ((r = get_fd(m_in.addr, 0, &new_fd, NULL)) == OK) { + f->filp_count++; + fp->fp_filp[new_fd] = f; + r = new_fd; + } + break; + + case F_GETFD: + /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */ + r = FD_ISSET(m_in.fd, &fp->fp_cloexec_set) ? FD_CLOEXEC : 0; + break; + + case F_SETFD: + /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */ + if(m_in.addr & FD_CLOEXEC) + FD_SET(m_in.fd, &fp->fp_cloexec_set); + else + FD_CLR(m_in.fd, &fp->fp_cloexec_set); + break; + + case F_GETFL: + /* Get file status flags (O_NONBLOCK and O_APPEND). */ + fl = f->filp_flags & (O_NONBLOCK | O_APPEND | O_ACCMODE); + r = fl; + break; + + case F_SETFL: + /* Set file status flags (O_NONBLOCK and O_APPEND). */ + fl = O_NONBLOCK | O_APPEND | O_REOPEN; + f->filp_flags = (f->filp_flags & ~fl) | (m_in.addr & fl); + break; + + case F_GETLK: + case F_SETLK: + case F_SETLKW: + /* Set or clear a file lock. */ + r = lock_op(f, m_in.request); + break; + + case F_FREESP: + { + /* Free a section of a file. Preparation is done here, actual freeing + * in freesp_inode(). + */ + off_t start, end; + struct flock flock_arg; + signed long offset; + + /* Check if it's a regular file. */ + if ((f->filp_vno->v_mode & I_TYPE) != I_REGULAR) r = EINVAL; + else if (!(f->filp_mode & W_BIT)) r = EBADF; + else + /* Copy flock data from userspace. */ + r = sys_datacopy(who_e, (vir_bytes) m_in.name1, SELF, + (vir_bytes) &flock_arg, + (phys_bytes) sizeof(flock_arg)); + + if (r != OK) break; + + /* Convert starting offset to signed. */ + offset = (signed long) flock_arg.l_start; + + /* Figure out starting position base. */ + switch(flock_arg.l_whence) { + case SEEK_SET: start = 0; break; + case SEEK_CUR: + if (ex64hi(f->filp_pos) != 0) + panic("do_fcntl: position in file too high"); + start = ex64lo(f->filp_pos); + break; + case SEEK_END: start = f->filp_vno->v_size; break; + default: r = EINVAL; + } + if (r != OK) break; + + /* Check for overflow or underflow. 
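+	 * e.g. a positive l_start added to start can only yield a value
+	 * below start on signed overflow, and a negative l_start can only
+	 * yield a value above start on underflow; both are rejected.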
*/ + if (offset > 0 && start + offset < start) r = EINVAL; + else if (offset < 0 && start + offset > start) r = EINVAL; + else { + start += offset; + if (start < 0) r = EINVAL; + } + if (r != OK) break; + + if (flock_arg.l_len != 0) { + if (start >= f->filp_vno->v_size) r = EINVAL; + else if ((end = start + flock_arg.l_len) <= start) r = EINVAL; + else if (end > f->filp_vno->v_size) end = f->filp_vno->v_size; + } else { + end = 0; + } + if (r != OK) break; + + r = req_ftrunc(f->filp_vno->v_fs_e, f->filp_vno->v_inode_nr,start,end); + + if (r == OK && flock_arg.l_len == 0) + f->filp_vno->v_size = start; + + break; + } + + default: + r = EINVAL; + } + + unlock_filp(f); + return(r); +} + +/*===========================================================================* + * do_sync * + *===========================================================================*/ +PUBLIC int do_sync() +{ + struct vmnt *vmp; + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { + lock_vmnt(vmp, VMNT_EXCL); + if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE) + req_sync(vmp->m_fs_e); + unlock_vmnt(vmp); + } + + return(OK); +} + +/*===========================================================================* + * do_fsync * + *===========================================================================*/ +PUBLIC int do_fsync() +{ +/* Perform the fsync() system call. For now, don't be unnecessarily smart. */ + struct filp *rfilp; + struct vmnt *vmp; + dev_t dev; + + if ((rfilp = get_filp(m_in.m1_i1, VNODE_READ)) == NULL) return(err_code); + dev = rfilp->filp_vno->v_dev; + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { + lock_vmnt(vmp, VMNT_EXCL); + if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && vmp->m_fs_e != NONE) + req_sync(vmp->m_fs_e); + unlock_vmnt(vmp); + } + + unlock_filp(rfilp); + + return(OK); +} + +/*===========================================================================* + * pm_reboot * + *===========================================================================*/ +PUBLIC void pm_reboot() +{ + /* Perform the VFS side of the reboot call. */ + int i; + struct fproc *rfp; + + do_sync(); + + /* Do exit processing for all leftover processes and servers, + * but don't actually exit them (if they were really gone, PM + * will tell us about it). + */ + for (i = 0; i < NR_PROCS; i++) { + /* Don't just free the proc right away, but let it finish what it was + * doing first */ + rfp = &fproc[i]; + if (rfp->fp_endpoint != NONE) { + lock_proc(rfp, 0); + free_proc(rfp, 0); + unlock_proc(rfp); + } + } + + unmount_all(); +} + +/*===========================================================================* + * pm_fork * + *===========================================================================*/ +PUBLIC void pm_fork(pproc, cproc, cpid) +int pproc; /* Parent process */ +int cproc; /* Child process */ +int cpid; /* Child process id */ +{ +/* Perform those aspects of the fork() system call that relate to files. + * In particular, let the child inherit its parent's file descriptors. + * The parent and child parameters tell who forked off whom. The file + * system uses the same slot numbers as the kernel. Only PM makes this call. + */ + + register struct fproc *cp, *pp; + int i, parentno, childno; + mutex_t c_fp_lock; + + /* Check up-to-dateness of fproc. */ + okendpt(pproc, &parentno); + + /* PM gives child endpoint, which implies process slot information. + * Don't call isokendpt, because that will verify if the endpoint + * number is correct in fproc, which it won't be. 
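+ * (PM reports the fork before VFS has filled in the child's slot in fproc,
+ * so only the slot number implied by the endpoint itself is usable here.)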
+ */ + childno = _ENDPOINT_P(cproc); + if (childno < 0 || childno >= NR_PROCS) + panic("VFS: bogus child for forking: %d", m_in.child_endpt); + if (fproc[childno].fp_pid != PID_FREE) + panic("VFS: forking on top of in-use child: %d", childno); + + /* Copy the parent's fproc struct to the child. */ + /* However, the mutex variables belong to a slot and must stay the same. */ + c_fp_lock = fproc[childno].fp_lock; + fproc[childno] = fproc[parentno]; + fproc[childno].fp_lock = c_fp_lock; + + /* Increase the counters in the 'filp' table. */ + cp = &fproc[childno]; + pp = &fproc[parentno]; + + for (i = 0; i < OPEN_MAX; i++) + if (cp->fp_filp[i] != NULL) cp->fp_filp[i]->filp_count++; + + /* Fill in new process and endpoint id. */ + cp->fp_pid = cpid; + cp->fp_endpoint = cproc; + + /* A forking process never has an outstanding grant, as it isn't blocking on + * I/O. */ + if(GRANT_VALID(pp->fp_grant)) { + panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp->fp_endpoint, + pp->fp_grant); + } + if(GRANT_VALID(cp->fp_grant)) { + panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp->fp_endpoint, + cp->fp_grant); + } + + /* A child is not a process leader, not being revived, etc. */ + cp->fp_flags = FP_NOFLAGS; + + /* Record the fact that both root and working dir have another user. */ + if (cp->fp_rd) dup_vnode(cp->fp_rd); + if (cp->fp_wd) dup_vnode(cp->fp_wd); +} + +/*===========================================================================* + * free_proc * + *===========================================================================*/ +PRIVATE void free_proc(struct fproc *exiter, int flags) +{ + int i; + register struct fproc *rfp; + register struct filp *rfilp; + register struct vnode *vp; + dev_t dev; + + if (exiter->fp_endpoint == NONE) + panic("free_proc: already free"); + + if (fp_is_blocked(exiter)) + unpause(exiter->fp_endpoint); + + /* Loop on file descriptors, closing any that are open. */ + for (i = 0; i < OPEN_MAX; i++) { + (void) close_fd(exiter, i); + } + + /* Check if any process is SUSPENDed on this driver. + * If a driver exits, unmap its entries in the dmap table. + * (unmapping has to be done after the first step, because the + * dmap table is used in the first step.) + */ + unsuspend_by_endpt(exiter->fp_endpoint); + + /* Release root and working directories. */ + if (exiter->fp_rd) { put_vnode(exiter->fp_rd); exiter->fp_rd = NULL; } + if (exiter->fp_wd) { put_vnode(exiter->fp_wd); exiter->fp_wd = NULL; } + + /* The rest of these actions is only done when processes actually exit. */ + if (!(flags & FP_EXITING)) return; + + /* Invalidate endpoint number for error and sanity checks. */ + exiter->fp_endpoint = NONE; + exiter->fp_flags |= FP_EXITING; + + /* If a session leader exits and it has a controlling tty, then revoke + * access to its controlling tty from all other processes using it. + */ + if ((exiter->fp_flags & FP_SESLDR) && exiter->fp_tty != 0) { + dev = exiter->fp_tty; + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if(rfp->fp_pid == PID_FREE) continue; + if (rfp->fp_tty == dev) rfp->fp_tty = 0; + + for (i = 0; i < OPEN_MAX; i++) { + if ((rfilp = rfp->fp_filp[i]) == NULL) continue; + if (rfilp->filp_mode == FILP_CLOSED) continue; + vp = rfilp->filp_vno; + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + if ((dev_t) vp->v_sdev != dev) continue; + lock_filp(rfilp, VNODE_READ); + (void) dev_close(dev, rfilp-filp); /* Ignore any errors, even + * SUSPEND. */ + + rfilp->filp_mode = FILP_CLOSED; + unlock_filp(rfilp); + } + } + } + + /* Exit done. 
Mark slot as free. */ + exiter->fp_pid = PID_FREE; + if (exiter->fp_flags & FP_PENDING) + pending--; /* No longer pending job, not going to do it */ + exiter->fp_flags = FP_NOFLAGS; +} + +/*===========================================================================* + * pm_exit * + *===========================================================================*/ +PUBLIC void pm_exit(proc) +int proc; +{ +/* Perform the file system portion of the exit(status) system call. */ + int exitee_p; + + /* Nevertheless, pretend that the call came from the user. */ + okendpt(proc, &exitee_p); + fp = &fproc[exitee_p]; + free_proc(fp, FP_EXITING); +} + +/*===========================================================================* + * pm_setgid * + *===========================================================================*/ +PUBLIC void pm_setgid(proc_e, egid, rgid) +int proc_e; +int egid; +int rgid; +{ + register struct fproc *tfp; + int slot; + + okendpt(proc_e, &slot); + tfp = &fproc[slot]; + + tfp->fp_effgid = egid; + tfp->fp_realgid = rgid; +} + + +/*===========================================================================* + * pm_setgroups * + *===========================================================================*/ +PUBLIC void pm_setgroups(proc_e, ngroups, groups) +int proc_e; +int ngroups; +gid_t *groups; +{ + struct fproc *rfp; + int slot; + + okendpt(proc_e, &slot); + rfp = &fproc[slot]; + if (ngroups * sizeof(gid_t) > sizeof(rfp->fp_sgroups)) + panic("VFS: pm_setgroups: too much data to copy"); + if (sys_datacopy(who_e, (vir_bytes) groups, SELF, (vir_bytes) rfp->fp_sgroups, + ngroups * sizeof(gid_t)) == OK) { + rfp->fp_ngroups = ngroups; + } else + panic("VFS: pm_setgroups: datacopy failed"); +} + + +/*===========================================================================* + * pm_setuid * + *===========================================================================*/ +PUBLIC void pm_setuid(proc_e, euid, ruid) +int proc_e; +int euid; +int ruid; +{ + struct fproc *tfp; + int slot; + + okendpt(proc_e, &slot); + tfp = &fproc[slot]; + + tfp->fp_effuid = euid; + tfp->fp_realuid = ruid; +} + +/*===========================================================================* + * do_svrctl * + *===========================================================================*/ +PUBLIC int do_svrctl() +{ + switch (m_in.svrctl_req) { + /* No control request implemented yet. */ + default: + return(EINVAL); + } +} + +/*===========================================================================* + * pm_dumpcore * + *===========================================================================*/ +PUBLIC int pm_dumpcore(proc_e, seg_ptr) +int proc_e; +struct mem_map *seg_ptr; +{ + int slot; + + okendpt(proc_e, &slot); + free_proc(&fproc[slot], FP_EXITING); + return(OK); +} + +/*===========================================================================* + * ds_event * + *===========================================================================*/ +PUBLIC void ds_event() +{ + char key[DS_MAX_KEYLEN]; + char *drv_prefix = "drv.vfs."; + u32_t value; + int type, r; + endpoint_t owner_endpoint; + + /* Get the event and the owner from DS. */ + if ((r = ds_check(key, &type, &owner_endpoint)) != OK) { + if(r != ENOENT) printf("VFS: ds_event: ds_check failed: %d\n", r); + return; + } + if ((r = ds_retrieve_u32(key, &value)) != OK) { + printf("VFS: ds_event: ds_retrieve_u32 failed\n"); + return; + } + + /* Only check for VFS driver up events. 
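+   * i.e. DS keys matching "drv.vfs.*" (the driver label follows the
+   * prefix) whose value is DS_DRIVER_UP; any other event is ignored.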
*/
+  if (strncmp(key, drv_prefix, strlen(drv_prefix)) || value != DS_DRIVER_UP)
+	return;
+
+  /* Perform up. */
+  dmap_endpt_up(owner_endpoint);
+}
diff --git a/servers/avfs/mount.c b/servers/avfs/mount.c
new file mode 100644
index 000000000..00f8c8258
--- /dev/null
+++ b/servers/avfs/mount.c
@@ -0,0 +1,605 @@
+/* This file performs the MOUNT and UMOUNT system calls.
+ *
+ * The entry points into this file are
+ *   do_fsready:	perform the FS_READY system call
+ *   do_mount:		perform the MOUNT system call
+ *   do_umount:		perform the UMOUNT system call
+ *   unmount:		unmount a file system
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include
+#include "vnode.h"
+#include "vmnt.h"
+#include "path.h"
+#include "param.h"
+
+/* Allow the root to be replaced before the first 'real' mount. */
+PRIVATE int have_root = 0;
+
+/* Bitmap of in-use "none" pseudo devices. */
+PRIVATE bitchunk_t nonedev[BITMAP_CHUNKS(NR_NONEDEVS)] = { 0 };
+
+#define alloc_nonedev(dev) SET_BIT(nonedev, minor(dev) - 1)
+#define free_nonedev(dev) UNSET_BIT(nonedev, minor(dev) - 1)
+
+FORWARD _PROTOTYPE( dev_t name_to_dev, (int allow_mountpt,
+					char path[PATH_MAX+1]) );
+FORWARD _PROTOTYPE( int is_nonedev, (dev_t dev) );
+FORWARD _PROTOTYPE( dev_t find_free_nonedev, (void) );
+FORWARD _PROTOTYPE( void update_bspec, (dev_t dev, endpoint_t fs_e,
+					int send_drv_e) );
+
+/*===========================================================================*
+ *				update_bspec				     *
+ *===========================================================================*/
+PRIVATE void update_bspec(dev_t dev, endpoint_t fs_e, int send_drv_e)
+{
+/* Update all block special files for a certain device, to use a new FS endpt
+ * to route raw block I/O requests through.
+ */
+  struct vnode *vp;
+  struct dmap *dp;
+  int r, major;
+
+  for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
+	if (vp->v_ref_count > 0 && S_ISBLK(vp->v_mode) && vp->v_sdev == dev) {
+		vp->v_bfs_e = fs_e;
+		if (send_drv_e) {
+			major = major(dev);
+			if (major < 0 || major >= NR_DEVICES) {
+				/* Can't update driver endpoint for out of
+				 * range major */
+				continue;
+			}
+			dp = &dmap[major(dev)];
+			if (dp->dmap_driver == NONE) {
+				/* Can't send new driver endpoint for
+				 * vanished driver */
+				printf("VFS: can't send new driver endpt\n");
+				continue;
+			}
+
+			if ((r = req_newdriver(fs_e, vp->v_sdev,
+					       dp->dmap_driver)) != OK) {
+				printf("VFS: Failed to send new driver endpoint"
+				       " for moved block special file\n");
+			}
+		}
+	}
+}
+
+/*===========================================================================*
+ *				do_fsready				     *
+ *===========================================================================*/
+PUBLIC int do_fsready()
+{
+  /* deprecated */
+  return(SUSPEND);
+}
+
+/*===========================================================================*
+ *				do_mount				     *
+ *===========================================================================*/
+PUBLIC int do_mount()
+{
+/* Perform the mount(name, mfile, mount_flags) system call. */
+  endpoint_t fs_e;
+  int r, slot, rdonly, nodev;
+  char fullpath[PATH_MAX+1];
+  char mount_label[LABEL_MAX];
+  dev_t dev;
+
+  /* Only the super-user may do MOUNT. */
+  if (!super_user) return(EPERM);
+
+  /* FS process' endpoint number */
+  if (m_in.mount_flags & MS_LABEL16) {
+	/* Get the label from the caller, and ask DS for the endpoint.
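+	 * (ds_retrieve_label_endpt() maps a label registered with DS to the
+	 * endpoint of the process that carries that label)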
*/ + r = sys_datacopy(who_e, (vir_bytes) m_in.fs_label, SELF, + (vir_bytes) mount_label, (phys_bytes) sizeof(mount_label)); + if (r != OK) return(r); + + mount_label[sizeof(mount_label)-1] = 0; + + r = ds_retrieve_label_endpt(mount_label, &fs_e); + if (r != OK) return(r); + } else { + /* Legacy support: get the endpoint from the request itself. */ + fs_e = (endpoint_t) m_in.fs_label; + mount_label[0] = 0; + } + + /* Sanity check on process number. */ + if (isokendpt(fs_e, &slot) != OK) return(EINVAL); + + /* Should the file system be mounted read-only? */ + rdonly = (m_in.mount_flags & MS_RDONLY); + + /* A null string for block special device means don't use a device at all. */ + nodev = (m_in.name1_length == 0); + if (!nodev) { + /* If 'name' is not for a block special file, return error. */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((dev = name_to_dev(FALSE /*allow_mountpt*/, fullpath)) == NO_DEV) + return(err_code); + } else { + /* Find a free pseudo-device as substitute for an actual device. */ + if ((dev = find_free_nonedev()) == NO_DEV) + return(err_code); + } + + /* Fetch the name of the mountpoint */ + if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK) + return(err_code); + + /* Do the actual job */ + return mount_fs(dev, fullpath, fs_e, rdonly, mount_label); +} + + +/*===========================================================================* + * mount_fs * + *===========================================================================*/ +PUBLIC int mount_fs( +dev_t dev, +char mountpoint[PATH_MAX+1], +endpoint_t fs_e, +int rdonly, +char mount_label[LABEL_MAX] ) +{ + int rdir, mdir; /* TRUE iff {root|mount} file is dir */ + int i, r = OK, found, isroot, mount_root; + struct fproc *tfp; + struct dmap *dp; + struct vnode *root_node, *vp = NULL, *bspec; + struct vmnt *new_vmp, *parent_vmp; + char *label; + struct node_details res; + struct lookup resolve; + + /* Look up block device driver label when dev is not a pseudo-device */ + label = ""; + if (!is_nonedev(dev)) { + /* Get driver process' endpoint */ + dp = &dmap[major(dev)]; + if (dp->dmap_driver == NONE) { + printf("VFS: no driver for dev %d\n", dev); + return(EINVAL); + } + + label = dp->dmap_label; + assert(strlen(label) > 0); + } + + lock_bsf(); + + /* Check whether there is a block special file open which uses the + * same device (partition) */ + for (bspec = &vnode[0]; bspec < &vnode[NR_VNODES]; ++bspec) { + if (bspec->v_ref_count > 0 && bspec->v_sdev == dev) { + /* Found, flush and invalidate any blocks for this device. */ + req_flush(bspec->v_fs_e, dev); + break; + } + } + + /* Scan vmnt table to see if dev already mounted. 
If not, find a free slot.*/ + found = FALSE; + for (i = 0; i < NR_MNTS; ++i) { + if (vmnt[i].m_dev == dev) found = TRUE; + } + if (found) { + unlock_bsf(); + return(EBUSY); + } else if ((new_vmp = get_free_vmnt()) == NULL) { + unlock_bsf(); + return(ENOMEM); + } + + lock_vmnt(new_vmp, VMNT_EXCL); + + isroot = (strcmp(mountpoint, "/") == 0); + mount_root = (isroot && have_root < 2); /* Root can be mounted twice: + * 1: ramdisk + * 2: boot disk (e.g., harddisk) + */ + + if (!mount_root) { + /* Get vnode of mountpoint */ + lookup_init(&resolve, mountpoint, PATH_NOFLAGS, &parent_vmp, &vp); + resolve.l_vmnt_lock = VMNT_EXCL; + resolve.l_vnode_lock = VNODE_WRITE; + if ((vp = eat_path(&resolve, fp)) == NULL) + r = err_code; + else if (vp->v_ref_count == 1) { + /*Tell FS on which vnode it is mounted (glue into mount tree)*/ + r = req_mountpoint(vp->v_fs_e, vp->v_inode_nr); + } else + r = EBUSY; + + if (r != OK) { + if (vp != NULL) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + put_vnode(vp); + } + unlock_vmnt(new_vmp); + unlock_bsf(); + return(r); + } + } + +/* XXX: move this upwards before lookup after proper locking. */ + /* We'll need a vnode for the root inode */ + if ((root_node = get_free_vnode()) == NULL || dev == 266) { + if (vp != NULL) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + put_vnode(vp); + } + unlock_vmnt(new_vmp); + unlock_bsf(); + return(err_code); + } + + lock_vnode(root_node, VNODE_OPCL); + + /* Store some essential vmnt data first */ + new_vmp->m_fs_e = fs_e; + new_vmp->m_dev = dev; + if (rdonly) new_vmp->m_flags |= VMNT_READONLY; + else new_vmp->m_flags &= ~VMNT_READONLY; + + /* Tell FS which device to mount */ + if ((r = req_readsuper(fs_e, label, dev, rdonly, isroot, &res)) != OK) { + if (vp != NULL) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + put_vnode(vp); + } + new_vmp->m_fs_e = NONE; + new_vmp->m_dev = NO_DEV; + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + unlock_bsf(); + return(r); + } + + /* Fill in root node's fields */ + root_node->v_fs_e = res.fs_e; + root_node->v_inode_nr = res.inode_nr; + root_node->v_mode = res.fmode; + root_node->v_uid = res.uid; + root_node->v_gid = res.gid; + root_node->v_size = res.fsize; + root_node->v_sdev = NO_DEV; + root_node->v_fs_count = 1; + root_node->v_ref_count = 1; + + /* Root node is indeed on the partition */ + root_node->v_vmnt = new_vmp; + root_node->v_dev = new_vmp->m_dev; + + if(mount_root) { + /* Superblock and root node already read. + * Nothing else can go wrong. Perform the mount. */ + new_vmp->m_root_node = root_node; + new_vmp->m_mounted_on = NULL; + strcpy(new_vmp->m_label, mount_label); + if (is_nonedev(dev)) alloc_nonedev(dev); + update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */); + + ROOT_DEV = dev; + ROOT_FS_E = fs_e; + + /* Replace all root and working directories */ + for (i = 0, tfp = fproc; i < NR_PROCS; i++, tfp++) { + if (tfp->fp_pid == PID_FREE) + continue; + +#define MAKEROOT(what) { \ + if (what) put_vnode(what); \ + dup_vnode(root_node); \ + what = root_node; \ + } + + MAKEROOT(tfp->fp_rd); + MAKEROOT(tfp->fp_wd); + } + + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + have_root++; /* We have a (new) root */ + unlock_bsf(); + return(OK); + } + + /* File types may not conflict. */ + mdir = ((vp->v_mode & I_TYPE) == I_DIRECTORY); /*TRUE iff dir*/ + rdir = ((root_node->v_mode & I_TYPE) == I_DIRECTORY); + if (!mdir && rdir) r = EISDIR; + + /* If error, return the super block and both inodes; release the vmnt. 
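+   * The bsf lock, taken first in this function, is released last, so the
+   * unlock order below is the reverse of the acquisition order.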
*/ + if (r != OK) { + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + put_vnode(vp); + put_vnode(root_node); + new_vmp->m_dev = NO_DEV; + unlock_bsf(); + return(r); + } + + /* Nothing else can go wrong. Perform the mount. */ + new_vmp->m_mounted_on = vp; + new_vmp->m_root_node = root_node; + strcpy(new_vmp->m_label, mount_label); + + /* Allocate the pseudo device that was found, if not using a real device. */ + if (is_nonedev(dev)) alloc_nonedev(dev); + + /* The new FS will handle block I/O requests for its device now. */ + update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */); + + unlock_vnode(vp); + unlock_vmnt(parent_vmp); + unlock_vnode(root_node); + unlock_vmnt(new_vmp); + unlock_bsf(); + + return(r); +} + + +/*===========================================================================* + * mount_pfs * + *===========================================================================*/ +PUBLIC void mount_pfs(void) +{ +/* Mount the Pipe File Server. It's not really mounted onto the file system, + but it's necessary it has a vmnt entry to make locking easier */ + + dev_t dev; + struct vmnt *vmp; + + if ((dev = find_free_nonedev()) == NO_DEV) + panic("VFS: no nonedev to initialize PFS"); + + if ((vmp = get_free_vmnt()) == NULL) + panic("VFS: no vmnt to initialize PFS"); + + alloc_nonedev(dev); + + vmp->m_dev = dev; + vmp->m_fs_e = PFS_PROC_NR; + strcpy(vmp->m_label, "pfs"); +} + +/*===========================================================================* + * do_umount * + *===========================================================================*/ +PUBLIC int do_umount(void) +{ +/* Perform the umount(name) system call. */ + char label[LABEL_MAX]; + dev_t dev; + int r; + char fullpath[PATH_MAX+1]; + + /* Only the super-user may do umount. */ + if (!super_user) return(EPERM); + + /* If 'name' is not for a block special file or mountpoint, return error. */ + if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) + return(err_code); + if ((dev = name_to_dev(TRUE /*allow_mountpt*/, fullpath)) == NO_DEV) + return(err_code); + + if ((r = unmount(dev, label)) != OK) return(r); + + /* Return the label of the mounted file system, so that the caller + * can shut down the corresponding server process. + */ + if (strlen(label) >= M3_LONG_STRING) /* should never evaluate to true */ + label[M3_LONG_STRING-1] = 0; + strcpy(m_out.umount_label, label); + return(OK); +} + + +/*===========================================================================* + * unmount * + *===========================================================================*/ +PUBLIC int unmount( + dev_t dev, /* block-special device */ + char *label /* buffer to retrieve label, or NULL */ +) +{ + struct vnode *vp; + struct vmnt *vmp_i = NULL, *vmp = NULL; + int count, locks, r; + + /* Find vmnt that is to be unmounted */ + for (vmp_i = &vmnt[0]; vmp_i < &vmnt[NR_MNTS]; ++vmp_i) { + if (vmp_i->m_dev == dev) { + if(vmp) panic("device mounted more than once: %d", dev); + vmp = vmp_i; + } + } + + /* Did we find the vmnt (i.e., was dev a mounted device)? */ + if(!vmp) return(EINVAL); + + lock_bsf(); + + assert(lock_vmnt(vmp, VMNT_EXCL) == OK); + + /* See if the mounted device is busy. Only 1 vnode using it should be + * open -- the root vnode -- and that inode only 1 time. 
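+ * (Sketch of the accounting below: 'count' sums v_ref_count over all
+ * vnodes on this device and 'locks' counts how many of them are locked;
+ * with only the root vnode in use both stay at or below 1, and anything
+ * higher means open files or in-flight requests, hence EBUSY.)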
*/ + locks = count = 0; + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) + if (vp->v_ref_count > 0 && vp->v_dev == dev) { + count += vp->v_ref_count; + if (is_vnode_locked(vp)) locks++; + } + + if (count > 1 || locks > 1) { + unlock_vmnt(vmp); + unlock_bsf(); + return(EBUSY); /* can't umount a busy file system */ + } + + /* Tell FS to drop all inode references for root inode except 1. */ + vnode_clean_refs(vmp->m_root_node); + + if (vmp->m_mounted_on) { + put_vnode(vmp->m_mounted_on); + vmp->m_mounted_on = NULL; + } + + vmp->m_comm.c_max_reqs = 1; /* Force max concurrent reqs to just one, so + * we won't send any messages after the + * unmount request */ + + /* Tell FS to unmount */ + if ((r = req_unmount(vmp->m_fs_e)) != OK) /* Not recoverable. */ + printf("VFS: ignoring failed umount attempt FS endpoint: %d (%d)\n", + vmp->m_fs_e, r); + + if (is_nonedev(vmp->m_dev)) free_nonedev(vmp->m_dev); + + if (label != NULL) strcpy(label, vmp->m_label); + + if (vmp->m_root_node) { /* PFS lacks a root node */ + vmp->m_root_node->v_ref_count = 0; + vmp->m_root_node->v_fs_count = 0; + vmp->m_root_node->v_sdev = NO_DEV; + vmp->m_root_node = NULL; + } + vmp->m_dev = NO_DEV; + vmp->m_fs_e = NONE; + + /* The root FS will handle block I/O requests for this device now. */ + update_bspec(dev, ROOT_FS_E, 1 /* send new driver endpoint */); + + unlock_vmnt(vmp); + unlock_bsf(); + return(OK); +} + + +/*===========================================================================* + * unmount_all * + *===========================================================================*/ +PUBLIC void unmount_all(void) +{ +/* Unmount all filesystems. File systems are mounted on other file systems, + * so you have to pull off the loose bits repeatedly to get it all undone. + */ + + int i; + struct vmnt *vmp; + + /* Now unmount the rest */ + for (i = 0; i < NR_MNTS; i++) { + /* Unmount at least one. */ + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) { + if (vmp->m_dev != NO_DEV) + unmount(vmp->m_dev, NULL); + } + } + check_vnode_locks(); + check_vmnt_locks(); + check_filp_locks(); + check_bsf_lock(); +} + +/*===========================================================================* + * name_to_dev * + *===========================================================================*/ +PRIVATE dev_t name_to_dev(int allow_mountpt, char path[PATH_MAX+1]) +{ +/* Convert the block special file in 'user_fullpath' to a device number. + * If the given path is not a block special file, but 'allow_mountpt' is set + * and the path is the root node of a mounted file system, return that device + * number. In all other cases, return NO_DEV and an error code in 'err_code'. + */ + dev_t dev; + struct vnode *vp; + struct vmnt *vmp; + struct lookup resolve; + + lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Request lookup */ + if ((vp = eat_path(&resolve, fp)) == NULL) return(NO_DEV); + + if ((vp->v_mode & I_TYPE) == I_BLOCK_SPECIAL) { + dev = vp->v_sdev; + } else if (allow_mountpt && vp->v_vmnt->m_root_node == vp) { + dev = vp->v_dev; + } else { + err_code = ENOTBLK; + dev = NO_DEV; + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(dev); +} + + +/*===========================================================================* + * is_nonedev * + *===========================================================================*/ +PRIVATE int is_nonedev(dev_t dev) +{ +/* Return whether the given device is a "none" pseudo device. 
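+ * (Illustrative checks, using the NONE_MAJOR and NR_NONEDEVS constants:
+ * is_nonedev(makedev(NONE_MAJOR, 1)) is TRUE,
+ * is_nonedev(makedev(NONE_MAJOR, 0)) is FALSE since minor 0 is unused,
+ * and any device with a real driver major is FALSE as well.)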
+ */ + + return (major(dev) == NONE_MAJOR && + minor(dev) > 0 && minor(dev) <= NR_NONEDEVS); +} + + +/*===========================================================================* + * find_free_nonedev * + *===========================================================================*/ +PRIVATE dev_t find_free_nonedev(void) +{ +/* Find a free "none" pseudo device. Do not allocate it yet. + */ + int i; + + for (i = 0; i < NR_NONEDEVS; i++) + if (!GET_BIT(nonedev, i)) + return makedev(NONE_MAJOR, i + 1); + + err_code = EMFILE; + return NO_DEV; +} diff --git a/servers/avfs/open.c b/servers/avfs/open.c new file mode 100644 index 000000000..54b69a43b --- /dev/null +++ b/servers/avfs/open.c @@ -0,0 +1,734 @@ +/* This file contains the procedures for creating, opening, closing, and + * seeking on files. + * + * The entry points into this file are + * do_creat: perform the CREAT system call + * do_open: perform the OPEN system call + * do_mknod: perform the MKNOD system call + * do_mkdir: perform the MKDIR system call + * do_close: perform the CLOSE system call + * do_lseek: perform the LSEEK system call + * do_llseek: perform the LLSEEK system call + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "dmap.h" +#include "lock.h" +#include "param.h" +#include +#include +#include +#include "vnode.h" +#include "vmnt.h" +#include "path.h" + +PRIVATE char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0}; + +FORWARD _PROTOTYPE( int common_open, (char path[PATH_MAX+1], int oflags, + mode_t omode) ); +FORWARD _PROTOTYPE( struct vnode *new_node, (struct lookup *resolve, + int oflags, mode_t bits) ); +FORWARD _PROTOTYPE( int pipe_open, (struct vnode *vp, mode_t bits, + int oflags) ); + + +/*===========================================================================* + * do_creat * + *===========================================================================*/ +PUBLIC int do_creat() +{ +/* Perform the creat(name, mode) system call. */ + int r; + char fullpath[PATH_MAX+1]; + + if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) + return(err_code); + r = common_open(fullpath, O_WRONLY | O_CREAT | O_TRUNC, (mode_t) m_in.mode); + return(r); +} + + +/*===========================================================================* + * do_open * + *===========================================================================*/ +PUBLIC int do_open() +{ +/* Perform the open(name, flags,...) system call. */ + int create_mode = 0; /* is really mode_t but this gives problems */ + int r; + char fullpath[PATH_MAX+1]; + + /* If O_CREAT is set, open has three parameters, otherwise two. */ + if (m_in.mode & O_CREAT) { + create_mode = m_in.c_mode; + r = fetch_name(m_in.c_name, m_in.name1_length, M1, fullpath); + } else { + r = fetch_name(m_in.name, m_in.name_length, M3, fullpath); + } + + if (r != OK) return(err_code); /* name was bad */ + r = common_open(fullpath, m_in.mode, create_mode); + return(r); +} + + +/*===========================================================================* + * common_open * + *===========================================================================*/ +PRIVATE int common_open(char path[PATH_MAX+1], int oflags, mode_t omode) +{ +/* Common code from do_creat and do_open. */ + int b, r, exist = TRUE, major_dev; + dev_t dev; + mode_t bits; + struct filp *filp, *filp2; + struct vnode *vp; + struct vmnt *vmp; + struct dmap *dp; + struct lookup resolve; + + /* Remap the bottom two bits of oflags. 
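+ * (mode_map[] above encodes: O_RDONLY -> R_BIT, O_WRONLY -> W_BIT,
+ * O_RDWR -> R_BIT|W_BIT, and the unused fourth access mode -> 0, which
+ * is rejected immediately below with EINVAL.)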
*/ + bits = (mode_t) mode_map[oflags & O_ACCMODE]; + if (!bits) return(EINVAL); + + /* See if file descriptor and filp slots are available. */ + if ((r = get_fd(0, bits, &m_in.fd, &filp)) != OK) return(r); + + lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp); + + /* If O_CREATE is set, try to make the file. */ + if (oflags & O_CREAT) { + omode = I_REGULAR | (omode & ALL_MODES & fp->fp_umask); + vp = new_node(&resolve, oflags, omode); + r = err_code; + if (r == OK) exist = FALSE; /* We just created the file */ + else if (r != EEXIST) { /* other error */ + if (vp) unlock_vnode(vp); + unlock_filp(filp); + return(r); + } + else exist = !(oflags & O_EXCL);/* file exists, if the O_EXCL + flag is set this is an error */ + } else { + /* Scan path name */ + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_OPCL; + if ((vp = eat_path(&resolve, fp)) == NULL) { + unlock_filp(filp); + return(err_code); + } + + if (vmp != NULL) unlock_vmnt(vmp); + } + + /* Claim the file descriptor and filp slot and fill them in. */ + fp->fp_filp[m_in.fd] = filp; + FD_SET(m_in.fd, &fp->fp_filp_inuse); + filp->filp_count = 1; + filp->filp_vno = vp; + filp->filp_flags = oflags; + + /* Only do the normal open code if we didn't just create the file. */ + if (exist) { + /* Check protections. */ + if ((r = forbidden(vp, bits)) == OK) { + /* Opening reg. files, directories, and special files differ */ + switch (vp->v_mode & I_TYPE) { + case I_REGULAR: + /* Truncate regular file if O_TRUNC. */ + if (oflags & O_TRUNC) { + if ((r = forbidden(vp, W_BIT)) != OK) + break; + truncate_vnode(vp, 0); + } + break; + case I_DIRECTORY: + /* Directories may be read but not written. */ + r = (bits & W_BIT ? EISDIR : OK); + break; + case I_CHAR_SPECIAL: + /* Invoke the driver for special processing. */ + dev = (dev_t) vp->v_sdev; + r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE)); + if (r == SUSPEND) suspend(FP_BLOCKED_ON_DOPEN); + else vp = filp->filp_vno; /* Might be updated by + * dev_open/clone_opcl */ + break; + case I_BLOCK_SPECIAL: + + lock_bsf(); + + /* Invoke the driver for special processing. */ + dev = (dev_t) vp->v_sdev; + r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE)); + if (r != OK) { + unlock_bsf(); + break; + } + + /* Check whether the device is mounted or not. If so, + * then that FS is responsible for this device. Else + * we default to ROOT_FS. */ + vp->v_bfs_e = ROOT_FS_E; /* By default */ + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) + if (vmp->m_dev == vp->v_sdev) + vp->v_bfs_e = vmp->m_fs_e; + + /* Get the driver endpoint of the block spec device */ + major_dev = major(vp->v_sdev); + if (major_dev < 0 || major_dev >= NR_DEVICES) + r = ENXIO; + else + dp = &dmap[major_dev]; + if (r != OK || dp->dmap_driver == NONE) { + printf("VFS: driver not found for device %d\n", + vp->v_sdev); + r = ENXIO; + unlock_bsf(); + break; + } + + /* Send the driver endpoint (even when known already)*/ + if (vp->v_bfs_e != ROOT_FS_E) { + /* but only when it's the ROOT_FS */ + unlock_bsf(); + break; + } + if ((r = req_newdriver(vp->v_bfs_e, vp->v_sdev, + dp->dmap_driver)) != OK) { + printf("VFS: error sending driver endpoint\n"); + r = ENXIO; + } + unlock_bsf(); + break; + + case I_NAMED_PIPE: + /* Create a mapped inode on PFS which handles reads + and writes to this named pipe. 
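+ * (Once map_vnode(vp, PFS_PROC_NR) succeeds, data I/O on this FIFO is
+ * routed via vp->v_mapfs_e and v_mapinode_nr to PFS, while the inode's
+ * permanent attributes stay with vp->v_fs_e on the original FS.)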
*/ + tll_upgrade(&vp->v_lock); + r = map_vnode(vp, PFS_PROC_NR); + if (r == OK) { + vp->v_pipe = I_PIPE; + if (vp->v_ref_count == 1) { + vp->v_pipe_rd_pos = 0; + vp->v_pipe_wr_pos = 0; + if (vp->v_size != 0) + r = truncate_vnode(vp, 0); + } + oflags |= O_APPEND; /* force append mode */ + filp->filp_flags = oflags; + } + if (r == OK) { + r = pipe_open(vp, bits, oflags); + } + if (r != ENXIO) { + /* See if someone else is doing a rd or wt on + * the FIFO. If so, use its filp entry so the + * file position will be automatically shared. + */ + b = (bits & R_BIT ? R_BIT : W_BIT); + filp->filp_count = 0; /* don't find self */ + if ((filp2 = find_filp(vp, b)) != NULL) { + /* Co-reader or writer found. Use it.*/ + fp->fp_filp[m_in.fd] = filp2; + filp2->filp_count++; + filp2->filp_vno = vp; + filp2->filp_flags = oflags; + + /* v_count was incremented after the + * vnode has been found. i_count was + * incremented incorrectly in FS, not + * knowing that we were going to use an + * existing filp entry. Correct this + * error. + */ + unlock_vnode(vp); + put_vnode(vp); + } else { + /* Nobody else found. Restore filp. */ + filp->filp_count = 1; + } + } + break; + } + } + } + + unlock_filp(filp); + + /* If error, release inode. */ + if (r != OK) { + if (r != SUSPEND) { + fp->fp_filp[m_in.fd] = NULL; + FD_CLR(m_in.fd, &fp->fp_filp_inuse); + filp->filp_count = 0; + filp->filp_vno = NULL; + put_vnode(vp); + } + } else { + r = m_in.fd; + } + + return(r); +} + + +/*===========================================================================* + * new_node * + *===========================================================================*/ +PRIVATE struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) +{ +/* Try to create a new inode and return a pointer to it. If the inode already + exists, return a pointer to it as well, but set err_code accordingly. + NULL is returned if the path cannot be resolved up to the last + directory, or when the inode cannot be created due to permissions or + otherwise. */ + struct vnode *dirp, *vp; + struct vmnt *dir_vmp, *vp_vmp; + int r; + struct node_details res; + struct lookup findnode; + char *path; + + path = resolve->l_path; /* For easy access */ + + lookup_init(&findnode, path, resolve->l_flags, &dir_vmp, &dirp); + findnode.l_vmnt_lock = VMNT_WRITE; + findnode.l_vnode_lock = VNODE_WRITE; /* dir node */ + + /* When O_CREAT and O_EXCL flags are set, the path may not be named by a + * symbolic link. */ + if (oflags & O_EXCL) findnode.l_flags |= PATH_RET_SYMLINK; + + /* See if the path can be opened down to the last directory. */ + if ((dirp = last_dir(&findnode, fp)) == NULL) return(NULL); + + /* The final directory is accessible. Get final component of the path. */ + findnode.l_vmp = &vp_vmp; + findnode.l_vnode = &vp; + findnode.l_vnode_lock = (oflags & O_TRUNC) ? VNODE_WRITE : VNODE_OPCL; + vp = advance(dirp, &findnode, fp); + assert(vp_vmp == NULL); /* Lookup to last dir should have yielded lock + * on vmp or final component does not exist. */ + + /* The combination of a symlink with absolute path followed by a danglink + * symlink results in a new path that needs to be re-resolved entirely. */ + if (path[0] == '/') { +printf("XXX: dangling symlink needs re-resolving\n"); + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + if (vp != NULL) { + unlock_vnode(vp); + put_vnode(vp); + } + return new_node(resolve, oflags, bits); + } + + if (vp == NULL && err_code == ENOENT) { + /* Last path component does not exist. Make a new directory entry. 
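+ * (Worked example with a hypothetical path: for open("/usr/tmp/new",
+ * O_CREAT, ...), last_dir() left dirp at "/usr/tmp" and 'path' now holds
+ * "new"; req_create() below asks dirp's FS to allocate the inode and
+ * enter it in the directory under that name.)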
*/ + if ((vp = get_free_vnode()) == NULL) { + /* Can't create new entry: out of vnodes. */ + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + return(NULL); + } + + lock_vnode(vp, VNODE_OPCL); + + if ((r = forbidden(dirp, W_BIT|X_BIT)) != OK || + (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid, + fp->fp_effgid, path, &res)) != OK ) { + /* Can't create inode either due to permissions or some other + * problem. In case r is EEXIST, we might be dealing with a + * dangling symlink.*/ + if (r == EEXIST) { + struct vnode *slp, *old_wd; + + /* Resolve path up to symlink */ + findnode.l_flags = PATH_RET_SYMLINK; + findnode.l_vnode_lock = VNODE_READ; + findnode.l_vnode = &slp; + slp = advance(dirp, &findnode, fp); + if (slp != NULL) { + if (S_ISLNK(slp->v_mode)) { + /* Get contents of link */ + + r = req_rdlink(slp->v_fs_e, + slp->v_inode_nr, + VFS_PROC_NR, + path, + PATH_MAX, 0); + if (r < 0) { + /* Failed to read link */ + unlock_vnode(slp); + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(slp); + put_vnode(dirp); + err_code = r; + return(NULL); + } + path[r] = '\0'; /* Terminate path */ + } + unlock_vnode(slp); + put_vnode(slp); + } + + /* Try to create the inode the dangling symlink was + * pointing to. We have to use dirp as starting point + * as there might be multiple successive symlinks + * crossing multiple mountpoints. */ + old_wd = fp->fp_wd; /* Save orig. working dirp */ + fp->fp_wd = dirp; + vp = new_node(resolve, oflags, bits); + fp->fp_wd = old_wd; /* Restore */ + + if (vp != NULL) { + unlock_vnode(dirp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + *(resolve->l_vnode) = vp; + return(vp); + } + r = err_code; + } + + if (r == EEXIST) + err_code = EIO; /* Impossible, we have verified that + * the last component doesn't exist and + * is not a dangling symlink. */ + else + err_code = r; + + unlock_vnode(dirp); + unlock_vnode(vp); + unlock_vmnt(dir_vmp); + put_vnode(dirp); + return(NULL); + } + + /* Store results and mark vnode in use */ + + vp->v_fs_e = res.fs_e; + vp->v_inode_nr = res.inode_nr; + vp->v_mode = res.fmode; + vp->v_size = res.fsize; + vp->v_uid = res.uid; + vp->v_gid = res.gid; + vp->v_sdev = res.dev; + vp->v_vmnt = dirp->v_vmnt; + vp->v_dev = vp->v_vmnt->m_dev; + vp->v_fs_count = 1; + vp->v_ref_count = 1; + } else { + /* Either last component exists, or there is some other problem. */ + if (vp != NULL) { + r = EEXIST; /* File exists or a symlink names a file while + * O_EXCL is set. */ + } else + r = err_code; /* Other problem. */ + } + + err_code = r; + /* When dirp equals vp, we shouldn't release the lock as a vp is locked only + * once. Releasing the lock would cause the resulting vp not be locked and + * cause mayhem later on. */ + if (dirp != vp) { + unlock_vnode(dirp); + } + unlock_vmnt(dir_vmp); + put_vnode(dirp); + + *(resolve->l_vnode) = vp; + return(vp); +} + + +/*===========================================================================* + * pipe_open * + *===========================================================================*/ +PRIVATE int pipe_open(register struct vnode *vp, register mode_t bits, + register int oflags) +{ +/* This function is called from common_open. It checks if + * there is at least one reader/writer pair for the pipe, if not + * it suspends the caller, otherwise it revives all other blocked + * processes hanging on the pipe. 
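+ * (Both sides sketched, hedged: a blocking open("fifo", O_RDONLY) with no
+ * writer yet ends in suspend(FP_BLOCKED_ON_POPEN) and returns SUSPEND;
+ * a later open("fifo", O_WRONLY) finds that reader through find_filp()
+ * and wakes it with release().)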
+ */ + + vp->v_pipe = I_PIPE; + + if((bits & (R_BIT|W_BIT)) == (R_BIT|W_BIT)) return(ENXIO); + + /* Find the reader/writer at the other end of the pipe */ + if (find_filp(vp, bits & W_BIT ? R_BIT : W_BIT) == NULL) { + /* Not found */ + if (oflags & O_NONBLOCK) { + if (bits & W_BIT) return(ENXIO); + } else { + /* Let's wait for the other side to show up */ + suspend(FP_BLOCKED_ON_POPEN); /* suspend caller */ + return(SUSPEND); + } + } else if (susp_count > 0) { /* revive blocked processes */ + release(vp, OPEN, susp_count); + release(vp, CREAT, susp_count); + } + return(OK); +} + + +/*===========================================================================* + * do_mknod * + *===========================================================================*/ +PUBLIC int do_mknod() +{ +/* Perform the mknod(name, mode, addr) system call. */ + register mode_t bits, mode_bits; + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + /* Only the super_user may make nodes other than fifos. */ + mode_bits = (mode_t) m_in.mk_mode; /* mode of the inode */ + if (!super_user && (((mode_bits & I_TYPE) != I_NAMED_PIPE) && + ((mode_bits & I_TYPE) != I_UNIX_SOCKET))) { + return(EPERM); + } + bits = (mode_bits & I_TYPE) | (mode_bits & ALL_MODES & fp->fp_umask); + + /* Open directory that's going to hold the new node. */ + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code); + + /* Make sure that the object is a directory */ + if ((vp->v_mode & I_TYPE) != I_DIRECTORY) { + r = ENOTDIR; + } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) { + r = req_mknod(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid, + fp->fp_effgid, bits, m_in.mk_z0); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_mkdir * + *===========================================================================*/ +PUBLIC int do_mkdir() +{ +/* Perform the mkdir(name, mode) system call. */ + mode_t bits; /* mode bits for the new inode */ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vnode_lock = VNODE_READ; + + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + bits = I_DIRECTORY | (m_in.mode & RWX_MODES & fp->fp_umask); + if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code); + + /* Make sure that the object is a directory */ + if ((vp->v_mode & I_TYPE) != I_DIRECTORY) { + r = ENOTDIR; + } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) { + r = req_mkdir(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid, + fp->fp_effgid, bits); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_lseek * + *===========================================================================*/ +PUBLIC int do_lseek() +{ +/* Perform the lseek(ls_fd, offset, whence) system call. */ + register struct filp *rfilp; + int r = OK; + long offset; + u64_t pos, newpos; + + /* Check to see if the file descriptor is valid. 
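+ * (Usage sketch: lseek(fd, -4, SEEK_END) arrives with whence == SEEK_END
+ * and offset_lo == -4, so pos starts at the file size and newpos comes
+ * from sub64ul(); any result that does not fit in 32 bits is EINVAL.)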
*/
+ if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* No lseek on pipes. */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ unlock_filp(rfilp);
+ return(ESPIPE);
+ }
+
+ /* The value of 'whence' determines the start position to use. */
+ switch(m_in.whence) {
+ case SEEK_SET: pos = cvu64(0); break;
+ case SEEK_CUR: pos = rfilp->filp_pos; break;
+ case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size); break;
+ default: unlock_filp(rfilp); return(EINVAL);
+ }
+
+ offset = m_in.offset_lo;
+ if (offset >= 0)
+ newpos = add64ul(pos, offset);
+ else
+ newpos = sub64ul(pos, -offset);
+
+ /* Check for overflow. */
+ if (ex64hi(newpos) != 0)
+ r = EINVAL;
+ else {
+ /* insert the new position into the output message */
+ m_out.reply_l1 = ex64lo(newpos);
+
+ if (cmp64(newpos, rfilp->filp_pos) != 0) {
+ /* Inhibit read ahead request */
+ r = req_inhibread(rfilp->filp_vno->v_fs_e,
+ rfilp->filp_vno->v_inode_nr);
+ }
+
+ /* Commit the new position only after comparing it against the old
+ * one; updating first would make the comparison above always false. */
+ rfilp->filp_pos = newpos;
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_llseek *
+ *===========================================================================*/
+PUBLIC int do_llseek()
+{
+/* Perform the llseek(ls_fd, offset, whence) system call. */
+ register struct filp *rfilp;
+ u64_t pos, newpos;
+ int r = OK;
+
+ /* Check to see if the file descriptor is valid. */
+ if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
+
+ /* No lseek on pipes. */
+ if (rfilp->filp_vno->v_pipe == I_PIPE) {
+ unlock_filp(rfilp);
+ return(ESPIPE);
+ }
+
+ /* The value of 'whence' determines the start position to use. */
+ switch(m_in.whence) {
+ case SEEK_SET: pos = cvu64(0); break;
+ case SEEK_CUR: pos = rfilp->filp_pos; break;
+ case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size); break;
+ default: unlock_filp(rfilp); return(EINVAL);
+ }
+
+ newpos = add64(pos, make64(m_in.offset_lo, m_in.offset_high));
+
+ /* Check for overflow. */
+ if (( (long) m_in.offset_high > 0) && cmp64(newpos, pos) < 0)
+ r = EINVAL;
+ else if (( (long) m_in.offset_high < 0) && cmp64(newpos, pos) > 0)
+ r = EINVAL;
+ else {
+ /* insert the new position into the output message */
+ m_out.reply_l1 = ex64lo(newpos);
+ m_out.reply_l2 = ex64hi(newpos);
+
+ if (cmp64(newpos, rfilp->filp_pos) != 0) {
+ /* Inhibit read ahead request */
+ r = req_inhibread(rfilp->filp_vno->v_fs_e,
+ rfilp->filp_vno->v_inode_nr);
+ }
+
+ /* As in do_lseek: compare against the old position before updating. */
+ rfilp->filp_pos = newpos;
+ }
+
+ unlock_filp(rfilp);
+ return(r);
+}
+
+/*===========================================================================*
+ * do_close *
+ *===========================================================================*/
+PUBLIC int do_close()
+{
+/* Perform the close(fd) system call. */
+
+ return close_fd(fp, m_in.fd);
+}
+
+
+/*===========================================================================*
+ * close_fd *
+ *===========================================================================*/
+PUBLIC int close_fd(rfp, fd_nr)
+struct fproc *rfp;
+int fd_nr;
+{
+/* Close file descriptor 'fd_nr' on behalf of process 'rfp'. */
+ register struct filp *rfilp;
+ register struct vnode *vp;
+ struct file_lock *flp;
+ int lock_count;
+
+ /* First locate the vnode that belongs to the file descriptor. */
+ if ( (rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL)) == NULL) return(err_code);
+ vp = rfilp->filp_vno;
+
+ close_filp(rfilp);
+ rfp->fp_filp[fd_nr] = NULL;
+ FD_CLR(fd_nr, &rfp->fp_cloexec_set);
+ FD_CLR(fd_nr, &rfp->fp_filp_inuse);
+
+ /* Check to see if the file is locked.
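+ * (Locks live in the global file_lock[] table; a slot is in use iff its
+ * lock_type != 0, and it names its owner in lock_pid and its file in
+ * lock_vnode, which is exactly what the scan below matches on.)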
If so, release all locks. */ + if (nr_locks > 0) { + lock_count = nr_locks; /* save count of locks */ + for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) { + if (flp->lock_type == 0) continue; /* slot not in use */ + if (flp->lock_vnode == vp && flp->lock_pid == rfp->fp_pid) { + flp->lock_type = 0; + nr_locks--; + } + } + if (nr_locks < lock_count) + lock_revive(); /* one or more locks released */ + } + + return(OK); +} + +/*===========================================================================* + * close_reply * + *===========================================================================*/ +PUBLIC void close_reply() +{ + /* No need to do anything */ +} diff --git a/servers/avfs/param.h b/servers/avfs/param.h new file mode 100644 index 000000000..ad9107625 --- /dev/null +++ b/servers/avfs/param.h @@ -0,0 +1,63 @@ +#ifndef __VFS_PARAM_H__ +#define __VFS_PARAM_H__ + +/* The following names are synonyms for the variables in the input message. */ +#define addr m1_i3 +#define buffer m1_p1 +#define child_endpt m1_i2 +#define co_mode m1_i1 +#define fd m1_i1 +#define fd2 m1_i2 +#define group m1_i3 +#define ls_fd m2_i1 +#define mk_mode m1_i2 +#define mk_z0 m1_i3 +#define mode m3_i2 +#define c_mode m1_i3 +#define c_name m1_p1 +#define name m3_p1 +#define flength m2_l1 +#define name1 m1_p1 +#define name2 m1_p2 +#define name_length m3_i1 +#define name1_length m1_i1 +#define name2_length m1_i2 +#define nbytes m1_i2 +#define owner m1_i2 +#define pathname m3_ca1 +#define pid m1_i3 +#define ENDPT m1_i1 +#define offset_lo m2_l1 +#define offset_high m2_l2 +#define ctl_req m4_l1 +#define mount_flags m1_i3 +#define request m1_i2 +#define sig m1_i2 +#define endpt1 m1_i1 +#define fs_label m1_p3 +#define umount_label m3_ca1 +#define tp m2_l1 +#define utime_actime m2_l1 +#define utime_modtime m2_l2 +#define utime_file m2_p1 +#define utime_length m2_i1 +#define utime_strlen m2_i2 +#define whence m2_i2 +#define svrctl_req m2_i1 +#define svrctl_argp m2_p1 +#define info_what m1_i1 +#define info_where m1_p1 +#define md_label m2_p1 +#define md_label_len m2_l1 +#define md_major m2_i1 +#define md_style m2_i2 +#define md_flags m2_i3 + +/* The following names are synonyms for the variables in the output message. */ +#define reply_type m_type +#define reply_l1 m2_l1 +#define reply_l2 m2_l2 +#define reply_i1 m1_i1 +#define reply_i2 m1_i2 + +#endif diff --git a/servers/avfs/path.c b/servers/avfs/path.c new file mode 100644 index 000000000..1be54d9ef --- /dev/null +++ b/servers/avfs/path.c @@ -0,0 +1,687 @@ +/* lookup() is the main routine that controls the path name lookup. It + * handles mountpoints and symbolic links. The actual lookup requests + * are sent through the req_lookup wrapper function. + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "threads.h" +#include "vmnt.h" +#include "vnode.h" +#include "path.h" +#include "fproc.h" +#include "param.h" + +/* Set to following define to 1 if you really want to use the POSIX definition + * (IEEE Std 1003.1, 2004) of pathname resolution. POSIX requires pathnames + * with a traling slash (and that do not entirely consist of slash characters) + * to be treated as if a single dot is appended. This means that for example + * mkdir("dir/", ...) and rmdir("dir/") will fail because the call tries to + * create or remove the directory '.'. Historically, Unix systems just ignore + * trailing slashes. 
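+ * (Concretely: with the define at 1, mkdir("dir/", 0755) behaves like
+ * mkdir("dir/.") and fails, while at 0 the trailing slash is stripped in
+ * last_dir() below and the call creates "dir" as expected.)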
+ */ +#define DO_POSIX_PATHNAME_RES 0 + +FORWARD _PROTOTYPE( int lookup, (struct vnode *dirp, struct lookup *resolve, + node_details_t *node, struct fproc *rfp)); +FORWARD _PROTOTYPE( int check_perms, (endpoint_t ep, cp_grant_id_t io_gr, + size_t pathlen) ); + +/*===========================================================================* + * advance * + *===========================================================================*/ +PUBLIC struct vnode *advance(dirp, resolve, rfp) +struct vnode *dirp; +struct lookup *resolve; +struct fproc *rfp; +{ +/* Resolve a path name starting at dirp to a vnode. */ + int r; + int do_downgrade = 1; + struct vnode *new_vp, *vp; + struct vmnt *vmp; + struct node_details res = {0,0,0,0,0,0,0}; + tll_access_t initial_locktype; + + assert(dirp); + assert(resolve->l_vnode_lock != TLL_NONE); + assert(resolve->l_vmnt_lock != TLL_NONE); + + if (resolve->l_vnode_lock == VNODE_READ) + initial_locktype = VNODE_OPCL; + else + initial_locktype = resolve->l_vnode_lock; + + /* Get a free vnode and lock it */ + if ((new_vp = get_free_vnode()) == NULL) return(NULL); + lock_vnode(new_vp, initial_locktype); + + /* Lookup vnode belonging to the file. */ + if ((r = lookup(dirp, resolve, &res, rfp)) != OK) { + err_code = r; + unlock_vnode(new_vp); + return(NULL); + } + + /* Check whether we already have a vnode for that file */ + if ((vp = find_vnode(res.fs_e, res.inode_nr)) != NULL) { + unlock_vnode(new_vp); /* Don't need this anymore */ + do_downgrade = (lock_vnode(vp, initial_locktype) != EBUSY); + + /* Unfortunately, by the time we get the lock, another thread might've + * rid of the vnode (e.g., find_vnode found the vnode while a + * req_putnode was being processed). */ + if (vp->v_ref_count == 0) { /* vnode vanished! */ + /* As the lookup before increased the usage counters in the FS, + * we can simply set the usage counters to 1 and proceed as + * normal, because the putnode resulted in a use count of 1 in + * the FS. Other data is still valid, because the vnode was + * marked as pending lock, so get_free_vnode hasn't + * reinitialized the vnode yet. */ + vp->v_fs_count = 1; + if (vp->v_mapfs_e != NONE) vp->v_mapfs_count = 1; + } else { + vp->v_fs_count++; /* We got a reference from the FS */ + } + + } else { + /* Vnode not found, fill in the free vnode's fields */ + + new_vp->v_fs_e = res.fs_e; + new_vp->v_inode_nr = res.inode_nr; + new_vp->v_mode = res.fmode; + new_vp->v_size = res.fsize; + new_vp->v_uid = res.uid; + new_vp->v_gid = res.gid; + new_vp->v_sdev = res.dev; + + if( (vmp = find_vmnt(new_vp->v_fs_e)) == NULL) + panic("advance: vmnt not found"); + + new_vp->v_vmnt = vmp; + new_vp->v_dev = vmp->m_dev; + new_vp->v_fs_count = 1; + + vp = new_vp; + } + + dup_vnode(vp); + if (do_downgrade) { + /* Only downgrade a lock if we managed to lock it in the first place */ + *(resolve->l_vnode) = vp; + + if (initial_locktype != resolve->l_vnode_lock) + tll_downgrade(&vp->v_lock); + +#if LOCK_DEBUG + if (resolve->l_vnode_lock == VNODE_READ) + fp->fp_vp_rdlocks++; +#endif + } + + return(vp); +} + + +/*===========================================================================* + * eat_path * + *===========================================================================*/ +PUBLIC struct vnode *eat_path(resolve, rfp) +struct lookup *resolve; +struct fproc *rfp; +{ +/* Resolve path to a vnode. advance does the actual work. */ + struct vnode *start_dir; + + start_dir = (resolve->l_path[0] == '/' ? 
rfp->fp_rd : rfp->fp_wd); + return advance(start_dir, resolve, rfp); +} + + +/*===========================================================================* + * last_dir * + *===========================================================================*/ +PUBLIC struct vnode *last_dir(resolve, rfp) +struct lookup *resolve; +struct fproc *rfp; +{ +/* Parse a path, as far as the last directory, fetch the vnode + * for the last directory into the vnode table, and return a pointer to the + * vnode. In addition, return the final component of the path in 'string'. If + * the last directory can't be opened, return NULL and the reason for + * failure in 'err_code'. We can't parse component by component as that would + * be too expensive. Alternatively, we cut off the last component of the path, + * and parse the path up to the penultimate component. + */ + + size_t len; + char *cp; + char dir_entry[PATH_MAX+1]; + struct vnode *start_dir, *res; + + /* Is the path absolute or relative? Initialize 'start_dir' accordingly. */ + start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd : rfp->fp_wd); + + len = strlen(resolve->l_path); + + /* If path is empty, return ENOENT. */ + if (len == 0) { + err_code = ENOENT; + return(NULL); + } + +#if !DO_POSIX_PATHNAME_RES + /* Remove trailing slashes */ + while (len > 1 && resolve->l_path[len-1] == '/') { + len--; + resolve->l_path[len]= '\0'; + } +#endif + + cp = strrchr(resolve->l_path, '/'); + if (cp == NULL) { + /* Just one entry in the current working directory */ + struct vmnt *vmp; + + vmp = find_vmnt(start_dir->v_fs_e); + if (lock_vmnt(vmp, resolve->l_vmnt_lock) != EBUSY) + *resolve->l_vmp = vmp; + lock_vnode(start_dir, resolve->l_vnode_lock); + *resolve->l_vnode = start_dir; + dup_vnode(start_dir); + return(start_dir); + + } else if (cp[1] == '\0') { + /* Path ends in a slash. The directory entry is '.' */ + strcpy(dir_entry, "."); + } else { + /* A path name for the directory and a directory entry */ + strcpy(dir_entry, cp+1); + cp[1] = '\0'; + } + + /* Remove trailing slashes */ + while(cp > resolve->l_path && cp[0] == '/') { + cp[0]= '\0'; + cp--; + } + + resolve->l_flags = PATH_NOFLAGS; + res = advance(start_dir, resolve, rfp); + if (res == NULL) return(NULL); + + /* Copy the directory entry back to user_fullpath */ + strncpy(resolve->l_path, dir_entry, PATH_MAX); + + return(res); +} + +/*===========================================================================* + * lookup * + *===========================================================================*/ +PRIVATE int lookup(start_node, resolve, result_node, rfp) +struct vnode *start_node; +struct lookup *resolve; +node_details_t *result_node; +struct fproc *rfp; +{ +/* Resolve a path name relative to start_node. */ + + int r, symloop; + endpoint_t fs_e; + size_t path_off, path_left_len; + ino_t dir_ino, root_ino; + uid_t uid; + gid_t gid; + struct vnode *dir_vp; + struct vmnt *vmp, *vmpres; + struct lookup_res res; + + assert(resolve->l_vmp); + assert(resolve->l_vnode); + + *(resolve->l_vmp) = vmpres = NULL; /* No vmnt found nor locked yet */ + + /* Empty (start) path? */ + if (resolve->l_path[0] == '\0') { + result_node->inode_nr = 0; + return(ENOENT); + } + + if (!rfp->fp_rd || !rfp->fp_wd) { + printf("VFS: lookup %d: no rd/wd\n", rfp->fp_endpoint); + return(ENOENT); + } + + fs_e = start_node->v_fs_e; + dir_ino = start_node->v_inode_nr; + vmpres = find_vmnt(fs_e); + + /* Is the process' root directory on the same partition?, + * if so, set the chroot directory too. 
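+ * (A nonzero root_ino tells the FS which inode acts as the root for a
+ * chroot()ed process, so it can stop ".." traversal there; 0 means the
+ * root is on another partition and VFS handles that crossing itself.)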
*/
+ if (rfp->fp_rd->v_dev == rfp->fp_wd->v_dev)
+ root_ino = rfp->fp_rd->v_inode_nr;
+ else
+ root_ino = 0;
+
+ /* Set user and group ids according to the system call */
+ uid = (call_nr == ACCESS ? rfp->fp_realuid : rfp->fp_effuid);
+ gid = (call_nr == ACCESS ? rfp->fp_realgid : rfp->fp_effgid);
+
+ symloop = 0; /* Number of symlinks seen so far */
+
+ /* Lock vmnt */
+ if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
+ if (r == EBUSY) /* vmnt already locked */
+ vmpres = NULL;
+ }
+ *(resolve->l_vmp) = vmpres;
+
+ /* Issue the request */
+ r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);
+
+ if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(r); /* i.e., an error occurred */
+ }
+
+ /* While the reply indicates a mount-related event (entering or leaving a
+ * mounted partition, or a symlink with an absolute path), update the
+ * request and retry the lookup. */
+ while (r == EENTERMOUNT || r == ELEAVEMOUNT || r == ESYMLINK) {
+ /* Update the path to reflect what's left to be parsed. */
+ path_off = res.char_processed;
+ path_left_len = strlen(&resolve->l_path[path_off]);
+ memmove(resolve->l_path, &resolve->l_path[path_off], path_left_len);
+ resolve->l_path[path_left_len] = '\0'; /* terminate string */
+
+ /* Update the current value of the symloop counter */
+ symloop += res.symloop;
+ if (symloop > SYMLOOP_MAX) {
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(ELOOP);
+ }
+
+ /* Symlink encountered with absolute path */
+ if (r == ESYMLINK) {
+ dir_vp = rfp->fp_rd;
+ vmp = NULL;
+ } else if (r == EENTERMOUNT) {
+ /* Entering a new partition */
+ dir_vp = NULL;
+ /* Start node is now the mounted partition's root node */
+ for (vmp = &vmnt[0]; vmp != &vmnt[NR_MNTS]; ++vmp) {
+ if (vmp->m_dev != NO_DEV && vmp->m_mounted_on) {
+ if (vmp->m_mounted_on->v_inode_nr == res.inode_nr &&
+ vmp->m_mounted_on->v_fs_e == res.fs_e) {
+ dir_vp = vmp->m_root_node;
+ break;
+ }
+ }
+ }
+ assert(dir_vp);
+ } else {
+ /* Climbing up the mount tree. Find the vmnt that represents
+ * the partition on which we "climb up". */
+ if ((vmp = find_vmnt(res.fs_e)) == NULL) {
+ panic("VFS lookup: can't find parent vmnt");
+ }
+
+ /* Make sure that the child FS does not feed a bogus path
+ * to the parent FS. That is, when we climb up the tree, we
+ * must've encountered ".." in the path, and that is exactly
+ * what we're going to feed to the parent */
+ if(strncmp(resolve->l_path, "..", 2) != 0 ||
+ (resolve->l_path[2] != '\0' && resolve->l_path[2] != '/')) {
+ printf("VFS: bogus path: %s\n", resolve->l_path);
+ if (vmpres) unlock_vmnt(vmpres);
+ *(resolve->l_vmp) = NULL;
+ return(ENOENT);
+ }
+
+ /* Start node is the vnode on which the partition is
+ * mounted */
+ dir_vp = vmp->m_mounted_on;
+ }
+
+ /* Set the starting directory's inode number and FS endpoint */
+ fs_e = dir_vp->v_fs_e;
+ dir_ino = dir_vp->v_inode_nr;
+
+ /* Is the process' root directory on the same partition? If so,
+ * set the chroot directory too.
*/ + if (dir_vp->v_dev == rfp->fp_rd->v_dev) + root_ino = rfp->fp_rd->v_inode_nr; + else + root_ino = 0; + + /* Unlock a previously locked vmnt if locked and lock new vmnt */ + if (vmpres) unlock_vmnt(vmpres); + vmpres = find_vmnt(fs_e); + if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) { + if (r == EBUSY) + vmpres = NULL; /* Already locked */ + } + *(resolve->l_vmp) = vmpres; + + r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp); + + if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) { + if (vmpres) unlock_vmnt(vmpres); + *(resolve->l_vmp) = NULL; + return(r); + } + } + + /* Fill in response fields */ + result_node->inode_nr = res.inode_nr; + result_node->fmode = res.fmode; + result_node->fsize = res.fsize; + result_node->dev = res.dev; + result_node->fs_e = res.fs_e; + result_node->uid = res.uid; + result_node->gid = res.gid; + + return(r); +} + +/*===========================================================================* + * lookup_init * + *===========================================================================*/ +PUBLIC void lookup_init(resolve, path, flags, vmp, vp) +struct lookup *resolve; +char *path; +int flags; +struct vmnt **vmp; +struct vnode **vp; +{ + assert(vmp != NULL); + assert(vp != NULL); + + resolve->l_path = path; + resolve->l_flags = flags; + resolve->l_vmp = vmp; + resolve->l_vnode = vp; + resolve->l_vmnt_lock = TLL_NONE; + resolve->l_vnode_lock = TLL_NONE; + *vmp = NULL; /* Initialize lookup result to NULL */ + *vp = NULL; +} + +/*===========================================================================* + * get_name * + *===========================================================================*/ +PUBLIC int get_name(dirp, entry, ename) +struct vnode *dirp; +struct vnode *entry; +char ename[NAME_MAX + 1]; +{ + u64_t pos, new_pos; + int r, consumed, totalbytes; + char buf[(sizeof(struct dirent) + NAME_MAX) * 8]; + struct dirent *cur; + + pos = make64(0, 0); + + if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) { + return(EBADF); + } + + do { + r = req_getdents(dirp->v_fs_e, dirp->v_inode_nr, pos, buf, sizeof(buf), + &new_pos, 1); + + if (r == 0) { + return(ENOENT); /* end of entries -- matching inode !found */ + } else if (r < 0) { + return(r); /* error */ + } + + consumed = 0; /* bytes consumed */ + totalbytes = r; /* number of bytes to consume */ + + do { + cur = (struct dirent *) (buf + consumed); + if (entry->v_inode_nr == cur->d_ino) { + /* found the entry we were looking for */ + strncpy(ename, cur->d_name, NAME_MAX); + ename[NAME_MAX] = '\0'; + return(OK); + } + + /* not a match -- move on to the next dirent */ + consumed += cur->d_reclen; + } while (consumed < totalbytes); + + pos = new_pos; + } while (1); +} + +/*===========================================================================* + * canonical_path * + *===========================================================================*/ +PUBLIC int canonical_path(orig_path, canon_path, rfp) +char *orig_path; +char canon_path[PATH_MAX+1]; /* should have length PATH_MAX+1 */ +struct fproc *rfp; +{ + int len = 0; + int r, symloop = 0; + struct vnode *dir_vp, *parent_dir; + struct vmnt *dir_vmp, *parent_vmp; + char component[NAME_MAX+1]; + char link_path[PATH_MAX+1]; + char temp_path[PATH_MAX+1]; + struct lookup resolve; + + dir_vp = NULL; + strncpy(temp_path, orig_path, PATH_MAX); + + do { + if (dir_vp) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + } + + /* Resolve to the last directory holding the file */ + lookup_init(&resolve, 
temp_path, PATH_NOFLAGS, &dir_vmp, &dir_vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + if ((dir_vp = last_dir(&resolve, rfp)) == NULL) return(err_code); + + /* dir_vp points to dir and resolve path now contains only the + * filename. + */ + strcpy(canon_path, resolve.l_path); /* Store file name */ + + /* check if the file is a symlink, if so resolve it */ + r = rdlink_direct(canon_path, link_path, rfp); + if (r <= 0) { + strcpy(temp_path, canon_path); + break; + } + + /* encountered a symlink -- loop again */ + strcpy(temp_path, link_path); + + symloop++; + } while (symloop < SYMLOOP_MAX); + + if (symloop >= SYMLOOP_MAX) { + if (dir_vp) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + } + return(ELOOP); + } + + while(dir_vp != rfp->fp_rd) { + + strcpy(temp_path, ".."); + + /* check if we're at the root node of the file system */ + if (dir_vp->v_vmnt->m_root_node == dir_vp) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + dir_vp = dir_vp->v_vmnt->m_mounted_on; + dir_vmp = dir_vp->v_vmnt; + assert(lock_vmnt(dir_vmp, VMNT_READ) == OK); + assert(lock_vnode(dir_vp, VNODE_READ) == OK); + dup_vnode(dir_vp); + } + + lookup_init(&resolve, temp_path, PATH_NOFLAGS, &parent_vmp, + &parent_dir); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + if ((parent_dir = advance(dir_vp, &resolve, rfp)) == NULL) { + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + return(err_code); + } + + /* now we have to retrieve the name of the parent directory */ + if (get_name(parent_dir, dir_vp, component) != OK) { + unlock_vnode(parent_dir); + unlock_vmnt(parent_vmp); + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(parent_dir); + put_vnode(dir_vp); + return(ENOENT); + } + + len += strlen(component) + 1; + if (len > PATH_MAX) { + /* adding the component to canon_path would exceed PATH_MAX */ + unlock_vnode(parent_dir); + unlock_vmnt(parent_vmp); + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(parent_dir); + put_vnode(dir_vp); + return(ENOMEM); + } + + /* store result of component in canon_path */ + + /* first make space by moving the contents of canon_path to + * the right. Move strlen + 1 bytes to include the terminating '\0'. 
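+ * (Worked example: with canon_path == "tmp" and component == "usr", the
+ * memmove puts "tmp" at offset 4, the copy writes "usr" at offset 0, and
+ * the slash written at offset 3 completes "usr/tmp".)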
+ */ + memmove(canon_path+strlen(component)+1, canon_path, + strlen(canon_path) + 1); + + /* Copy component into canon_path */ + memmove(canon_path, component, strlen(component)); + + /* Put slash into place */ + canon_path[strlen(component)] = '/'; + + /* Store parent_dir result, and continue the loop once more */ + unlock_vnode(dir_vp); + unlock_vmnt(dir_vmp); + put_vnode(dir_vp); + dir_vp = parent_dir; + } + + unlock_vnode(dir_vp); + unlock_vmnt(parent_vmp); + + put_vnode(dir_vp); + + /* add the leading slash */ + if (strlen(canon_path) >= PATH_MAX) return(ENAMETOOLONG); + memmove(canon_path+1, canon_path, strlen(canon_path)); + canon_path[0] = '/'; + + return(OK); +} + +/*===========================================================================* + * check_perms * + *===========================================================================*/ +PRIVATE int check_perms(ep, io_gr, pathlen) +endpoint_t ep; +cp_grant_id_t io_gr; +size_t pathlen; +{ + int r, slot; + struct vnode *vp; + struct vmnt *vmp; + struct fproc *rfp; + char orig_path[PATH_MAX+1]; + char canon_path[PATH_MAX+1]; + char temp_path[PATH_MAX+1]; + struct lookup resolve; + + if (isokendpt(ep, &slot) != OK) return(EINVAL); + if (pathlen < UNIX_PATH_MAX || pathlen > PATH_MAX) return(EINVAL); + + rfp = &(fproc[slot]); + memset(canon_path, '\0', PATH_MAX+1); + + r = sys_safecopyfrom(PFS_PROC_NR, io_gr, (vir_bytes) 0, + (vir_bytes) temp_path, pathlen, D); + if (r != OK) return(r); + + temp_path[pathlen] = '\0'; + + /* save path from pfs before permissions checking modifies it */ + memcpy(orig_path, temp_path, PATH_MAX+1); + + /* get the canonical path to the socket file */ + if ((r = canonical_path(orig_path, canon_path, rfp)) != OK) + return(r); + + if (strlen(canon_path) >= pathlen) return(ENAMETOOLONG); + + /* copy canon_path back to PFS */ + r = sys_safecopyto(PFS_PROC_NR, (cp_grant_id_t) io_gr, (vir_bytes) 0, + (vir_bytes) canon_path, strlen(canon_path)+1, + D); + if (r != OK) return(r); + + /* reload user_fullpath for permissions checking */ + memcpy(temp_path, orig_path, PATH_MAX+1); + lookup_init(&resolve, temp_path, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code); + + /* check permissions */ + r = forbidden(vp, (R_BIT | W_BIT)); + + unlock_vnode(vp); + unlock_vmnt(vmp); + + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * do_check_perms * + *===========================================================================*/ +PUBLIC int do_check_perms(void) +{ + return check_perms(m_in.USER_ENDPT, (cp_grant_id_t) m_in.IO_GRANT, + (size_t) m_in.COUNT); +} diff --git a/servers/avfs/path.h b/servers/avfs/path.h new file mode 100644 index 000000000..0ba68b349 --- /dev/null +++ b/servers/avfs/path.h @@ -0,0 +1,13 @@ +#ifndef __VFS_PATH_H__ +#define __VFS_PATH_H__ + +struct lookup { + char *l_path; /* Path to lookup */ + int l_flags; /* VFS/FS flags (see ) */ + tll_access_t l_vmnt_lock; /* Lock to obtain on vmnt */ + tll_access_t l_vnode_lock; /* Lock to obtain on vnode */ + struct vmnt **l_vmp; /* vmnt object that was locked */ + struct vnode **l_vnode; /* vnode object that was locked */ +}; + +#endif diff --git a/servers/avfs/pipe.c b/servers/avfs/pipe.c new file mode 100644 index 000000000..de2128069 --- /dev/null +++ b/servers/avfs/pipe.c @@ -0,0 +1,637 @@ +/* This file deals with the suspension and revival of processes. 
A process can + * be suspended because it wants to read or write from a pipe and can't, or + * because it wants to read or write from a special file and can't. When a + * process can't continue it is suspended, and revived later when it is able + * to continue. + * + * The entry points into this file are + * do_pipe: perform the PIPE system call + * pipe_check: check to see that a read or write on a pipe is feasible now + * suspend: suspend a process that cannot do a requested read or write + * release: check to see if a suspended process can be released and do + * it + * revive: mark a suspended process as able to run again + * unsuspend_by_endpt: revive all processes blocking on a given process + * do_unpause: a signal has been sent to a process; see if it suspended + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "dmap.h" +#include "param.h" +#include "select.h" +#include +#include "vnode.h" +#include "vmnt.h" + + +/*===========================================================================* + * do_pipe * + *===========================================================================*/ +PUBLIC int do_pipe() +{ +/* Perform the pipe(fil_des) system call. */ + + register struct fproc *rfp; + int r; + struct filp *fil_ptr0, *fil_ptr1; + int fil_des[2]; /* reply goes here */ + struct vnode *vp; + struct vmnt *vmp; + struct node_details res; + + /* See if a free vnode is available */ + if ((vp = get_free_vnode()) == NULL) return(err_code); + lock_vnode(vp, VNODE_OPCL); + + /* Get a lock on PFS */ + if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone"); + lock_vmnt(vmp, VMNT_WRITE); + + /* Acquire two file descriptors. */ + rfp = fp; + if ((r = get_fd(0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) { + unlock_vnode(vp); + unlock_vmnt(vmp); + return(r); + } + rfp->fp_filp[fil_des[0]] = fil_ptr0; + FD_SET(fil_des[0], &rfp->fp_filp_inuse); + fil_ptr0->filp_count = 1; /* mark filp in use */ + if ((r = get_fd(0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) { + rfp->fp_filp[fil_des[0]] = NULL; + FD_CLR(fil_des[0], &rfp->fp_filp_inuse); + fil_ptr0->filp_count = 0; /* mark filp free */ + unlock_filp(fil_ptr0); + unlock_vnode(vp); + unlock_vmnt(vmp); + return(r); + } + rfp->fp_filp[fil_des[1]] = fil_ptr1; + FD_SET(fil_des[1], &rfp->fp_filp_inuse); + fil_ptr1->filp_count = 1; + + /* Create a named pipe inode on PipeFS */ + r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE, + NO_DEV, &res); + + if (r != OK) { + rfp->fp_filp[fil_des[0]] = NULL; + FD_CLR(fil_des[0], &rfp->fp_filp_inuse); + fil_ptr0->filp_count = 0; + rfp->fp_filp[fil_des[1]] = NULL; + FD_CLR(fil_des[1], &rfp->fp_filp_inuse); + fil_ptr1->filp_count = 0; + unlock_filp(fil_ptr1); + unlock_filp(fil_ptr0); + unlock_vnode(vp); + unlock_vmnt(vmp); + return(r); + } + + /* Fill in vnode */ + vp->v_fs_e = res.fs_e; + vp->v_mapfs_e = res.fs_e; + vp->v_inode_nr = res.inode_nr; + vp->v_mapinode_nr = res.inode_nr; + vp->v_mode = res.fmode; + vp->v_pipe = I_PIPE; + vp->v_pipe_rd_pos= 0; + vp->v_pipe_wr_pos= 0; + vp->v_fs_count = 1; + vp->v_mapfs_count = 1; + vp->v_ref_count = 1; + vp->v_size = 0; + vp->v_vmnt = NULL; + vp->v_dev = NO_DEV; + + /* Fill in filp objects */ + fil_ptr0->filp_vno = vp; + dup_vnode(vp); + fil_ptr1->filp_vno = vp; + fil_ptr0->filp_flags = O_RDONLY; + fil_ptr1->filp_flags = O_WRONLY; + + m_out.reply_i1 = fil_des[0]; + m_out.reply_i2 = fil_des[1]; + + unlock_filps(fil_ptr0, fil_ptr1); + 
unlock_vmnt(vmp); + + return(OK); +} + + +/*===========================================================================* + * map_vnode * + *===========================================================================*/ +PUBLIC int map_vnode(vp, map_to_fs_e) +struct vnode *vp; +endpoint_t map_to_fs_e; +{ + int r; + struct vmnt *vmp; + struct node_details res; + + if(vp->v_mapfs_e != NONE) return(OK); /* Already mapped; nothing to do. */ + + if ((vmp = find_vmnt(map_to_fs_e)) == NULL) + panic("Can't map to unknown endpoint"); + if (lock_vmnt(vmp, VMNT_WRITE) == EBUSY) + vmp = NULL; /* Already locked, do not unlock */ + + /* Create a temporary mapping of this inode to another FS. Read and write + * operations on data will be handled by that FS. The rest by the 'original' + * FS that holds the inode. */ + if ((r = req_newnode(map_to_fs_e, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE, + vp->v_dev, &res)) == OK) { + vp->v_mapfs_e = res.fs_e; + vp->v_mapinode_nr = res.inode_nr; + vp->v_mapfs_count = 1; + } + + if (vmp) unlock_vmnt(vmp); + + return(r); +} + +/*===========================================================================* + * pipe_check * + *===========================================================================*/ +PUBLIC int pipe_check(vp, rw_flag, oflags, bytes, position, notouch) +register struct vnode *vp; /* the inode of the pipe */ +int rw_flag; /* READING or WRITING */ +int oflags; /* flags set by open or fcntl */ +register int bytes; /* bytes to be read or written (all chunks) */ +u64_t position; /* current file position */ +int notouch; /* check only */ +{ +/* Pipes are a little different. If a process reads from an empty pipe for + * which a writer still exists, suspend the reader. If the pipe is empty + * and there is no writer, return 0 bytes. If a process is writing to a + * pipe and no one is reading from it, give a broken pipe error. + */ + off_t pos; + int r = OK; + + if (ex64hi(position) != 0) + panic("pipe_check: position too large in pipe"); + pos = ex64lo(position); + + /* If reading, check for empty pipe. */ + if (rw_flag == READING) { + if (pos >= vp->v_size) { + /* Process is reading from an empty pipe. */ + if (find_filp(vp, W_BIT) != NULL) { + /* Writer exists */ + if (oflags & O_NONBLOCK) + r = EAGAIN; + else + r = SUSPEND; + + /* If need be, activate sleeping writers. */ + if (susp_count > 0) + release(vp, WRITE, susp_count); + } + return(r); + } + return(bytes); + } + + /* Process is writing to a pipe. */ + if (find_filp(vp, R_BIT) == NULL) { + /* Process is writing, but there is no reader. Tell kernel to generate + * a SIGPIPE signal. */ + if (!notouch) sys_kill(fp->fp_endpoint, SIGPIPE); + + return(EPIPE); + } + + /* Calculate how many bytes can be written. */ + if (pos + bytes > PIPE_BUF) { + if (oflags & O_NONBLOCK) { + if (bytes <= PIPE_BUF) { + /* Write has to be atomic */ + return(EAGAIN); + } + + /* Compute available space */ + bytes = PIPE_BUF - pos; + + if (bytes > 0) { + /* Do a partial write. Need to wakeup reader */ + if (!notouch) + release(vp, READ, susp_count); + return(bytes); + } else { + /* Pipe is full */ + return(EAGAIN); + } + } + + if (bytes > PIPE_BUF) { + /* Compute available space */ + bytes = PIPE_BUF - pos; + + if (bytes > 0) { + /* Do a partial write. Need to wakeup reader + * since we'll suspend ourself in read_write() + */ + if (!notouch) + release(vp, READ, susp_count); + return(bytes); + } + } + + /* Pipe is full */ + return(SUSPEND); + } + + /* Writing to an empty pipe. Search for suspended reader. 
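+ * (pos == 0 means the pipe has drained completely, so any reader that
+ * blocked in the meantime is woken via release(vp, READ, susp_count) and
+ * can consume the bytes this write is about to append.)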
*/ + if (pos == 0 && !notouch) + release(vp, READ, susp_count); + + /* Requested amount fits */ + return(bytes); +} + + +/*===========================================================================* + * suspend * + *===========================================================================*/ +PUBLIC void suspend(int why) +{ +/* Take measures to suspend the processing of the present system call. + * Store the parameters to be used upon resuming in the process table. + * (Actually they are not used when a process is waiting for an I/O device, + * but they are needed for pipes, and it is not worth making the distinction.) + * The SUSPEND pseudo error should be returned after calling suspend(). + */ + +#if DO_SANITYCHECKS + if (why == FP_BLOCKED_ON_PIPE) + panic("suspend: called for FP_BLOCKED_ON_PIPE"); + + if(fp_is_blocked(fp)) + panic("suspend: called for suspended process"); + + if(why == FP_BLOCKED_ON_NONE) + panic("suspend: called for FP_BLOCKED_ON_NONE"); +#endif + + if (why == FP_BLOCKED_ON_POPEN) + /* #procs susp'ed on pipe*/ + susp_count++; + + fp->fp_blocked_on = why; + assert(fp->fp_grant == GRANT_INVALID || !GRANT_VALID(fp->fp_grant)); + fp->fp_block_fd = m_in.fd; + fp->fp_block_callnr = call_nr; + fp->fp_flags &= ~FP_SUSP_REOPEN; /* Clear this flag. The caller + * can set it when needed. + */ + if (why == FP_BLOCKED_ON_LOCK) { + fp->fp_buffer = (char *) m_in.name1; /* third arg to fcntl() */ + fp->fp_nbytes = m_in.request; /* second arg to fcntl() */ + } else { + fp->fp_buffer = m_in.buffer; /* for reads and writes */ + fp->fp_nbytes = m_in.nbytes; + } +} + +/*===========================================================================* + * wait_for * + *===========================================================================*/ +PUBLIC void wait_for(endpoint_t who) +{ + if(who == NONE || who == ANY) + panic("suspend on NONE or ANY"); + suspend(FP_BLOCKED_ON_OTHER); + fp->fp_task = who; +} + + +/*===========================================================================* + * pipe_suspend * + *===========================================================================*/ +PUBLIC void pipe_suspend(rw_flag, fd_nr, buf, size) +int rw_flag; +int fd_nr; +char *buf; +size_t size; +{ +/* Take measures to suspend the processing of the present system call. + * Store the parameters to be used upon resuming in the process table. + * (Actually they are not used when a process is waiting for an I/O device, + * but they are needed for pipes, and it is not worth making the distinction.) + * The SUSPEND pseudo error should be returned after calling suspend(). + */ +#if DO_SANITYCHECKS + if(fp_is_blocked(fp)) + panic("pipe_suspend: called for suspended process"); +#endif + + susp_count++; /* #procs susp'ed on pipe*/ + fp->fp_blocked_on = FP_BLOCKED_ON_PIPE; + assert(!GRANT_VALID(fp->fp_grant)); + fp->fp_block_fd = fd_nr; + fp->fp_block_callnr = ((rw_flag == READING) ? READ : WRITE); + fp->fp_buffer = buf; + fp->fp_nbytes = size; +} + + +/*===========================================================================* + * unsuspend_by_endpt * + *===========================================================================*/ +PUBLIC void unsuspend_by_endpt(endpoint_t proc_e) +{ +/* Revive processes waiting for drivers (SUSPENDed) that have disappeared with + * return code EAGAIN. 
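+ * The EAGAIN forces such callers to retry instead of hanging forever on an
+ * endpoint that no longer exists.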
+ */ + struct fproc *rp; + + for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) { + if (rp->fp_pid == PID_FREE) continue; + if (rp->fp_blocked_on == FP_BLOCKED_ON_OTHER && rp->fp_task == proc_e) + revive(rp->fp_endpoint, EAGAIN); + } + + /* Revive processes waiting in drivers on select()s with EAGAIN too */ + select_unsuspend_by_endpt(proc_e); + + return; +} + + +/*===========================================================================* + * release * + *===========================================================================*/ +PUBLIC void release(vp, op, count) +register struct vnode *vp; /* inode of pipe */ +int op; /* READ, WRITE, OPEN or CREAT */ +int count; /* max number of processes to release */ +{ +/* Check to see if any process is hanging on the pipe whose inode is in 'ip'. + * If one is, and it was trying to perform the call indicated by 'call_nr', + * release it. + */ + + register struct fproc *rp; + struct filp *f; + int selop; + + /* Trying to perform the call also includes SELECTing on it with that + * operation. + */ + if (op == READ || op == WRITE) { + if (op == READ) + selop = SEL_RD; + else + selop = SEL_WR; + + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) { + if (f->filp_count < 1 || !(f->filp_pipe_select_ops & selop) || + f->filp_vno != vp) + continue; + select_callback(f, selop); + f->filp_pipe_select_ops &= ~selop; + } + } + + /* Search the proc table. */ + for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) { + if (rp->fp_pid != PID_FREE && fp_is_blocked(rp) && + !(rp->fp_flags & FP_REVIVED) && rp->fp_block_callnr == op && + rp->fp_filp[rp->fp_block_fd] != NULL && + rp->fp_filp[rp->fp_block_fd]->filp_vno == vp) { + revive(rp->fp_endpoint, 0); + susp_count--; /* keep track of who is suspended */ + if(susp_count < 0) + panic("susp_count now negative: %d", susp_count); + if (--count == 0) return; + } + } +} + + +/*===========================================================================* + * revive * + *===========================================================================*/ +PUBLIC void revive(proc_nr_e, returned) +int proc_nr_e; /* process to revive */ +int returned; /* if hanging on task, how many bytes read */ +{ +/* Revive a previously blocked process. When a process hangs on tty, this + * is the way it is eventually released. + */ + register struct fproc *rfp; + int blocked_on; + int fd_nr, slot; + struct filp *fil_ptr; + + if (proc_nr_e == NONE || isokendpt(proc_nr_e, &slot) != OK) return; + + rfp = &fproc[slot]; + if (!fp_is_blocked(rfp) || (rfp->fp_flags & FP_REVIVED)) return; + + /* The 'reviving' flag only applies to pipes. Processes waiting for TTY get + * a message right away. The revival process is different for TTY and pipes. + * For select and TTY revival, the work is already done, for pipes it is not: + * the proc must be restarted so it can try again. + */ + blocked_on = rfp->fp_blocked_on; + if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_LOCK) { + /* Revive a process suspended on a pipe or lock. 
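+ * The call is not completed here; the process is merely marked FP_REVIVED so
+ * that the suspended operation can be retried.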
*/ + rfp->fp_flags |= FP_REVIVED; + reviving++; /* process was waiting on pipe or lock */ + } else if (blocked_on == FP_BLOCKED_ON_DOPEN) { + rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; + fd_nr = rfp->fp_block_fd; + if (returned < 0) { + fil_ptr = rfp->fp_filp[fd_nr]; + lock_filp(fil_ptr, VNODE_OPCL); + rfp->fp_filp[fd_nr] = NULL; + FD_CLR(fd_nr, &rfp->fp_filp_inuse); + if (fil_ptr->filp_count != 1) { + panic("VFS: revive: bad count in filp: %d", + fil_ptr->filp_count); + } + fil_ptr->filp_count = 0; + unlock_filp(fil_ptr); + put_vnode(fil_ptr->filp_vno); + fil_ptr->filp_vno = NULL; + reply(proc_nr_e, returned); + } else { + reply(proc_nr_e, fd_nr); + } + } else { + rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; + if (blocked_on == FP_BLOCKED_ON_POPEN) { + /* process blocked in open or create */ + reply(proc_nr_e, rfp->fp_block_fd); + } else if (blocked_on == FP_BLOCKED_ON_SELECT) { + reply(proc_nr_e, returned); + } else { + /* Revive a process suspended on TTY or other device. + * Pretend it wants only what there is. + */ + rfp->fp_nbytes = returned; + /* If a grant has been issued by FS for this I/O, revoke + * it again now that I/O is done. + */ + if (GRANT_VALID(rfp->fp_grant)) { + if(cpf_revoke(rfp->fp_grant)) { + panic("VFS: revoke failed for grant: %d", + rfp->fp_grant); + } + rfp->fp_grant = GRANT_INVALID; + } + reply(proc_nr_e, returned); /* unblock the process */ + } + } +} + + +/*===========================================================================* + * unpause * + *===========================================================================*/ +PUBLIC void unpause(proc_nr_e) +int proc_nr_e; +{ +/* A signal has been sent to a user who is paused on the file system. + * Abort the system call with the EINTR error message. + */ + + register struct fproc *rfp, *org_fp; + int slot, blocked_on, fild, status = EINTR, major_dev, minor_dev; + struct filp *f; + dev_t dev; + message mess; + int wasreviving = 0; + + if (isokendpt(proc_nr_e, &slot) != OK) { + printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e); + return; + } + + rfp = &fproc[slot]; + if (!fp_is_blocked(rfp)) return; + blocked_on = rfp->fp_blocked_on; + + if (rfp->fp_flags & FP_REVIVED) { + rfp->fp_flags &= ~FP_REVIVED; + reviving--; + wasreviving = 1; + } + + switch (blocked_on) { + case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */ + break; + + case FP_BLOCKED_ON_LOCK:/* process trying to set a lock with FCNTL */ + break; + + case FP_BLOCKED_ON_SELECT:/* process blocking on select() */ + select_forget(proc_nr_e); + break; + + case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */ + break; + + case FP_BLOCKED_ON_DOPEN:/* process trying to open a device */ + /* Don't cancel OPEN. Just wait until the open completes. */ + return; + + case FP_BLOCKED_ON_OTHER:/* process trying to do device I/O (e.g. tty)*/ + if (rfp->fp_flags & FP_SUSP_REOPEN) { + /* Process is suspended while waiting for a reopen. + * Just reply EINTR. + */ + rfp->fp_flags &= ~FP_SUSP_REOPEN; + status = EINTR; + break; + } + + fild = rfp->fp_block_fd; + if (fild < 0 || fild >= OPEN_MAX) + panic("file descriptor out-of-range"); + f = rfp->fp_filp[fild]; + dev = (dev_t) f->filp_vno->v_sdev; /* device hung on */ + major_dev = major(dev); + minor_dev = minor(dev); + mess.TTY_LINE = minor_dev; + mess.USER_ENDPT = rfp->fp_ioproc; + mess.IO_GRANT = (char *) rfp->fp_grant; + + /* Tell kernel R or W. Mode is from current call, not open. */ + mess.COUNT = rfp->fp_block_callnr == READ ? 
R_BIT : W_BIT;
+	mess.m_type = CANCEL;
+
+	org_fp = fp;
+	fp = rfp;	/* hack - ctty_io uses fp */
+	(*dmap[major_dev].dmap_io)(rfp->fp_task, &mess);
+	fp = org_fp;
+	status = mess.REP_STATUS;
+	if (status == SUSPEND)
+		return;		/* Process will be revived at a
+				 * later time.
+				 */
+
+	if (status == EAGAIN) status = EINTR;
+	if (GRANT_VALID(rfp->fp_grant)) {
+		(void) cpf_revoke(rfp->fp_grant);
+		rfp->fp_grant = GRANT_INVALID;
+	}
+	break;
+    default:
+	panic("VFS: unknown block reason: %d", blocked_on);
+  }
+
+  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
+
+  if ((blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_POPEN) &&
+	!wasreviving) {
+	susp_count--;
+  }
+
+  reply(proc_nr_e, status);	/* signal interrupted call */
+}
+
+#if DO_SANITYCHECKS
+/*===========================================================================*
+ *				check_pipe				     *
+ *===========================================================================*/
+PUBLIC int check_pipe(void)
+{
+/* Integrity check; verify that susp_count equals what the fproc table thinks
+ * is suspended on a pipe */
+  struct fproc *rfp;
+  int count = 0;
+
+  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
+	if (rfp->fp_pid == PID_FREE) continue;
+	if ( !(rfp->fp_flags & FP_REVIVED) &&
+			(rfp->fp_blocked_on == FP_BLOCKED_ON_PIPE ||
+			 rfp->fp_blocked_on == FP_BLOCKED_ON_POPEN)) {
+		count++;
+	}
+  }
+
+  if (count != susp_count) {
+	printf("check_pipe: count %d susp_count %d\n", count, susp_count);
+	return(0);
+  }
+
+  return(1);
+}
+#endif
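The susp_count bookkeeping that check_pipe() verifies above is spread over suspend(), pipe_suspend(), release() and unpause(), which makes it easy to break. The stand-alone model below (hypothetical names and types, not part of this change; select() wake-ups and the global 'reviving' counter are left out) captures just the invariant being checked: the counter must equal the number of pipe-blocked slots that have not yet been revived.

#include <assert.h>

#define NPROC 8

enum blocked { BL_NONE, BL_PIPE, BL_POPEN };

struct slot { enum blocked blocked_on; int revived; };

static struct slot tab[NPROC];
static int susp_count;

/* Mirror of what suspend()/pipe_suspend() do to the counter. */
static void sl_suspend(struct slot *s, enum blocked why)
{
	s->blocked_on = why;
	s->revived = 0;
	susp_count++;
}

/* Mirror of release(): the slot is revived but stays blocked until retried. */
static void sl_release(struct slot *s)
{
	s->revived = 1;
	susp_count--;
}

/* Mirror of unpause(): only decrement when the slot was not revived yet. */
static void sl_unpause(struct slot *s)
{
	if (!s->revived) susp_count--;
	s->blocked_on = BL_NONE;
	s->revived = 0;
}

/* The invariant check_pipe() enforces. */
static int sl_check(void)
{
	int i, count = 0;

	for (i = 0; i < NPROC; i++)
		if (tab[i].blocked_on != BL_NONE && !tab[i].revived)
			count++;
	return count == susp_count;
}

int main(void)
{
	sl_suspend(&tab[0], BL_PIPE);
	sl_suspend(&tab[1], BL_POPEN);
	assert(sl_check());
	sl_release(&tab[0]);	/* revived; no longer counted */
	assert(sl_check());
	sl_unpause(&tab[1]);	/* interrupted by a signal */
	assert(sl_check());
	return 0;
}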
diff --git a/servers/avfs/protect.c b/servers/avfs/protect.c
new file mode 100644
index 000000000..300a47d31
--- /dev/null
+++ b/servers/avfs/protect.c
@@ -0,0 +1,274 @@
+/* This file deals with protection in the file system. It contains the code
+ * for four system calls that relate to protection.
+ *
+ * The entry points into this file are
+ *   do_chmod:	perform the CHMOD and FCHMOD system calls
+ *   do_chown:	perform the CHOWN and FCHOWN system calls
+ *   do_umask:	perform the UMASK system call
+ *   do_access:	perform the ACCESS system call
+ */
+
+#include "fs.h"
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include
+#include "vnode.h"
+#include "vmnt.h"
+
+/*===========================================================================*
+ *				do_chmod				     *
+ *===========================================================================*/
+PUBLIC int do_chmod()
+{
+/* Perform the chmod(name, mode) and fchmod(fd, mode) system calls. */
+
+  struct filp *flp;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  int r;
+  mode_t new_mode;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  flp = NULL;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_WRITE;
+  resolve.l_vnode_lock = VNODE_WRITE;
+
+  if (call_nr == CHMOD) {
+	/* Temporarily open the file */
+	if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+		return(err_code);
+	if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  } else {	/* call_nr == FCHMOD */
+	/* File is already opened; get a pointer to vnode from filp. */
+	if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
+		return(err_code);
+	vp = flp->filp_vno;
+	dup_vnode(vp);
+  }
+
+  /* Only the owner or the super_user may change the mode of a file.
+   * No one may change the mode of a file on a read-only file system.
+   */
+  if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
+	r = EPERM;
+  else
+	r = read_only(vp);
+
+  if (r == OK) {
+	/* Now make the change. Clear setgid bit if file is not in caller's
+	 * group */
+	if (fp->fp_effuid != SU_UID && vp->v_gid != fp->fp_effgid)
+		m_in.mode &= ~I_SET_GID_BIT;
+
+	r = req_chmod(vp->v_fs_e, vp->v_inode_nr, m_in.mode, &new_mode);
+	if (r == OK)
+		vp->v_mode = new_mode;
+  }
+
+  if (call_nr == CHMOD) {
+	unlock_vnode(vp);
+	unlock_vmnt(vmp);
+  } else {	/* FCHMOD */
+	unlock_filp(flp);
+  }
+
+  put_vnode(vp);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				do_chown				     *
+ *===========================================================================*/
+PUBLIC int do_chown()
+{
+/* Perform the chown(path, owner, group) and fchown(fd, owner, group) system
+ * calls. */
+  struct filp *flp;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  int r;
+  uid_t uid;
+  gid_t gid;
+  mode_t new_mode;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  flp = NULL;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_WRITE;
+  resolve.l_vnode_lock = VNODE_WRITE;
+
+  if (call_nr == CHOWN) {
+	/* Temporarily open the file. */
+	if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+		return(err_code);
+	if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  } else {	/* call_nr == FCHOWN */
+	/* File is already opened; get a pointer to the vnode from filp. */
+	if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
+		return(err_code);
+	vp = flp->filp_vno;
+	dup_vnode(vp);
+  }
+
+  r = read_only(vp);
+  if (r == OK) {
+	/* FS is R/W. Whether call is allowed depends on ownership, etc. */
+	/* The super user can do anything, so check permissions only if we're
+	   a regular user. */
+	if (fp->fp_effuid != SU_UID) {
+		/* Regular users can only change groups of their own files. */
+		if (vp->v_uid != fp->fp_effuid) r = EPERM;
+		if (vp->v_uid != m_in.owner) r = EPERM;	/* no giving away */
+		if (fp->fp_effgid != m_in.group) r = EPERM;
+	}
+  }
+
+  if (r == OK) {
+	/* Do not change uid/gid if new uid/gid is -1. */
+	uid = (m_in.owner == (uid_t)-1 ? vp->v_uid : m_in.owner);
+	gid = (m_in.group == (gid_t)-1 ? vp->v_gid : m_in.group);
+	if ((r = req_chown(vp->v_fs_e, vp->v_inode_nr, uid, gid,
+			   &new_mode)) == OK) {
+		vp->v_uid = uid;
+		vp->v_gid = gid;
+		vp->v_mode = new_mode;
+	}
+  }
+
+  if (call_nr == CHOWN) {
+	unlock_vnode(vp);
+	unlock_vmnt(vmp);
+  } else {	/* FCHOWN */
+	unlock_filp(flp);
+  }
+
+  put_vnode(vp);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				do_umask				     *
+ *===========================================================================*/
+PUBLIC int do_umask()
+{
+/* Perform the umask(co_mode) system call. */
+  register mode_t r;
+
+  r = ~fp->fp_umask;		/* set 'r' to complement of old mask */
+  fp->fp_umask = ~(m_in.co_mode & RWX_MODES);
+  return(r);			/* return complement of old mask */
+}
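do_access() below delegates the actual decision to forbidden(), whose core is a 3-bit shift into the mode word. Here is a minimal sketch of that selection; rwx_for() is our name for the example, the super-user and supplementary-group cases that forbidden() also handles are omitted, and the bit values match the usual MINIX definitions.

#include <sys/types.h>

#define R_BIT 4
#define W_BIT 2
#define X_BIT 1

/* Pick the rwx triplet of 'mode' that applies to caller (uid, gid),
 * given the file's owner and group. */
static mode_t rwx_for(mode_t mode, uid_t uid, gid_t gid,
	uid_t file_uid, gid_t file_gid)
{
	int shift;

	if (uid == file_uid) shift = 6;		/* owner bits */
	else if (gid == file_gid) shift = 3;	/* group bits */
	else shift = 0;				/* "other" bits */
	return (mode >> shift) & (R_BIT | W_BIT | X_BIT);
}

For mode 0640, for example, the owner gets R_BIT|W_BIT, a group member gets R_BIT, and everyone else gets nothing; access is then refused unless the requested bits are a subset of that triplet.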
+/*===========================================================================*
+ *				do_access				     *
+ *===========================================================================*/
+PUBLIC int do_access()
+{
+/* Perform the access(name, mode) system call. */
+  int r;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_READ;
+  resolve.l_vnode_lock = VNODE_READ;
+
+  /* First check to see if the mode is correct. */
+  if ( (m_in.mode & ~(R_OK | W_OK | X_OK)) != 0 && m_in.mode != F_OK)
+	return(EINVAL);
+
+  /* Temporarily open the file. */
+  if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
+	return(err_code);
+  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+  r = forbidden(vp, m_in.mode);
+
+  unlock_vnode(vp);
+  unlock_vmnt(vmp);
+
+  put_vnode(vp);
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				forbidden				     *
+ *===========================================================================*/
+PUBLIC int forbidden(struct vnode *vp, mode_t access_desired)
+{
+/* Given a pointer to a vnode, 'vp', and the access desired, determine
+ * if the access is allowed, and if not why not. The routine looks up the
+ * caller's uid in the 'fproc' table. If access is allowed, OK is returned;
+ * if it is forbidden, EACCES is returned.
+ */
+
+  register mode_t bits, perm_bits;
+  uid_t uid;
+  gid_t gid;
+  int r, shift;
+
+  if (vp->v_uid == (uid_t) -1 || vp->v_gid == (gid_t) -1) return(EACCES);
+
+  /* Isolate the relevant rwx bits from the mode. */
+  bits = vp->v_mode;
+  uid = (call_nr == ACCESS ? fp->fp_realuid : fp->fp_effuid);
+  gid = (call_nr == ACCESS ? fp->fp_realgid : fp->fp_effgid);
+
+  if (uid == SU_UID) {
+	/* Grant read and write permission. Grant search permission for
+	 * directories. Grant execute permission (for non-directories) if
+	 * and only if one of the 'X' bits is set.
+	 */
+	if ( (bits & I_TYPE) == I_DIRECTORY ||
+	     bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
+		perm_bits = R_BIT | W_BIT | X_BIT;
+	else
+		perm_bits = R_BIT | W_BIT;
+  } else {
+	if (uid == vp->v_uid) shift = 6;		/* owner */
+	else if (gid == vp->v_gid) shift = 3;		/* group */
+	else if (in_group(fp, vp->v_gid) == OK) shift = 3; /* suppl. groups */
+	else shift = 0;					/* other */
+	perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
+  }
+
+  /* If access desired is not a subset of what is allowed, it is refused. */
+  r = OK;
+  if ((perm_bits | access_desired) != perm_bits) r = EACCES;
+
+  /* Check to see if someone is trying to write on a file system that is
+   * mounted read-only.
+   */
+  if (r == OK)
+	if (access_desired & W_BIT)
+		r = read_only(vp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				read_only				     *
+ *===========================================================================*/
+PUBLIC int read_only(vp)
+struct vnode *vp;		/* ptr to inode whose file sys is to be cked */
+{
+/* Check to see if the file system on which the inode 'vp' resides is mounted
+ * read only. If so, return EROFS, else return OK.
+ */
+  return((vp->v_vmnt->m_flags & VMNT_READONLY) ? EROFS : OK);
+}
diff --git a/servers/avfs/proto.h b/servers/avfs/proto.h
new file mode 100644
index 000000000..31f7ca2b9
--- /dev/null
+++ b/servers/avfs/proto.h
@@ -0,0 +1,372 @@
+#ifndef __VFS_PROTO_H__
+#define __VFS_PROTO_H__
+
+/* Function prototypes. */
+
+#include "timers.h"
+#include "request.h"
+#include "tll.h"
+#include "threads.h"
+#include
+
+/* Structs used in prototypes must be declared as such first.
*/ +struct filp; +struct fproc; +struct vmnt; +struct vnode; +struct lookup; +struct worker_thread; +struct job; + +typedef struct filp * filp_id_t; + +/* comm.c */ +_PROTOTYPE(int fs_sendrec, (endpoint_t fs_e, message *reqm) ); +_PROTOTYPE(void fs_sendmore, (struct vmnt *vmp) ); +_PROTOTYPE(void send_work, (void) ); + +/* device.c */ +_PROTOTYPE( int dev_open, (dev_t dev, endpoint_t proc_e, int flags) ); +_PROTOTYPE( int dev_reopen, (dev_t dev, int filp_no, int flags) ); +_PROTOTYPE( int dev_close, (dev_t dev, int filp_no) ); +_PROTOTYPE( int dev_io, (int op, dev_t dev, endpoint_t proc_e, void *buf, + u64_t pos, size_t bytes, int flags, int suspend_reopen) ); +_PROTOTYPE( int gen_opcl, (int op, dev_t dev, endpoint_t task_nr, int flags)); +_PROTOTYPE( int gen_io, (int task_nr, message *mess_ptr) ); +_PROTOTYPE( int asyn_io, (int task_nr, message *mess_ptr) ); +_PROTOTYPE( int no_dev, (int op, dev_t dev, int proc, int flags) ); +_PROTOTYPE( int no_dev_io, (int, message *) ); +_PROTOTYPE( int tty_opcl, (int op, dev_t dev, endpoint_t proc, int flags)); +_PROTOTYPE( int ctty_opcl, (int op, dev_t dev, endpoint_t proc, int flags)); +_PROTOTYPE( int clone_opcl, (int op, dev_t dev, int proc, int flags) ); +_PROTOTYPE( int ctty_io, (int task_nr, message *mess_ptr) ); +_PROTOTYPE( int do_ioctl, (void) ); +_PROTOTYPE( void pm_setsid, (int proc_e) ); +_PROTOTYPE( void dev_status, (message *) ); +_PROTOTYPE( void dev_up, (int major) ); +_PROTOTYPE( endpoint_t find_suspended_ep, (endpoint_t driver, + cp_grant_id_t g) ); +_PROTOTYPE( void reopen_reply, (void) ); +_PROTOTYPE( void open_reply, (void) ); + +/* dmap.c */ +_PROTOTYPE( int do_mapdriver, (void) ); +_PROTOTYPE( void init_dmap, (void) ); +_PROTOTYPE( int dmap_driver_match, (endpoint_t proc, int major) ); +_PROTOTYPE( void dmap_endpt_up, (int proc_nr) ); +_PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr) ); +_PROTOTYPE( struct dmap *get_dmap, (endpoint_t proc_e) ); +_PROTOTYPE( int do_mapdriver, (void) ); +_PROTOTYPE( int map_service, (struct rprocpub *rpub) ); +_PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr) ); +_PROTOTYPE( struct dmap *get_dmap, (endpoint_t proc_e) ); +_PROTOTYPE( int map_driver, (const char *label, int major, endpoint_t proc_nr, + int dev_style, int flags) ); +_PROTOTYPE( int map_service, (struct rprocpub *rpub) ); + +/* exec.c */ +_PROTOTYPE( int pm_exec, (int proc_e, char *path, vir_bytes path_len, + char *frame, vir_bytes frame_len, vir_bytes *pc)); +#define check_bsf_lock() do { \ + assert(mutex_trylock(&bsf_lock) == 0); \ + unlock_bsf(); \ + } while(0) + +/* filedes.c */ +_PROTOTYPE( void check_filp_locks, (void) ); +_PROTOTYPE( void check_filp_locks_by_me, (void) ); +_PROTOTYPE( void init_filps, (void) ); +_PROTOTYPE( struct filp *find_filp, (struct vnode *vp, mode_t bits) ); +_PROTOTYPE( int get_fd, (int start, mode_t bits, int *k, + struct filp **fpt) ); +_PROTOTYPE( struct filp *get_filp, (int fild, tll_access_t locktype) ); +_PROTOTYPE( struct filp *get_filp2, (struct fproc *rfp, int fild, + tll_access_t locktype) ); +_PROTOTYPE( void lock_filp, (struct filp *filp, tll_access_t locktype) ); +_PROTOTYPE( void unlock_filp, (struct filp *filp) ); +_PROTOTYPE( void unlock_filps, (struct filp *filp1, struct filp *filp2) ); +_PROTOTYPE( int invalidate, (struct filp *) ); +_PROTOTYPE( int do_verify_fd, (void) ); +_PROTOTYPE( int set_filp, (filp_id_t sfilp) ); +_PROTOTYPE( int do_set_filp, (void) ); +_PROTOTYPE( int copy_filp, (endpoint_t to_ep, filp_id_t cfilp) ); +_PROTOTYPE( int do_copy_filp, (void) ); +_PROTOTYPE( 
int put_filp, (filp_id_t pfilp) ); +_PROTOTYPE( int do_put_filp, (void) ); +_PROTOTYPE( int cancel_fd, (endpoint_t ep, int fd) ); +_PROTOTYPE( int do_cancel_fd, (void) ); +_PROTOTYPE( void close_filp, (struct filp *fp) ); + +/* fscall.c */ +_PROTOTYPE( void nested_fs_call, (message *m) ); + +/* link.c */ +_PROTOTYPE( int do_link, (void) ); +_PROTOTYPE( int do_unlink, (void) ); +_PROTOTYPE( int do_rename, (void) ); +_PROTOTYPE( int do_truncate, (void) ); +_PROTOTYPE( int do_ftruncate, (void) ); +_PROTOTYPE( int truncate_vnode, (struct vnode *vp, off_t newsize) ); +_PROTOTYPE( int rdlink_direct, (char *orig_path, char *link_path, + struct fproc *rfp) ); + +/* lock.c */ +_PROTOTYPE( int lock_op, (struct filp *f, int req) ); +_PROTOTYPE( void lock_revive, (void) ); + +/* main.c */ +_PROTOTYPE( int main, (void) ); +_PROTOTYPE( void reply, (int whom, int result) ); +_PROTOTYPE( void lock_proc, (struct fproc *rfp, int force_lock) ); +_PROTOTYPE( void unlock_proc, (struct fproc *rfp) ); +_PROTOTYPE( void *do_dummy, (void *arg) ); + +/* misc.c */ +_PROTOTYPE( int do_dup, (void) ); +_PROTOTYPE( void pm_exit, (int proc) ); +_PROTOTYPE( int do_fcntl, (void) ); +_PROTOTYPE( void pm_fork, (int pproc, int cproc, int cpid) ); +_PROTOTYPE( void pm_setgid, (int proc_e, int egid, int rgid) ); +_PROTOTYPE( void pm_setuid, (int proc_e, int euid, int ruid) ); +_PROTOTYPE( void pm_setgroups, (int proc_e, int ngroups, gid_t *addr) ); +_PROTOTYPE( int do_sync, (void) ); +_PROTOTYPE( int do_fsync, (void) ); +_PROTOTYPE( void pm_reboot, (void) ); +_PROTOTYPE( int do_svrctl, (void) ); +_PROTOTYPE( int do_getsysinfo, (void) ); +_PROTOTYPE( int pm_dumpcore, (int proc_e, struct mem_map *seg_ptr) ); +_PROTOTYPE( void ds_event, (void) ); + +/* mount.c */ +_PROTOTYPE( int do_fsready, (void) ); +_PROTOTYPE( int do_mount, (void) ); +_PROTOTYPE( int do_umount, (void) ); +_PROTOTYPE( void mount_pfs, (void) ); +_PROTOTYPE( int mount_fs, (dev_t dev, char fullpath[PATH_MAX+1], + endpoint_t fs_e, int rdonly, + char mount_label[LABEL_MAX]) ); +_PROTOTYPE( int unmount, (dev_t dev, char *label) ); +_PROTOTYPE( void unmount_all, (void) ); + +/* open.c */ +_PROTOTYPE( int do_close, (void) ); +_PROTOTYPE( int close_fd, (struct fproc *rfp, int fd_nr) ); +_PROTOTYPE( void close_reply, (void) ); +_PROTOTYPE( int do_creat, (void) ); +_PROTOTYPE( int do_lseek, (void) ); +_PROTOTYPE( int do_llseek, (void) ); +_PROTOTYPE( int do_mknod, (void) ); +_PROTOTYPE( int do_mkdir, (void) ); +_PROTOTYPE( int do_open, (void) ); +_PROTOTYPE( int do_slink, (void) ); +_PROTOTYPE( int do_vm_open, (void) ); +_PROTOTYPE( int do_vm_close, (void) ); + +/* path.c */ +_PROTOTYPE( struct vnode *advance, (struct vnode *dirp, struct lookup *resolve, + struct fproc *rfp) ); +_PROTOTYPE( struct vnode *eat_path, (struct lookup *resolve, + struct fproc *rfp) ); +_PROTOTYPE( struct vnode *last_dir, (struct lookup *resolve, + struct fproc *rfp) ); +_PROTOTYPE( void lookup_init, (struct lookup *resolve, char *path, int flags, + struct vmnt **vmp, struct vnode **vp) ); +_PROTOTYPE( int get_name, (struct vnode *dirp, struct vnode *entry, + char *_name) ); +_PROTOTYPE( int canonical_path, (char *orig_path, char *canon_path, + struct fproc *rfp) ); +_PROTOTYPE( int do_check_perms, (void) ); + +/* pipe.c */ +_PROTOTYPE( int do_pipe, (void) ); +_PROTOTYPE( int map_vnode, (struct vnode *vp, endpoint_t fs_e) ); +_PROTOTYPE( void unpause, (int proc_nr_e) ); +_PROTOTYPE( int pipe_check, (struct vnode *vp, int rw_flag, + int oflags, int bytes, u64_t position, int notouch) ); 
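+/* Every declaration in this header is wrapped in _PROTOTYPE() so the file
+ * can be compiled both by ANSI and by old-style (K&R) compilers: under ANSI
+ * rules _PROTOTYPE( int do_pipe, (void) ) expands to int do_pipe(void),
+ * while a K&R build degrades it to int do_pipe().
+ */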
+_PROTOTYPE( void release, (struct vnode *vp, int call_nr, int count) ); +_PROTOTYPE( void revive, (int proc_nr, int bytes) ); +_PROTOTYPE( void suspend, (int task) ); +_PROTOTYPE( void pipe_suspend, (int rw_flag, int fd_nr, char *buf, + size_t size) ); +_PROTOTYPE( void unsuspend_by_endpt, (endpoint_t) ); +_PROTOTYPE( void wait_for, (endpoint_t) ); +#if DO_SANITYCHECKS +_PROTOTYPE( int check_pipe, (void) ); +#endif + +/* protect.c */ +_PROTOTYPE( int do_access, (void) ); +_PROTOTYPE( int do_chmod, (void) ); +_PROTOTYPE( int do_chown, (void) ); +_PROTOTYPE( int do_umask, (void) ); +_PROTOTYPE( int forbidden, (struct vnode *vp, mode_t access_desired) ); +_PROTOTYPE( int read_only, (struct vnode *vp) ); + +/* read.c */ +_PROTOTYPE( int do_read, (void) ); +_PROTOTYPE( int do_getdents, (void) ); +_PROTOTYPE( void lock_bsf, (void) ); +_PROTOTYPE( void unlock_bsf, (void) ); +_PROTOTYPE( int read_write, (int rw_flag) ); +_PROTOTYPE( int rw_pipe, (int rw_flag, endpoint_t usr, + int fd_nr, struct filp *f, char *buf, size_t req_size) ); + +/* request.c */ +_PROTOTYPE( int req_breadwrite, (endpoint_t fs_e, endpoint_t user_e, + dev_t dev, u64_t pos, unsigned int num_of_bytes, + char *user_addr, int rw_flag, + u64_t *new_posp, unsigned int *cum_iop) ); +_PROTOTYPE( int req_chmod, (int fs_e, ino_t inode_nr, mode_t rmode, + mode_t *new_modep) ); +_PROTOTYPE( int req_chown, (endpoint_t fs_e, ino_t inode_nr, + uid_t newuid, gid_t newgid, mode_t *new_modep) ); +_PROTOTYPE( int req_create, (int fs_e, ino_t inode_nr, int omode, + uid_t uid, gid_t gid, char *path, node_details_t *res) ); +_PROTOTYPE( int req_flush, (endpoint_t fs_e, dev_t dev) ); +_PROTOTYPE( int req_fstatfs, (int fs_e, int who_e, char *buf) ); +_PROTOTYPE( int req_statvfs, (int fs_e, int who_e, char *buf) ); +_PROTOTYPE( int req_ftrunc, (endpoint_t fs_e, ino_t inode_nr, + off_t start, off_t end) ); +_PROTOTYPE( int req_getdents, (endpoint_t fs_e, ino_t inode_nr, + u64_t pos, char *buf, size_t size, + u64_t *new_pos, int direct) ); +_PROTOTYPE( int req_inhibread, (endpoint_t fs_e, ino_t inode_nr) ); +_PROTOTYPE( int req_link, (endpoint_t fs_e, ino_t link_parent, + char *lastc, ino_t linked_file) ); +_PROTOTYPE( int req_lookup, (endpoint_t fs_e, ino_t dir_ino, ino_t root_ino, + uid_t uid, gid_t gid, struct lookup *resolve, + lookup_res_t *res, struct fproc *rfp) ); +_PROTOTYPE( int req_mkdir, (endpoint_t fs_e, ino_t inode_nr, + char *lastc, uid_t uid, gid_t gid, mode_t dmode) ); +_PROTOTYPE( int req_mknod, (endpoint_t fs_e, ino_t inode_nr, + char *lastc, uid_t uid, gid_t gid, + mode_t dmode, dev_t dev) ); +_PROTOTYPE( int req_mountpoint, (endpoint_t fs_e, ino_t inode_nr) ); +_PROTOTYPE( int req_newnode, (endpoint_t fs_e, uid_t uid, + gid_t gid, mode_t dmode, + dev_t dev, struct node_details *res) ); +_PROTOTYPE( int req_putnode, (int fs_e, ino_t inode_nr, int count) ); +_PROTOTYPE( int req_rdlink, (endpoint_t fs_e, ino_t inode_nr, + endpoint_t who_e, char *buf, size_t len, + int direct) ); +_PROTOTYPE( int req_readsuper, (endpoint_t fs_e, char *driver_name, + dev_t dev, int readonly, int isroot, + struct node_details *res_nodep) ); +_PROTOTYPE( int req_readwrite, (endpoint_t fs_e, ino_t inode_nr, + u64_t pos, int rw_flag, + endpoint_t user_e, char *user_addr, + unsigned int num_of_bytes, u64_t *new_posp, + unsigned int *cum_iop) ); +_PROTOTYPE( int req_rename, (endpoint_t fs_e, ino_t old_dir, + char *old_name, ino_t new_dir, char *new_name) ); +_PROTOTYPE( int req_rmdir, (endpoint_t fs_e, ino_t inode_nr, + char *lastc) ); +_PROTOTYPE(int 
req_slink, (endpoint_t fs_e, ino_t inode_nr, char *lastc, + endpoint_t who_e, char *path_addr, + unsigned short path_length, uid_t uid, gid_t gid) ); +_PROTOTYPE( int req_stat, (int fs_e, ino_t inode_nr, int who_e, + char *buf, int pos, int stat_version) ); +_PROTOTYPE( int req_sync, (endpoint_t fs_e) ); +_PROTOTYPE( int req_unlink, (endpoint_t fs_e, ino_t inode_nr, + char *lastc) ); +_PROTOTYPE( int req_unmount, (endpoint_t fs_e) ); +_PROTOTYPE( int req_utime, (endpoint_t fs_e, ino_t inode_nr, + time_t actime, time_t modtime) ); +_PROTOTYPE( int req_newdriver, (endpoint_t fs_e, dev_t dev, + endpoint_t driver_e) ); + +/* stadir.c */ +_PROTOTYPE( int do_chdir, (void) ); +_PROTOTYPE( int do_fchdir, (void) ); +_PROTOTYPE( int do_chroot, (void) ); +_PROTOTYPE( int do_fstat, (void) ); +_PROTOTYPE( int do_stat, (void) ); +_PROTOTYPE( int do_fstatfs, (void) ); +_PROTOTYPE( int do_statvfs, (void) ); +_PROTOTYPE( int do_fstatvfs, (void) ); +_PROTOTYPE( int do_rdlink, (void) ); +_PROTOTYPE( int do_lstat, (void) ); + +/* time.c */ +_PROTOTYPE( int do_utime, (void) ); + +/* tll.c */ +_PROTOTYPE( void tll_downgrade, (tll_t *tllp) ); +_PROTOTYPE( int tll_haspendinglock, (tll_t *tllp) ); +_PROTOTYPE( void tll_init, (tll_t *tllp) ); +_PROTOTYPE( int tll_islocked, (tll_t *tllp) ); +_PROTOTYPE( int tll_lock, (tll_t *tllp, tll_access_t locktype) ); +_PROTOTYPE( int tll_locked_by_me, (tll_t *tllp) ); +_PROTOTYPE( void tll_lockstat, (tll_t *tllp) ); +_PROTOTYPE( int tll_unlock, (tll_t *tllp) ); +_PROTOTYPE( void tll_upgrade, (tll_t *tllp) ); + +/* utility.c */ +_PROTOTYPE( time_t clock_time, (void) ); +_PROTOTYPE( unsigned conv2, (int norm, int w) ); +_PROTOTYPE( long conv4, (int norm, long x) ); +_PROTOTYPE( int fetch_name, (char *path, int len, int flag, char *dest) ); +_PROTOTYPE( int no_sys, (void) ); +_PROTOTYPE( int isokendpt_f, (char *f, int l, endpoint_t e, int *p, int ft)); +_PROTOTYPE( int in_group, (struct fproc *rfp, gid_t grp) ); + +#define okendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 1) +#define isokendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 0) + +/* vmnt.c */ +_PROTOTYPE( void check_vmnt_locks, (void) ); +_PROTOTYPE( void check_vmnt_locks_by_me, (struct fproc *rfp) ); +_PROTOTYPE( struct vmnt *get_free_vmnt, (void) ); +_PROTOTYPE( struct vmnt *find_vmnt, (endpoint_t fs_e) ); +_PROTOTYPE( struct vmnt *get_locked_vmnt, (struct fproc *rfp) ); +_PROTOTYPE( void init_vmnts, (void) ); +_PROTOTYPE( int lock_vmnt, (struct vmnt *vp, tll_access_t locktype) ); +_PROTOTYPE( void unlock_vmnt, (struct vmnt *vp) ); + +/* vnode.c */ +_PROTOTYPE( void check_vnode_locks, (void) ); +_PROTOTYPE( void check_vnode_locks_by_me, (struct fproc *rfp) ); +_PROTOTYPE( struct vnode *get_free_vnode, (void) ); +_PROTOTYPE( struct vnode *find_vnode, (int fs_e, int numb) ); +_PROTOTYPE( void init_vnodes, (void) ); +_PROTOTYPE( int is_vnode_locked, (struct vnode *vp) ); +_PROTOTYPE( int lock_vnode, (struct vnode *vp, tll_access_t locktype) ); +_PROTOTYPE( void unlock_vnode, (struct vnode *vp) ); +_PROTOTYPE( void dup_vnode, (struct vnode *vp) ); +_PROTOTYPE( void put_vnode, (struct vnode *vp) ); +_PROTOTYPE( void vnode_clean_refs, (struct vnode *vp) ); +#if DO_SANITYCHECKS +_PROTOTYPE( int check_vrefs, (void) ); +#endif + +/* write.c */ +_PROTOTYPE( int do_write, (void) ); + +/* gcov.c */ +_PROTOTYPE( int do_gcov_flush, (void) ); +#if ! 
USE_COVERAGE +#define do_gcov_flush no_sys +#endif + +/* select.c */ +_PROTOTYPE( int do_select, (void) ); +_PROTOTYPE( void init_select, (void) ); +_PROTOTYPE( void select_callback, (struct filp *, int ops) ); +_PROTOTYPE( void select_forget, (endpoint_t proc_e) ); +_PROTOTYPE( void select_reply1, (endpoint_t driver_e, int minor, int status)); +_PROTOTYPE( void select_reply2, (endpoint_t driver_e, int minor, int status)); +_PROTOTYPE( void select_timeout_check, (timer_t *) ); +_PROTOTYPE( void select_unsuspend_by_endpt, (endpoint_t proc) ); + +/* worker.c */ +_PROTOTYPE( int worker_available, (void) ); +_PROTOTYPE( struct worker_thread *worker_get, (thread_t worker_tid) ); +_PROTOTYPE( struct job *worker_getjob, (thread_t worker_tid) ); +_PROTOTYPE( void worker_init, (struct worker_thread *worker) ); +_PROTOTYPE( struct worker_thread *worker_self, (void) ); +_PROTOTYPE( void worker_start, (void *(*func)(void *arg)) ); +_PROTOTYPE( void worker_signal, (struct worker_thread *worker) ); +_PROTOTYPE( void worker_wait, (void) ); +_PROTOTYPE( void sys_worker_start, (void *(*func)(void *arg)) ); +_PROTOTYPE( void dl_worker_start, (void *(*func)(void *arg)) ); +#endif diff --git a/servers/avfs/read.c b/servers/avfs/read.c new file mode 100644 index 000000000..ffde1f9db --- /dev/null +++ b/servers/avfs/read.c @@ -0,0 +1,326 @@ +/* This file contains the heart of the mechanism used to read (and write) + * files. Read and write requests are split up into chunks that do not cross + * block boundaries. Each chunk is then processed in turn. Reads on special + * files are also detected and handled. + * + * The entry points into this file are + * do_read: perform the READ system call by calling read_write + * do_getdents: read entries from a directory (GETDENTS) + * read_write: actually do the work of READ and WRITE + * + */ + +#include "fs.h" +#include +#include +#include +#include +#include "file.h" +#include "fproc.h" +#include "param.h" +#include +#include +#include +#include "vnode.h" +#include "vmnt.h" + + +/*===========================================================================* + * do_read * + *===========================================================================*/ +PUBLIC int do_read() +{ + return(read_write(READING)); +} + + +/*===========================================================================* + * lock_bsf * + *===========================================================================*/ +PUBLIC void lock_bsf(void) +{ + message org_m_in; + struct fproc *org_fp; + struct worker_thread *org_self; + + if (mutex_trylock(&bsf_lock) == 0) + return; + + org_m_in = m_in; + org_fp = fp; + org_self = self; + assert(mutex_lock(&bsf_lock) == 0); + m_in = org_m_in; + fp = org_fp; + self = org_self; +} + +/*===========================================================================* + * unlock_bsf * + *===========================================================================*/ +PUBLIC void unlock_bsf(void) +{ + assert(mutex_unlock(&bsf_lock) == 0); +} + +/*===========================================================================* + * read_write * + *===========================================================================*/ +PUBLIC int read_write(rw_flag) +int rw_flag; /* READING or WRITING */ +{ +/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. 
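+ * The request is dispatched on the vnode type: pipes go through rw_pipe(),
+ * character and block specials through the device layer, and regular files
+ * to the file system that holds the inode.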
*/ + register struct filp *f; + register struct vnode *vp; + u64_t position, res_pos, new_pos; + unsigned int cum_io, cum_io_incr, res_cum_io; + int op, oflags, r, block_spec, char_spec, regular; + tll_access_t locktype; + mode_t mode_word; + + /* If the file descriptor is valid, get the vnode, size and mode. */ + if (m_in.nbytes < 0) return(EINVAL); + locktype = (rw_flag == READING) ? VNODE_READ : VNODE_WRITE; + if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code); + if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) { + unlock_filp(f); + return(f->filp_mode == FILP_CLOSED ? EIO : EBADF); + } + if (m_in.nbytes == 0) { + unlock_filp(f); + return(0); /* so char special files need not check for 0*/ + } + + position = f->filp_pos; + oflags = f->filp_flags; + vp = f->filp_vno; + r = OK; + cum_io = 0; + + if (vp->v_pipe == I_PIPE) { + if (fp->fp_cum_io_partial != 0) { + panic("VFS: read_write: fp_cum_io_partial not clear"); + } + r = rw_pipe(rw_flag, who_e, m_in.fd, f, m_in.buffer, m_in.nbytes); + unlock_filp(f); + return(r); + } + + op = (rw_flag == READING ? VFS_DEV_READ : VFS_DEV_WRITE); + mode_word = vp->v_mode & I_TYPE; + regular = mode_word == I_REGULAR; + + if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) { + if (vp->v_sdev == NO_DEV) + panic("VFS: read_write tries to access char dev NO_DEV"); + } + + if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) { + if (vp->v_sdev == NO_DEV) + panic("VFS: read_write tries to access block dev NO_DEV"); + } + + if (char_spec) { /* Character special files. */ + dev_t dev; + int suspend_reopen; + + suspend_reopen = (f->filp_state != FS_NORMAL); + dev = (dev_t) vp->v_sdev; + + r = dev_io(op, dev, who_e, m_in.buffer, position, m_in.nbytes, oflags, + suspend_reopen); + if (r >= 0) { + cum_io = r; + position = add64ul(position, r); + r = OK; + } + } else if (block_spec) { /* Block special files. */ + lock_bsf(); + + r = req_breadwrite(vp->v_bfs_e, who_e, vp->v_sdev, position, + m_in.nbytes, m_in.buffer, rw_flag, &res_pos, &res_cum_io); + if (r == OK) { + position = res_pos; + cum_io += res_cum_io; + } + + unlock_bsf(); + } else { /* Regular files */ + if (rw_flag == WRITING && block_spec == 0) { + /* Check for O_APPEND flag. */ + if (oflags & O_APPEND) position = cvul64(vp->v_size); + } + + /* Issue request */ + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position, rw_flag, who_e, + m_in.buffer, m_in.nbytes, &new_pos, &cum_io_incr); + + if (r >= 0) { + if (ex64hi(new_pos)) + panic("read_write: bad new pos"); + + position = new_pos; + cum_io += cum_io_incr; + } + } + + /* On write, update file size and access time. */ + if (rw_flag == WRITING) { + if (regular || mode_word == I_DIRECTORY) { + if (cmp64ul(position, vp->v_size) > 0) { + if (ex64hi(position) != 0) { + panic("read_write: file size too big "); + } + vp->v_size = ex64lo(position); + } + } + } + + f->filp_pos = position; + unlock_filp(f); + + if (r == OK) return(cum_io); + return(r); +} + + +/*===========================================================================* + * do_getdents * + *===========================================================================*/ +PUBLIC int do_getdents() +{ +/* Perform the getdents(fd, buf, size) system call. */ + int r = OK; + u64_t new_pos; + register struct filp *rfilp; + + /* Is the file descriptor valid? 
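+ * The descriptor must be open for reading and must refer to a directory;
+ * anything else fails with EBADF below.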
*/ + if ( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code); + + if (!(rfilp->filp_mode & R_BIT)) + r = EBADF; + else if ((rfilp->filp_vno->v_mode & I_TYPE) != I_DIRECTORY) + r = EBADF; + + if (r == OK) { + if (ex64hi(rfilp->filp_pos) != 0) + panic("do_getdents: can't handle large offsets"); + + r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr, + rfilp->filp_pos, m_in.buffer, m_in.nbytes,&new_pos,0); + + if (r > 0) rfilp->filp_pos = new_pos; + } + + unlock_filp(rfilp); + return(r); +} + + +/*===========================================================================* + * rw_pipe * + *===========================================================================*/ +PUBLIC int rw_pipe(rw_flag, usr_e, fd_nr, f, buf, req_size) +int rw_flag; /* READING or WRITING */ +endpoint_t usr_e; +int fd_nr; +struct filp *f; +char *buf; +size_t req_size; +{ + int r, oflags, partial_pipe = 0; + size_t size, cum_io, cum_io_incr; + struct vnode *vp; + u64_t position, new_pos; + + /* Must make sure we're operating on locked filp and vnode */ + assert(tll_islocked(&f->filp_vno->v_lock)); + assert(mutex_trylock(&f->filp_lock) == -EDEADLK); + + oflags = f->filp_flags; + vp = f->filp_vno; + position = cvu64((rw_flag == READING) ? vp->v_pipe_rd_pos : + vp->v_pipe_wr_pos); + /* fp->fp_cum_io_partial is only nonzero when doing partial writes */ + cum_io = fp->fp_cum_io_partial; + + r = pipe_check(vp, rw_flag, oflags, req_size, position, 0); + if (r <= 0) { + if (r == SUSPEND) pipe_suspend(rw_flag, fd_nr, buf, req_size); + return(r); + } + + size = r; + if (size < req_size) partial_pipe = 1; + + /* Truncate read request at size. */ + if((rw_flag == READING) && + cmp64ul(add64ul(position, size), vp->v_size) > 0) { + /* Position always should fit in an off_t (LONG_MAX). */ + off_t pos32; + + assert(cmp64ul(position, LONG_MAX) <= 0); + pos32 = cv64ul(position); + assert(pos32 >= 0); + assert(pos32 <= LONG_MAX); + size = vp->v_size - pos32; + } + + if (vp->v_mapfs_e == 0) + panic("unmapped pipe"); + + r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e, + buf, size, &new_pos, &cum_io_incr); + + if (r >= 0) { + if (ex64hi(new_pos)) + panic("rw_pipe: bad new pos"); + + position = new_pos; + cum_io += cum_io_incr; + buf += cum_io_incr; + req_size -= cum_io_incr; + } + + /* On write, update file size and access time. 
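+ * On read, a fully drained pipe gets both offsets reset to zero, so the
+ * active window always stays within the PIPE_BUF-sized backing buffer.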
 */
+  if (rw_flag == WRITING) {
+	if (cmp64ul(position, vp->v_size) > 0) {
+		if (ex64hi(position) != 0) {
+			panic("rw_pipe: file size too big for v_size");
+		}
+		vp->v_size = ex64lo(position);
+	}
+  } else {
+	if (cmp64ul(position, vp->v_size) >= 0) {
+		/* Reset pipe pointers */
+		vp->v_size = 0;
+		vp->v_pipe_rd_pos = 0;
+		vp->v_pipe_wr_pos = 0;
+		position = cvu64(0);
+	}
+  }
+
+  if (rw_flag == READING)
+	vp->v_pipe_rd_pos = cv64ul(position);
+  else
+	vp->v_pipe_wr_pos = cv64ul(position);
+
+  if (r == OK) {
+	if (partial_pipe) {
+		/* Partial write on pipe with O_NONBLOCK; return write count. */
+		if (!(oflags & O_NONBLOCK)) {
+			/* partial write on pipe with req_size > PIPE_SIZE,
+			 * non-atomic
+			 */
+			fp->fp_cum_io_partial = cum_io;
+			pipe_suspend(rw_flag, fd_nr, buf, req_size);
+			return(SUSPEND);
+		}
+	}
+	fp->fp_cum_io_partial = 0;
+	return(cum_io);
+  }
+
+  return(r);
+}
diff --git a/servers/avfs/request.c b/servers/avfs/request.c
new file mode 100644
index 000000000..6f31777a0
--- /dev/null
+++ b/servers/avfs/request.c
@@ -0,0 +1,1093 @@
+/* This file contains the wrapper functions for issuing a request to and
+ * receiving a response from an FS process.
+ * Each function builds a request message according to its parameters,
+ * calls the low-level fs_sendrec() and copies back the response.
+ * The low-level fs_sendrec() handles the recovery mechanism from
+ * a dead driver and reissues the request.
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "fproc.h"
+#include "vmnt.h"
+#include "vnode.h"
+#include "path.h"
+#include "param.h"
+
+
+/*===========================================================================*
+ *				req_breadwrite				     *
+ *===========================================================================*/
+PUBLIC int req_breadwrite(
+  endpoint_t fs_e,
+  endpoint_t user_e,
+  dev_t dev,
+  u64_t pos,
+  unsigned int num_of_bytes,
+  char *user_addr,
+  int rw_flag,
+  u64_t *new_posp,
+  unsigned int *cum_iop
+)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  message m;
+
+  grant_id = cpf_grant_magic(fs_e, user_e, (vir_bytes) user_addr, num_of_bytes,
+			(rw_flag == READING ? CPF_WRITE : CPF_READ));
+  if (grant_id == -1)
+	panic("req_breadwrite: cpf_grant_magic failed");
+
+  /* Fill in request message */
+  m.m_type = rw_flag == READING ? REQ_BREAD : REQ_BWRITE;
+  m.REQ_DEV2 = dev;
+  m.REQ_GRANT = grant_id;
+  m.REQ_SEEK_POS_LO = ex64lo(pos);
+  m.REQ_SEEK_POS_HI = ex64hi(pos);
+  m.REQ_NBYTES = num_of_bytes;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+  if (r != OK) return(r);
+
+  /* Fill in response structure */
+  *new_posp = make64(m.RES_SEEK_POS_LO, m.RES_SEEK_POS_HI);
+  *cum_iop = m.RES_NBYTES;
+
+  return(OK);
+}
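Each wrapper in this file repeats the same five-step skeleton: create a grant, fill in the request message, rendezvous with the FS process, revoke the grant, unpack the reply. A condensed sketch of that skeleton follows; req_generic is a hypothetical name, the code is written against the same headers this file includes, and a real wrapper picks whichever REQ_*/RES_* fields its request needs.

static int req_generic(endpoint_t fs_e, int req_type, void *buf, size_t len,
	int fs_writes)
{
	message m;
	cp_grant_id_t grant_id;
	int r;

	/* 1. Grant the FS access to the local buffer, for this request only. */
	grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, len,
		fs_writes ? CPF_WRITE : CPF_READ);
	if (grant_id == -1)
		panic("req_generic: cpf_grant_direct failed");

	/* 2. Fill in the request message. */
	m.m_type = req_type;
	m.REQ_GRANT = grant_id;
	m.REQ_MEM_SIZE = len;

	/* 3. Rendezvous with the FS process. */
	r = fs_sendrec(fs_e, &m);

	/* 4. The grant must never outlive the request. */
	cpf_revoke(grant_id);

	/* 5. Unpack whatever the reply carries (here: a byte count). */
	if (r == OK) r = m.RES_NBYTES;
	return(r);
}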
+/*===========================================================================*
+ *				req_chmod				     *
+ *===========================================================================*/
+PUBLIC int req_chmod(
+  int fs_e,
+  ino_t inode_nr,
+  mode_t rmode,
+  mode_t *new_modep
+)
+{
+  message m;
+  int r;
+
+  /* Fill in request message */
+  m.m_type = REQ_CHMOD;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_MODE = rmode;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+
+  /* Copy back actual mode. */
+  *new_modep = m.RES_MODE;
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_chown				     *
+ *===========================================================================*/
+PUBLIC int req_chown(
+  endpoint_t fs_e,
+  ino_t inode_nr,
+  uid_t newuid,
+  gid_t newgid,
+  mode_t *new_modep
+)
+{
+  message m;
+  int r;
+
+  /* Fill in request message */
+  m.m_type = REQ_CHOWN;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_UID = newuid;
+  m.REQ_GID = newgid;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+
+  /* Return new mode to caller. */
+  *new_modep = m.RES_MODE;
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_create				     *
+ *===========================================================================*/
+PUBLIC int req_create(
+  int fs_e,
+  ino_t inode_nr,
+  int omode,
+  uid_t uid,
+  gid_t gid,
+  char *path,
+  node_details_t *res
+)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  size_t len;
+  message m;
+
+  if (path[0] == '/')
+	panic("req_create: filename starts with '/'");
+
+  len = strlen(path) + 1;
+  grant_id = cpf_grant_direct(fs_e, (vir_bytes) path, len, CPF_READ);
+  if (grant_id == -1)
+	panic("req_create: cpf_grant_direct failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_CREATE;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_MODE = omode;
+  m.REQ_UID = uid;
+  m.REQ_GID = gid;
+  m.REQ_GRANT = grant_id;
+  m.REQ_PATH_LEN = len;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+  if (r != OK) return(r);
+
+  /* Fill in response structure */
+  res->fs_e = m.m_source;
+  res->inode_nr = m.RES_INODE_NR;
+  res->fmode = m.RES_MODE;
+  res->fsize = m.RES_FILE_SIZE_LO;
+  res->uid = m.RES_UID;
+  res->gid = m.RES_GID;
+  res->dev = m.RES_DEV;
+
+  return(OK);
+}
+
+
+/*===========================================================================*
+ *				req_flush				     *
+ *===========================================================================*/
+PUBLIC int req_flush(endpoint_t fs_e, dev_t dev)
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_FLUSH;
+  m.REQ_DEV = dev;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ *				req_fstatfs				     *
+ *===========================================================================*/
+PUBLIC int req_fstatfs(int fs_e, int proc_e, char *buf)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  message m;
+
+  grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+	sizeof(struct statfs), CPF_WRITE);
+  if (grant_id == -1)
+	panic("req_fstatfs: cpf_grant_magic failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_FSTATFS;
+  m.REQ_GRANT = grant_id;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_statvfs				     *
+ *===========================================================================*/
+PUBLIC int req_statvfs(int fs_e, int proc_e, char *buf)
+{
+  int r;
+  cp_grant_id_t grant_id;
+  message m;
+
+  grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+	sizeof(struct statvfs), CPF_WRITE);
+  if (grant_id == -1)
+	panic("req_statvfs: cpf_grant_magic failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_STATVFS;
+  m.REQ_GRANT = grant_id;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  return(r);
+}
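Offsets wider than 32 bits cross this message interface as lo/hi halves (REQ_SEEK_POS_LO/REQ_SEEK_POS_HI, REQ_TRC_START_LO/REQ_TRC_START_HI, and so on). A minimal sketch of the round-trip, using the u64_t helpers from <minix/u64.h> that this file already leans on:

#include <minix/u64.h>
#include <assert.h>

static void split_and_join(u64_t pos)
{
	u32_t lo = ex64lo(pos);		/* low half, as sent in a ..._LO field */
	u32_t hi = ex64hi(pos);		/* high half, as sent in a ..._HI field */
	u64_t back = make64(lo, hi);	/* reassembled by the receiver */

	assert(cmp64(back, pos) == 0);	/* the round-trip is loss-free */
}

Several wrappers below simply send 0 in the HI half and panic when ex64hi() of a position is nonzero, because the protocol does not use the upper word yet.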
+/*===========================================================================* + * req_ftrunc * + *===========================================================================*/ +PUBLIC int req_ftrunc(endpoint_t fs_e, ino_t inode_nr, off_t start, off_t end) +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_FTRUNC; + m.REQ_INODE_NR = inode_nr; + m.REQ_TRC_START_LO = start; + m.REQ_TRC_START_HI = 0; /* Not used for now, so clear it. */ + m.REQ_TRC_END_LO = end; + m.REQ_TRC_END_HI = 0; /* Not used for now, so clear it. */ + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_getdents * + *===========================================================================*/ +PUBLIC int req_getdents( + endpoint_t fs_e, + ino_t inode_nr, + u64_t pos, + char *buf, + size_t size, + u64_t *new_pos, + int direct +) +{ + int r; + message m; + cp_grant_id_t grant_id; + + if (direct) { + grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, size, + CPF_WRITE); + } else { + grant_id = cpf_grant_magic(fs_e, who_e, (vir_bytes) buf, size, + CPF_WRITE); + } + + if (grant_id < 0) + panic("req_getdents: cpf_grant_direct/cpf_grant_magic failed: %d", + grant_id); + + m.m_type = REQ_GETDENTS; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_MEM_SIZE = size; + m.REQ_SEEK_POS_LO = ex64lo(pos); + m.REQ_SEEK_POS_HI = 0; /* Not used for now, so clear it. */ + + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if (r == OK) { + *new_pos = cvul64(m.RES_SEEK_POS_LO); + r = m.RES_NBYTES; + } + + return(r); +} + +/*===========================================================================* + * req_inhibread * + *===========================================================================*/ +PUBLIC int req_inhibread(endpoint_t fs_e, ino_t inode_nr) +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_INHIBREAD; + m.REQ_INODE_NR = inode_nr; + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_link * + *===========================================================================*/ +PUBLIC int req_link( + endpoint_t fs_e, + ino_t link_parent, + char *lastc, + ino_t linked_file +) +{ + int r; + cp_grant_id_t grant_id; + const size_t len = strlen(lastc) + 1; + message m; + + grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_link: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_LINK; + m.REQ_INODE_NR = linked_file; + m.REQ_DIR_INO = link_parent; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_lookup * + *===========================================================================*/ +PUBLIC int req_lookup( + endpoint_t fs_e, + ino_t dir_ino, + ino_t root_ino, + uid_t uid, + gid_t gid, + struct lookup *resolve, + lookup_res_t *res, + struct fproc *rfp +) +{ + int r; + size_t len; + cp_grant_id_t grant_id=0, grant_id2=0; + message m; + vfs_ucred_t credentials; + int flags; + + grant_id = cpf_grant_direct(fs_e, (vir_bytes) resolve->l_path, PATH_MAX+1, + CPF_READ | CPF_WRITE); + if(grant_id == -1) + panic("req_lookup: cpf_grant_direct failed"); + + flags = resolve->l_flags; + len = strlen(resolve->l_path) + 1; + + m.m_type = 
REQ_LOOKUP; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + m.REQ_PATH_SIZE = PATH_MAX + 1; + m.REQ_DIR_INO = dir_ino; + m.REQ_ROOT_INO = root_ino; + + if(rfp->fp_ngroups > 0) { /* Is the process member of multiple groups? */ + /* In that case the FS has to copy the uid/gid credentials */ + int i; + + /* Set credentials */ + credentials.vu_uid = rfp->fp_effuid; + credentials.vu_gid = rfp->fp_effgid; + credentials.vu_ngroups = rfp->fp_ngroups; + for (i = 0; i < rfp->fp_ngroups; i++) + credentials.vu_sgroups[i] = rfp->fp_sgroups[i]; + + grant_id2 = cpf_grant_direct(fs_e, (vir_bytes) &credentials, + sizeof(credentials), CPF_READ); + if(grant_id2 == -1) + panic("req_lookup: cpf_grant_direct failed"); + + m.REQ_GRANT2 = grant_id2; + m.REQ_UCRED_SIZE= sizeof(credentials); + flags |= PATH_GET_UCRED; + } else { + /* When there's only one gid, we can send it directly */ + m.REQ_UID = uid; + m.REQ_GID = gid; + flags &= ~PATH_GET_UCRED; + } + + m.REQ_FLAGS = flags; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + if(rfp->fp_ngroups > 0) cpf_revoke(grant_id2); + + /* Fill in response according to the return value */ + res->fs_e = m.m_source; + + switch (r) { + case OK: + res->inode_nr = m.RES_INODE_NR; + res->fmode = m.RES_MODE; + res->fsize = m.RES_FILE_SIZE_LO; + res->dev = m.RES_DEV; + res->uid= m.RES_UID; + res->gid= m.RES_GID; + break; + case EENTERMOUNT: + res->inode_nr = m.RES_INODE_NR; + res->char_processed = m.RES_OFFSET; + res->symloop = m.RES_SYMLOOP; + break; + case ELEAVEMOUNT: + res->char_processed = m.RES_OFFSET; + res->symloop = m.RES_SYMLOOP; + break; + case ESYMLINK: + res->char_processed = m.RES_OFFSET; + res->symloop = m.RES_SYMLOOP; + break; + default: + break; + } + + return(r); +} + + +/*===========================================================================* + * req_mkdir * + *===========================================================================*/ +PUBLIC int req_mkdir( + endpoint_t fs_e, + ino_t inode_nr, + char *lastc, + uid_t uid, + gid_t gid, + mode_t dmode +) +{ + int r; + cp_grant_id_t grant_id; + size_t len; + message m; + + len = strlen(lastc) + 1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_mkdir: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_MKDIR; + m.REQ_INODE_NR = inode_nr; + m.REQ_MODE = dmode; + m.REQ_UID = uid; + m.REQ_GID = gid; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_mknod * + *===========================================================================*/ +PUBLIC int req_mknod( + endpoint_t fs_e, + ino_t inode_nr, + char *lastc, + uid_t uid, + gid_t gid, + mode_t dmode, + dev_t dev +) +{ + int r; + size_t len; + cp_grant_id_t grant_id; + message m; + + len = strlen(lastc) + 1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes)lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_mknod: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_MKNOD; + m.REQ_INODE_NR = inode_nr; + m.REQ_MODE = dmode; + m.REQ_DEV = dev; + m.REQ_UID = uid; + m.REQ_GID = gid; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_mountpoint * + 
*===========================================================================*/ +PUBLIC int req_mountpoint(endpoint_t fs_e, ino_t inode_nr) +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_MOUNTPOINT; + m.REQ_INODE_NR = inode_nr; + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_newnode * + *===========================================================================*/ +PUBLIC int req_newnode( + endpoint_t fs_e, + uid_t uid, + gid_t gid, + mode_t dmode, + dev_t dev, + struct node_details *res +) +{ + int r; + message m; + + /* Fill in request message */ + m.m_type = REQ_NEWNODE; + m.REQ_MODE = dmode; + m.REQ_DEV = dev; + m.REQ_UID = uid; + m.REQ_GID = gid; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + + res->fs_e = m.m_source; + res->inode_nr = m.RES_INODE_NR; + res->fmode = m.RES_MODE; + res->fsize = m.RES_FILE_SIZE_LO; + res->dev = m.RES_DEV; + res->uid = m.RES_UID; + res->gid = m.RES_GID; + + return(r); +} + + +/*===========================================================================* + * req_newdriver * + *===========================================================================*/ +PUBLIC int req_newdriver( + endpoint_t fs_e, + dev_t dev, + endpoint_t driver_e +) +{ +/* Note: this is the only request function that doesn't use the + * fs_sendrec internal routine, since we want to avoid the dead + * driver recovery mechanism here. This function is actually called + * during the recovery. + */ + message m; + int r; + + /* Fill in request message */ + m.m_type = REQ_NEW_DRIVER; + m.REQ_DEV = dev; + m.REQ_DRIVER_E = driver_e; + + /* Issue request */ + if((r = sendrec(fs_e, &m)) != OK) { + printf("%s:%d VFS req_newdriver: error sending message %d to %d\n", + __FILE__, __LINE__, r, fs_e); + util_stacktrace(); + return(r); + } + + return(OK); +} + + + +/*===========================================================================* + * req_putnode * + *===========================================================================*/ +PUBLIC int req_putnode(fs_e, inode_nr, count) +int fs_e; +ino_t inode_nr; +int count; +{ + message m; + + /* Fill in request message */ + m.m_type = REQ_PUTNODE; + m.REQ_INODE_NR = inode_nr; + m.REQ_COUNT = count; + + /* Send/rec request */ + return fs_sendrec(fs_e, &m); +} + + +/*===========================================================================* + * req_rdlink * + *===========================================================================*/ +PUBLIC int req_rdlink(fs_e, inode_nr, proc_e, buf, len, direct) +endpoint_t fs_e; +ino_t inode_nr; +endpoint_t proc_e; +char *buf; +size_t len; +int direct; /* set to 1 to use direct grants instead of magic grants */ +{ + message m; + int r; + cp_grant_id_t grant_id; + + if (direct) { + grant_id = cpf_grant_direct(fs_e, (vir_bytes) buf, len, CPF_WRITE); + } else { + grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf, len, + CPF_WRITE); + } + if(grant_id == -1) + panic("req_rdlink: cpf_grant_magic failed"); + + /* Fill in request message */ + m.m_type = REQ_RDLINK; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_MEM_SIZE = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if(r == OK) r = m.RES_NBYTES; + + return(r); +} + + +/*===========================================================================* + * req_readsuper * + *===========================================================================*/ +PUBLIC int req_readsuper( + 
endpoint_t fs_e, + char *label, + dev_t dev, + int readonly, + int isroot, + struct node_details *res_nodep +) +{ + int r; + cp_grant_id_t grant_id; + size_t len; + message m; + + len = strlen(label)+1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes) label, len, CPF_READ); + if (grant_id == -1) + panic("req_readsuper: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_READSUPER; + m.REQ_FLAGS = 0; + if(readonly) m.REQ_FLAGS |= REQ_RDONLY; + if(isroot) m.REQ_FLAGS |= REQ_ISROOT; + m.REQ_GRANT = grant_id; + m.REQ_DEV = dev; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if(r == OK) { + /* Fill in response structure */ + res_nodep->fs_e = m.m_source; + res_nodep->inode_nr = m.RES_INODE_NR; + res_nodep->fmode = m.RES_MODE; + res_nodep->fsize = m.RES_FILE_SIZE_LO; + res_nodep->uid = m.RES_UID; + res_nodep->gid = m.RES_GID; + } + + return(r); +} + + +/*===========================================================================* + * req_readwrite * + *===========================================================================*/ +PUBLIC int req_readwrite(fs_e, inode_nr, pos, rw_flag, user_e, + user_addr, num_of_bytes, new_posp, cum_iop) +endpoint_t fs_e; +ino_t inode_nr; +u64_t pos; +int rw_flag; +endpoint_t user_e; +char *user_addr; +unsigned int num_of_bytes; +u64_t *new_posp; +unsigned int *cum_iop; +{ + int r; + cp_grant_id_t grant_id; + message m; + + if (ex64hi(pos) != 0) + panic("req_readwrite: pos too large"); + + grant_id = cpf_grant_magic(fs_e, user_e, (vir_bytes) user_addr, num_of_bytes, + (rw_flag==READING ? CPF_WRITE:CPF_READ)); + if (grant_id == -1) + panic("req_readwrite: cpf_grant_magic failed"); + + /* Fill in request message */ + m.m_type = rw_flag == READING ? REQ_READ : REQ_WRITE; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_SEEK_POS_LO = ex64lo(pos); + m.REQ_SEEK_POS_HI = 0; /* Not used for now, so clear it. 
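+			 * A position above 32 bits would already
+			 * have tripped the ex64hi() panic above.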
*/ + m.REQ_NBYTES = num_of_bytes; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + if (r == OK) { + /* Fill in response structure */ + *new_posp = cvul64(m.RES_SEEK_POS_LO); + *cum_iop = m.RES_NBYTES; + } + + return(r); +} + + +/*===========================================================================* + * req_rename * + *===========================================================================*/ +PUBLIC int req_rename(fs_e, old_dir, old_name, new_dir, new_name) +endpoint_t fs_e; +ino_t old_dir; +char *old_name; +ino_t new_dir; +char *new_name; +{ + int r; + cp_grant_id_t gid_old, gid_new; + size_t len_old, len_new; + message m; + + len_old = strlen(old_name) + 1; + gid_old = cpf_grant_direct(fs_e, (vir_bytes) old_name, len_old, CPF_READ); + if(gid_old == -1) + panic("req_rename: cpf_grant_direct failed"); + + len_new = strlen(new_name) + 1; + gid_new = cpf_grant_direct(fs_e, (vir_bytes) new_name, len_new, CPF_READ); + if(gid_new == -1) + panic("req_rename: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_RENAME; + m.REQ_REN_OLD_DIR = old_dir; + m.REQ_REN_NEW_DIR = new_dir; + m.REQ_REN_GRANT_OLD = gid_old; + m.REQ_REN_LEN_OLD = len_old; + m.REQ_REN_GRANT_NEW = gid_new; + m.REQ_REN_LEN_NEW = len_new; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(gid_old); + cpf_revoke(gid_new); + + return(r); +} + + +/*===========================================================================* + * req_rmdir * + *===========================================================================*/ +PUBLIC int req_rmdir(fs_e, inode_nr, lastc) +endpoint_t fs_e; +ino_t inode_nr; +char *lastc; +{ + int r; + cp_grant_id_t grant_id; + size_t len; + message m; + + len = strlen(lastc) + 1; + grant_id = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ); + if(grant_id == -1) + panic("req_rmdir: cpf_grant_direct failed"); + + /* Fill in request message */ + m.m_type = REQ_RMDIR; + m.REQ_INODE_NR = inode_nr; + m.REQ_GRANT = grant_id; + m.REQ_PATH_LEN = len; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(grant_id); + + return(r); +} + + +/*===========================================================================* + * req_slink * + *===========================================================================*/ +PUBLIC int req_slink( + endpoint_t fs_e, + ino_t inode_nr, + char *lastc, + endpoint_t proc_e, + char *path_addr, + unsigned short path_length, + uid_t uid, + gid_t gid +) +{ + int r; + size_t len; + cp_grant_id_t gid_name, gid_buf; + message m; + + len = strlen(lastc) + 1; + gid_name = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ); + if(gid_name == -1) + panic("req_slink: cpf_grant_direct failed"); + + gid_buf = cpf_grant_magic(fs_e, proc_e, (vir_bytes) path_addr, path_length, + CPF_READ); + if(gid_buf == -1) { + cpf_revoke(gid_name); + panic("req_slink: cpf_grant_magic failed"); + } + + /* Fill in request message */ + m.m_type = REQ_SLINK; + m.REQ_INODE_NR = inode_nr; + m.REQ_UID = uid; + m.REQ_GID = gid; + m.REQ_GRANT = gid_name; + m.REQ_PATH_LEN = len; + m.REQ_GRANT3 = gid_buf; + m.REQ_MEM_SIZE = path_length; + + /* Send/rec request */ + r = fs_sendrec(fs_e, &m); + cpf_revoke(gid_name); + cpf_revoke(gid_buf); + + return(r); +} + + +/*===========================================================================* + * req_stat * + *===========================================================================*/ +PUBLIC int req_stat(fs_e, inode_nr, proc_e, buf, pos, stat_version) +int fs_e; +ino_t 
inode_nr;
+int proc_e;
+char *buf;
+int pos;
+int stat_version;
+{
+  cp_grant_id_t grant_id;
+  int r;
+  message m;
+  struct stat sb;
+  struct minix_prev_stat old_sb;	/* for backward compatibility */
+
+  if (pos != 0 || stat_version != 0)
+	grant_id = cpf_grant_direct(fs_e, (vir_bytes) &sb,
+				sizeof(struct stat), CPF_WRITE);
+  else
+	grant_id = cpf_grant_magic(fs_e, proc_e, (vir_bytes) buf,
+				sizeof(struct stat), CPF_WRITE);
+
+  if (grant_id < 0)
+	panic("req_stat: cpf_grant_* failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_STAT;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_GRANT = grant_id;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  if (r != OK || (pos == 0 && stat_version == 0))
+	return(r);
+
+  if (pos != 0)
+	sb.st_size -= pos;
+  if (stat_version == 0) {
+	r = sys_vircopy(SELF, D, (vir_bytes) &sb, proc_e, D, (vir_bytes) buf,
+			sizeof(struct stat));
+	return(r);
+  }
+
+  /* The user needs the old struct stat.
+   * There is just one previous version at the moment. */
+  assert(stat_version == 1);
+
+/* XXX as long as the st_Xtime macros are defined, we have to undefine them
+ * here, because minix_prev_stat declares st_atime etc. as plain fields.
+ */
+#undef st_atime
+#undef st_ctime
+#undef st_mtime
+
+/* Copy field by field because of the st_gid type mismatch and the
+ * difference in field order after st_atime.
+ */
+  old_sb.st_dev = sb.st_dev;
+  old_sb.st_ino = sb.st_ino;
+  old_sb.st_mode = sb.st_mode;
+  old_sb.st_nlink = sb.st_nlink;
+  old_sb.st_uid = sb.st_uid;
+  old_sb.st_gid = sb.st_gid;
+  old_sb.st_rdev = sb.st_rdev;
+  old_sb.st_size = sb.st_size;
+#if defined(_NETBSD_SOURCE)
+  old_sb.st_atime = sb.st_atimespec.tv_sec;
+  old_sb.st_mtime = sb.st_mtimespec.tv_sec;
+  old_sb.st_ctime = sb.st_ctimespec.tv_sec;
+#else
+  old_sb.st_atime = sb.st_atime;
+  old_sb.st_mtime = sb.st_mtime;
+  old_sb.st_ctime = sb.st_ctime;
+#endif
+
+  r = sys_vircopy(SELF, D, (vir_bytes) &old_sb, proc_e, D, (vir_bytes) buf,
+		sizeof(struct minix_prev_stat));
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_sync				     *
+ *===========================================================================*/
+PUBLIC int req_sync(fs_e)
+endpoint_t fs_e;
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_SYNC;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ *				req_unlink				     *
+ *===========================================================================*/
+PUBLIC int req_unlink(fs_e, inode_nr, lastc)
+endpoint_t fs_e;
+ino_t inode_nr;
+char *lastc;
+{
+  cp_grant_id_t grant_id;
+  size_t len;
+  int r;
+  message m;
+
+  len = strlen(lastc) + 1;
+  grant_id = cpf_grant_direct(fs_e, (vir_bytes) lastc, len, CPF_READ);
+  if(grant_id == -1)
+	panic("req_unlink: cpf_grant_direct failed");
+
+  /* Fill in request message */
+  m.m_type = REQ_UNLINK;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_GRANT = grant_id;
+  m.REQ_PATH_LEN = len;
+
+  /* Send/rec request */
+  r = fs_sendrec(fs_e, &m);
+  cpf_revoke(grant_id);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *				req_unmount				     *
+ *===========================================================================*/
+PUBLIC int req_unmount(fs_e)
+endpoint_t fs_e;
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_UNMOUNT;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
+
+
+/*===========================================================================*
+ *				req_utime				     *
*===========================================================================*/
+PUBLIC int req_utime(fs_e, inode_nr, actime, modtime)
+endpoint_t fs_e;
+ino_t inode_nr;
+time_t actime;
+time_t modtime;
+{
+  message m;
+
+  /* Fill in request message */
+  m.m_type = REQ_UTIME;
+  m.REQ_INODE_NR = inode_nr;
+  m.REQ_ACTIME = actime;
+  m.REQ_MODTIME = modtime;
+
+  /* Send/rec request */
+  return fs_sendrec(fs_e, &m);
+}
diff --git a/servers/avfs/request.h b/servers/avfs/request.h
new file mode 100644
index 000000000..74f612bf9
--- /dev/null
+++ b/servers/avfs/request.h
@@ -0,0 +1,41 @@
+#ifndef __VFS_REQUEST_H__
+#define __VFS_REQUEST_H__
+
+/* Low-level request messages are built and sent by wrapper functions.
+ * This file contains the request and response structures for accessing
+ * those wrapper functions.
+ */
+
+#include
+
+/* Structure for a response that contains inode details */
+typedef struct node_details {
+  endpoint_t fs_e;
+  ino_t inode_nr;
+  mode_t fmode;
+  off_t fsize;
+  uid_t uid;
+  gid_t gid;
+
+  /* For char/block special files */
+  dev_t dev;
+} node_details_t;
+
+/* Structure for a lookup response */
+typedef struct lookup_res {
+  endpoint_t fs_e;
+  ino_t inode_nr;
+  mode_t fmode;
+  off_t fsize;
+  uid_t uid;
+  gid_t gid;
+  /* For char/block special files */
+  dev_t dev;
+
+  /* Fields used for handling mount points and symbolic links */
+  int char_processed;
+  unsigned char symloop;
+} lookup_res_t;
+
+
+#endif
diff --git a/servers/avfs/select.c b/servers/avfs/select.c
new file mode 100644
index 000000000..76780ac5d
--- /dev/null
+++ b/servers/avfs/select.c
@@ -0,0 +1,1058 @@
+/* Implements the entry point for the select(2) system call.
+ *
+ * The entry points into this file are
+ *   do_select:	       perform the SELECT system call
+ *   select_callback:  notify the select system of a possible fd operation
+ *   select_unsuspend_by_endpt: cancel a blocking select on an exiting driver
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "select.h"
+#include "file.h"
+#include "fproc.h"
+#include "dmap.h"
+#include "vnode.h"
+
+/* Maximum number of simultaneously pending select() calls */
+#define MAXSELECTS 25
+#define FROM_PROC 0
+#define TO_PROC   1
+
+PRIVATE struct selectentry {
+  struct fproc *requestor;	/* slot is free iff this is NULL */
+  endpoint_t req_endpt;
+  fd_set readfds, writefds, errorfds;
+  fd_set ready_readfds, ready_writefds, ready_errorfds;
+  fd_set *vir_readfds, *vir_writefds, *vir_errorfds;
+  struct filp *filps[OPEN_MAX];
+  int type[OPEN_MAX];
+  int nfds, nreadyfds;
+  int error;
+  char block;
+  clock_t expiry;
+  timer_t timer;	/* if expiry > 0 */
+} selecttab[MAXSELECTS];
+
+FORWARD _PROTOTYPE(int copy_fdsets, (struct selectentry *se, int nfds,
+	int direction));
+FORWARD _PROTOTYPE(int do_select_request, (struct selectentry *se, int fd,
+	int *ops));
+FORWARD _PROTOTYPE(void filp_status, (struct filp *fp, int status));
+FORWARD _PROTOTYPE(int is_deferred, (struct selectentry *se));
+FORWARD _PROTOTYPE(void restart_proc, (struct selectentry *se));
+FORWARD _PROTOTYPE(void ops2tab, (int ops, int fd, struct selectentry *e));
+FORWARD _PROTOTYPE(int is_regular_file, (struct filp *f));
+FORWARD _PROTOTYPE(int is_pipe, (struct filp *f));
+FORWARD _PROTOTYPE(int is_supported_major, (struct filp *f));
+FORWARD _PROTOTYPE(void select_lock_filp, (struct filp *f, int ops));
+FORWARD _PROTOTYPE(int select_request_async, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_file, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_major, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_pipe, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(int select_request_sync, (struct filp *f, int *ops,
+	int block));
+FORWARD _PROTOTYPE(void select_cancel_all, (struct selectentry *e));
+FORWARD _PROTOTYPE(void select_cancel_filp, (struct filp *f));
+FORWARD _PROTOTYPE(void select_return, (struct selectentry *));
+FORWARD _PROTOTYPE(void select_restart_filps, (void));
+FORWARD _PROTOTYPE(int tab2ops, (int fd, struct selectentry *e));
+FORWARD _PROTOTYPE(void wipe_select, (struct selectentry *s));
+
+PRIVATE struct fdtype {
+  int (*select_request)(struct filp *, int *ops, int block);
+  int (*type_match)(struct filp *f);
+} fdtypes[] = {
+  { select_request_major, is_supported_major },
+  { select_request_file, is_regular_file },
+  { select_request_pipe, is_pipe },
+};
+#define SEL_FDS		(sizeof(fdtypes) / sizeof(fdtypes[0]))
+PRIVATE int select_majors[] = { /* List of majors that support selecting on */
+  TTY_MAJOR,
+  INET_MAJOR,
+  UDS_MAJOR,
+  LOG_MAJOR,
+};
+#define SEL_MAJORS	(sizeof(select_majors) / sizeof(select_majors[0]))
+
+/*===========================================================================*
+ *				do_select				     *
+ *===========================================================================*/
+PUBLIC int do_select(void)
+{
+/* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
+ * call. First we copy the arguments and verify their sanity. Then we check
+ * whether there are file descriptors that satisfy the select call right off
+ * the bat. If so, or if there are no ready file descriptors but the process
+ * requested to return immediately, we return the result. Otherwise we set a
+ * timeout and wait for either the file descriptors to become ready or the
+ * timer to go off. If no timeout value was provided, we wait indefinitely.
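+ *
+ * Caller-visible semantics, in short: a NULL timeout pointer blocks until
+ * a descriptor becomes ready, a zeroed struct timeval polls exactly once,
+ * and any other value bounds the wait. A user-level sketch (illustrative
+ * only, not part of this change):
+ *
+ *	struct timeval tv = { 5, 0 };
+ *	r = select(nfds, &rfds, NULL, NULL, &tv);  (blocks at most 5 sec)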
*/ + + int r, nfds, do_timeout = 0, fd, s; + struct timeval timeout; + struct selectentry *se; + + nfds = m_in.SEL_NFDS; + + /* Sane amount of file descriptors? */ + if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL); + + /* Find a slot to store this select request */ + for (s = 0; s < MAXSELECTS; s++) + if (selecttab[s].requestor == NULL) /* Unused slot */ + break; + if (s >= MAXSELECTS) return(ENOSPC); + + se = &selecttab[s]; + wipe_select(se); /* Clear results of previous usage */ + se->req_endpt = who_e; + se->vir_readfds = (fd_set *) m_in.SEL_READFDS; + se->vir_writefds = (fd_set *) m_in.SEL_WRITEFDS; + se->vir_errorfds = (fd_set *) m_in.SEL_ERRORFDS; + + /* Copy fdsets from the process */ + if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) return(r); + + /* Did the process set a timeout value? If so, retrieve it. */ + if (m_in.SEL_TIMEOUT != NULL) { + do_timeout = 1; + r = sys_vircopy(who_e, D, (vir_bytes) m_in.SEL_TIMEOUT, SELF, D, + (vir_bytes) &timeout, sizeof(timeout)); + if (r != OK) return(r); + } + + /* No nonsense in the timeval */ + if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0)) + return(EINVAL); + + /* If there is no timeout, we block forever. Otherwise, we block up to the + * specified time interval. + */ + if (!do_timeout) /* No timeout value set */ + se->block = 1; + else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0)) + se->block = 1; + else /* timeout set as (0,0) - this effects a poll */ + se->block = 0; + se->expiry = 0; /* no timer set (yet) */ + + /* Verify that file descriptors are okay to select on */ + for (fd = 0; fd < nfds; fd++) { + struct filp *f; + int type, ops; + + /* Because the select() interface implicitly includes file descriptors + * you might not want to select on, we have to figure out whether we're + * interested in them. Typically, these file descriptors include fd's + * inherited from the parent proc and file descriptors that have been + * close()d, but had a lower fd than one in the current set. + */ + if (!(ops = tab2ops(fd, se))) + continue; /* No operations set; nothing to do for this fd */ + + /* Get filp belonging to this fd */ + f = se->filps[fd] = get_filp(fd, VNODE_READ); + if (f == NULL) { + if (err_code == EBADF) + r = err_code; + else /* File descriptor is 'ready' to return EIO */ + r = EINTR; + + return(r); + } + + /* Check file types. According to POSIX 2008: + * "The pselect() and select() functions shall support regular files, + * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The + * behavior of pselect() and select() on file descriptors that refer to + * other types of file is unspecified." + * + * In our case, terminal and pseudo-terminal devices are handled by the + * TTY major and sockets by either INET major (socket type AF_INET) or + * PFS major (socket type AF_UNIX). PFS acts as an FS when it handles + * pipes and as a driver when it handles sockets. Additionally, we + * support select on the LOG major to handle kernel logging, which is + * beyond the POSIX spec. 
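+ *
+ * In terms of the fdtypes[] table above: a regular file is always ready
+ * (select_request_file), pipes and FIFOs go through select_request_pipe,
+ * and a character special file on one of the majors in select_majors[]
+ * is forwarded to its driver via select_request_major.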
*/
+
+	se->type[fd] = -1;
+	for (type = 0; type < SEL_FDS; type++) {
+		if (fdtypes[type].type_match(f)) {
+			se->type[fd] = type;
+			se->nfds = fd+1;
+			se->filps[fd]->filp_selectors++;
+			break;
+		}
+	}
+	unlock_filp(f);
+	if (se->type[fd] == -1)	/* Type not found */
+		return(EBADF);
+  }
+
+  /* Check all file descriptors in the set to see whether any is 'ready' now */
+  for (fd = 0; fd < nfds; fd++) {
+	int ops, r;
+	struct filp *f;
+
+	/* Again, check for involuntarily selected fd's */
+	if (!(ops = tab2ops(fd, se)))
+		continue; /* No operations set; nothing to do for this fd */
+
+	/* Test the filp for select operations if that has not been done
+	 * already, e.g. when processes share a filp and both select on it. */
+	f = se->filps[fd];
+	select_lock_filp(f, f->filp_select_ops | ops);
+	if ((f->filp_select_ops & ops) != ops) {
+		int wantops;
+
+		wantops = (f->filp_select_ops |= ops);
+		r = do_select_request(se, fd, &wantops);
+		unlock_filp(f);
+		if (r != SEL_OK) {
+			if (r == SEL_DEFERRED) continue;
+			else break; /* Error or bogus return code; abort */
+		}
+
+		/* The select request above might have turned on/off some
+		 * operations because they were 'ready' or not meaningful.
+		 * Either way, we might have results and we need to store
+		 * them in the select table entry. */
+		if (wantops & ops) ops2tab(wantops, fd, se);
+	} else {
+		unlock_filp(f);
+	}
+  }
+
+  if ((se->nreadyfds > 0 || !se->block) && !is_deferred(se)) {
+	/* fd's were found that were ready to go right away, and/or
+	 * we were instructed not to block at all. Must return
+	 * immediately.
+	 */
+	r = copy_fdsets(se, se->nfds, TO_PROC);
+	select_cancel_all(se);
+
+	if (r != OK)
+		return(r);
+	else if (se->error != OK)
+		return(se->error);
+
+	return(se->nreadyfds);
+  }
+
+  /* Convert the timeval to ticks and set the timer. If that fails, undo
+   * everything and return the error.
+   */
+  if (do_timeout) {
+	int ticks;
+	/* Open Group:
+	 * "If the requested timeout interval requires a finer
+	 * granularity than the implementation supports, the
+	 * actual timeout interval shall be rounded up to the next
+	 * supported value."
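+	 *
+	 * As a worked example (assuming the usual system_hz of 60, for
+	 * illustration only): a requested timeout of 1500 us yields
+	 *	ticks = 0 * 60 + (1500 * 60 + 999999) / 1000000 = 1,
+	 * i.e. the 1.5 ms request is rounded up to one full clock tick.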
+ */ +#define USECPERSEC 1000000 + while(timeout.tv_usec >= USECPERSEC) { + /* this is to avoid overflow with *system_hz below */ + timeout.tv_usec -= USECPERSEC; + timeout.tv_sec++; + } + ticks = timeout.tv_sec * system_hz + + (timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC; + se->expiry = ticks; + set_timer(&se->timer, ticks, select_timeout_check, s); + } + + /* If we're blocking, the table entry is now valid */ + se->requestor = fp; + + /* process now blocked */ + suspend(FP_BLOCKED_ON_SELECT); + return(SUSPEND); +} + +/*===========================================================================* + * is_deferred * + *===========================================================================*/ +PRIVATE int is_deferred(struct selectentry *se) +{ +/* Find out whether this select has pending initial replies */ + + int fd; + struct filp *f; + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL) continue; + if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE); + } + + return(FALSE); +} + + +/*===========================================================================* + * is_regular_file * + *===========================================================================*/ +PRIVATE int is_regular_file(struct filp *f) +{ + return(f && f->filp_vno && (f->filp_vno->v_mode & I_TYPE) == I_REGULAR); +} + +/*===========================================================================* + * is_pipe * + *===========================================================================*/ +PRIVATE int is_pipe(struct filp *f) +{ +/* Recognize either anonymous pipe or named pipe (FIFO) */ + return(f && f->filp_vno && (f->filp_vno->v_mode & I_TYPE) == I_NAMED_PIPE); +} + +/*===========================================================================* + * is_supported_major * + *===========================================================================*/ +PRIVATE int is_supported_major(struct filp *f) +{ +/* See if this filp is a handle on a device on which we support select() */ + int m; + + if (!(f && f->filp_vno)) return(FALSE); + if ((f->filp_vno->v_mode & I_TYPE) != I_CHAR_SPECIAL) return(FALSE); + + for (m = 0; m < SEL_MAJORS; m++) + if (major(f->filp_vno->v_sdev) == select_majors[m]) + return(TRUE); + + return(FALSE); +} + +/*===========================================================================* + * select_request_async * + *===========================================================================*/ +PRIVATE int select_request_async(struct filp *f, int *ops, int block) +{ + int r, rops, major; + struct dmap *dp; + + rops = *ops; + + if (!block && (f->filp_select_flags & FSF_BLOCKED)) { + /* This filp is blocked waiting for a reply, but we don't want to + * block ourselves. 
Unless we're awaiting the initial reply, these + * operations won't be ready */ + if (!(f->filp_select_flags & FSF_BUSY)) { + if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK)) + rops &= ~SEL_RD; + if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK)) + rops &= ~SEL_WR; + if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK)) + rops &= ~SEL_ERR; + if (!(rops & (SEL_RD|SEL_WR|SEL_ERR))) { + /* Nothing left to do */ + *ops = 0; + return(SEL_OK); + } + } + } + + f->filp_select_flags |= FSF_UPDATE; + if (block) { + rops |= SEL_NOTIFY; + if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK; + if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK; + if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK; + } + + if (f->filp_select_flags & FSF_BUSY) + return(SEL_DEFERRED); + + major = major(f->filp_vno->v_sdev); + if (major < 0 || major >= NR_DEVICES) return(SEL_ERROR); + dp = &dmap[major]; + if (dp->dmap_sel_filp) + return(SEL_DEFERRED); + + f->filp_select_flags &= ~FSF_UPDATE; + r = dev_io(VFS_DEV_SELECT, f->filp_vno->v_sdev, rops, NULL, + cvu64(0), 0, 0, FALSE); + if (r < 0 && r != SUSPEND) + return(SEL_ERROR); + + if (r != SUSPEND) + panic("select_request_asynch: expected SUSPEND got: %d", r); + + dp->dmap_sel_filp = f; + f->filp_select_flags |= FSF_BUSY; + + return(SEL_DEFERRED); +} + +/*===========================================================================* + * select_request_file * + *===========================================================================*/ +PRIVATE int select_request_file(struct filp *f, int *ops, int block) +{ + /* Files are always ready, so output *ops is input *ops */ + return(SEL_OK); +} + +/*===========================================================================* + * select_request_major * + *===========================================================================*/ +PRIVATE int select_request_major(struct filp *f, int *ops, int block) +{ + int major, r; + + major = major(f->filp_vno->v_sdev); + if (major < 0 || major >= NR_DEVICES) return(SEL_ERROR); + + if (dmap[major].dmap_style == STYLE_DEVA || + dmap[major].dmap_style == STYLE_CLONE_A) + r = select_request_async(f, ops, block); + else + r = select_request_sync(f, ops, block); + + return(r); +} + +/*===========================================================================* + * select_request_sync * + *===========================================================================*/ +PRIVATE int select_request_sync(struct filp *f, int *ops, int block) +{ + int rops; + + rops = *ops; + if (block) rops |= SEL_NOTIFY; + *ops = dev_io(VFS_DEV_SELECT, f->filp_vno->v_sdev, rops, NULL, + cvu64(0), 0, 0, FALSE); + if (*ops < 0) + return(SEL_ERROR); + + return(SEL_OK); +} + +/*===========================================================================* + * select_request_pipe * + *===========================================================================*/ +PRIVATE int select_request_pipe(struct filp *f, int *ops, int block) +{ + int orig_ops, r = 0, err; + + orig_ops = *ops; + + if ((*ops & (SEL_RD|SEL_ERR))) { + err = pipe_check(f->filp_vno, READING, 0, 1, f->filp_pos, 1); + + if (err != SUSPEND) + r |= SEL_RD; + if (err < 0 && err != SUSPEND) + r |= SEL_ERR; + if (err == SUSPEND && !(f->filp_mode & R_BIT)) { + /* A "meaningless" read select, therefore ready + * for reading and no error set. 
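+			 * For instance, a read select on the write-only end
+			 * of a pipe can never be satisfied; reporting it
+			 * ready lets the subsequent read() fail immediately
+			 * instead of leaving the caller blocked in select().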
*/ + r |= SEL_RD; + r &= ~SEL_ERR; + } + } + + if ((*ops & (SEL_WR|SEL_ERR))) { + err = pipe_check(f->filp_vno, WRITING, 0, 1, f->filp_pos, 1); + + if (err != SUSPEND) + r |= SEL_WR; + if (err < 0 && err != SUSPEND) + r |= SEL_ERR; + if (err == SUSPEND && !(f->filp_mode & W_BIT)) { + /* A "meaningless" write select, therefore ready + for writing and no error set. */ + r |= SEL_WR; + r &= ~SEL_ERR; + } + } + + /* Some options we collected might not be requested. */ + *ops = r & orig_ops; + + if (!*ops && block) + f->filp_pipe_select_ops |= orig_ops; + + return(SEL_OK); +} + +/*===========================================================================* + * tab2ops * + *===========================================================================*/ +PRIVATE int tab2ops(int fd, struct selectentry *e) +{ + int ops = 0; + if (FD_ISSET(fd, &e->readfds)) ops |= SEL_RD; + if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR; + if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR; + + return(ops); +} + + +/*===========================================================================* + * ops2tab * + *===========================================================================*/ +PRIVATE void ops2tab(int ops, int fd, struct selectentry *e) +{ + if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) && + !FD_ISSET(fd, &e->ready_readfds)) { + FD_SET(fd, &e->ready_readfds); + e->nreadyfds++; + } + + if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) && + !FD_ISSET(fd, &e->ready_writefds)) { + FD_SET(fd, &e->ready_writefds); + e->nreadyfds++; + } + + if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) && + !FD_ISSET(fd, &e->ready_errorfds)) { + FD_SET(fd, &e->ready_errorfds); + e->nreadyfds++; + } +} + + +/*===========================================================================* + * copy_fdsets * + *===========================================================================*/ +PRIVATE int copy_fdsets(struct selectentry *se, int nfds, int direction) +{ + int r; + size_t fd_setsize; + endpoint_t src_e, dst_e; + fd_set *src_fds, *dst_fds; + + if (nfds < 0 || nfds > OPEN_MAX) + panic("select copy_fdsets: nfds wrong: %d", nfds); + + /* Only copy back as many bits as the user expects. */ +#ifdef __NBSD_LIBC + fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask)); +#else + fd_setsize = (size_t) (_FDSETWORDS(nfds) * _FDSETBITSPERWORD/8); +#endif + + /* Set source and destination endpoints */ + src_e = (direction == FROM_PROC) ? se->req_endpt : SELF; + dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt; + + /* read set */ + src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds; + dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds; + if (se->vir_readfds) { + r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D, + (vir_bytes) dst_fds, fd_setsize); + if (r != OK) return(r); + } + + /* write set */ + src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds; + dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds; + if (se->vir_writefds) { + r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D, + (vir_bytes) dst_fds, fd_setsize); + if (r != OK) return(r); + } + + /* error set */ + src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds; + dst_fds = (direction == FROM_PROC) ? 
&se->errorfds : se->vir_errorfds; + if (se->vir_errorfds) { + r = sys_vircopy(src_e, D, (vir_bytes) src_fds, dst_e, D, + (vir_bytes) dst_fds, fd_setsize); + if (r != OK) return(r); + } + + return(OK); +} + + +/*===========================================================================* + * select_cancel_all * + *===========================================================================*/ +PRIVATE void select_cancel_all(struct selectentry *se) +{ +/* Cancel select. Decrease select usage and cancel timer */ + + int fd; + struct filp *f; + + /* Always await results of asynchronous requests */ + assert(!is_deferred(se)); + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL) continue; + se->filps[fd] = NULL; + select_cancel_filp(f); + } + + if (se->expiry > 0) { + cancel_timer(&se->timer); + se->expiry = 0; + } + + se->requestor = NULL; +} + +/*===========================================================================* + * select_cancel_filp * + *===========================================================================*/ +PRIVATE void select_cancel_filp(struct filp *f) +{ +/* Reduce number of select users of this filp */ + + assert(f); + assert(f->filp_selectors >= 0); + if (f->filp_selectors == 0) return; + + select_lock_filp(f, f->filp_select_ops); + + f->filp_selectors--; + if (f->filp_selectors == 0) { + /* No one selecting on this filp anymore, forget about select state */ + f->filp_select_ops = 0; + f->filp_select_flags = 0; + f->filp_pipe_select_ops = 0; + } + + unlock_filp(f); +} + +/*===========================================================================* + * select_return * + *===========================================================================*/ +PRIVATE void select_return(struct selectentry *se) +{ + int r, r1; + + assert(!is_deferred(se)); /* Not done yet, first wait for async reply */ + + select_cancel_all(se); + r1 = copy_fdsets(se, se->nfds, TO_PROC); + if (r1 != OK) + r = r1; + else if (se->error != OK) + r = se->error; + else + r = se->nreadyfds; + + revive(se->req_endpt, r); +} + + +/*===========================================================================* + * select_callback * + *===========================================================================*/ +PUBLIC void select_callback(struct filp *f, int status) +{ + filp_status(f, status); +} + +/*===========================================================================* + * init_select * + *===========================================================================*/ +PUBLIC void init_select(void) +{ + int s; + + for (s = 0; s < MAXSELECTS; s++) + init_timer(&selecttab[s].timer); +} + + +/*===========================================================================* + * select_forget * + *===========================================================================*/ +PUBLIC void select_forget(endpoint_t proc_e) +{ +/* Something has happened (e.g. signal delivered that interrupts select()). + * Totally forget about the select(). 
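+ * (This path is typically reached when a signal is delivered to a process
+ * blocked in select(); the caller then sees the POSIX behaviour of
+ * select() returning -1 with errno set to EINTR.)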
*/ + + int slot; + struct selectentry *se; + + for (slot = 0; slot < MAXSELECTS; slot++) { + se = &selecttab[slot]; + if (se->requestor != NULL && se->req_endpt == proc_e) + break; + } + + if (slot >= MAXSELECTS) return; /* Entry not found */ + se->error = EINTR; + if (is_deferred(se)) return; /* Still awaiting initial reply */ + + select_cancel_all(se); +} + + +/*===========================================================================* + * select_timeout_check * + *===========================================================================*/ +PUBLIC void select_timeout_check(timer_t *timer) +{ + int s; + struct selectentry *se; + + s = tmr_arg(timer)->ta_int; + if (s < 0 || s >= MAXSELECTS) return; /* Entry does not exist */ + + se = &selecttab[s]; + if (se->requestor == NULL) return; + fp = se->requestor; + if (se->expiry <= 0) return; /* Strange, did we even ask for a timeout? */ + se->expiry = 0; + if (is_deferred(se)) return; /* Wait for initial replies to DEV_SELECT */ + select_return(se); +} + + +/*===========================================================================* + * select_unsuspend_by_endpt * + *===========================================================================*/ +PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e) +{ +/* Revive blocked processes when a driver has disappeared */ + + int fd, s, major; + struct selectentry *se; + struct filp *f; + + for (s = 0; s < MAXSELECTS; s++) { + int wakehim = 0; + se = &selecttab[s]; + if (se->requestor == NULL) continue; + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL) + continue; + + major = major(f->filp_vno->v_sdev); + if (dmap_driver_match(proc_e, major)) { + se->filps[fd] = NULL; + se->error = EINTR; + select_cancel_filp(f); + wakehim = 1; + } + } + + if (wakehim && !is_deferred(se)) + select_return(se); + } +} + + +/*===========================================================================* + * select_reply1 * + *===========================================================================*/ +PUBLIC void select_reply1(driver_e, minor, status) +endpoint_t driver_e; +int minor; +int status; +{ +/* Handle reply to DEV_SELECT request */ + + int major; + dev_t dev; + struct filp *f; + struct dmap *dp; + struct vnode *vp; + + /* Figure out which device is replying */ + if ((dp = get_dmap(driver_e)) == NULL) { + printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n", + __FILE__, __LINE__, driver_e); + return; + } + major = dp-dmap; + dev = makedev(major, minor); + + /* Get filp belonging to character special file */ + if ((f = dp->dmap_sel_filp) == NULL) { + printf("VFS (%s:%d): major %d was not expecting a DEV_SELECT reply\n", + __FILE__, __LINE__, major); + return; + } + + /* Is the filp still in use and busy waiting for a reply? The owner might + * have vanished before the driver was able to reply. */ + if (f->filp_count >= 1 && (f->filp_select_flags & FSF_BUSY)) { + /* Find vnode and check we got a reply from the device we expected */ + vp = f->filp_vno; + assert(vp != NULL); + assert((vp->v_mode & I_TYPE) == I_CHAR_SPECIAL); /* Must be char. 
special */ + if (vp->v_sdev != dev) { + printf("VFS (%s:%d): expected reply from dev %d not %d\n", + __FILE__, __LINE__, vp->v_sdev, dev); + return; + } + } + + select_lock_filp(f, f->filp_select_ops); + + /* No longer waiting for a reply from this device */ + f->filp_select_flags &= ~FSF_BUSY; + dp->dmap_sel_filp = NULL; + + /* The select call is done now, except when + * - another process started a select on the same filp with possibly a + * different set of operations. + * - a process does a select on the same filp but using different file + * descriptors. + * - the select has a timeout. Upon receiving this reply the operations might + * not be ready yet, so we want to wait for that to ultimately happen. + * Therefore we need to keep remembering what the operations are. */ + if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED))) + f->filp_select_ops = 0; /* done selecting */ + else if (!(f->filp_select_flags & FSF_UPDATE)) + f->filp_select_ops &= ~status; /* there may be operations pending */ + + /* Tell filp owners about result unless we need to wait longer */ + if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) { + if (status > 0) { /* operations ready */ + if (status & SEL_RD) f->filp_select_flags &= ~FSF_RD_BLOCK; + if (status & SEL_WR) f->filp_select_flags &= ~FSF_WR_BLOCK; + if (status & SEL_ERR) f->filp_select_flags &= ~FSF_ERR_BLOCK; + } else if (status < 0) { /* error */ + f->filp_select_flags &= ~FSF_BLOCKED; /* No longer blocking */ + } + + unlock_filp(f); + filp_status(f, status); /* Tell filp owners about the results */ + } else { + unlock_filp(f); + } + + select_restart_filps(); +} + + +/*===========================================================================* + * select_reply2 * + *===========================================================================*/ +PUBLIC void select_reply2(driver_e, minor, status) +endpoint_t driver_e; +int minor; +int status; +{ +/* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when + * the select request is 'blocking' until an operation becomes ready. */ + int major, slot, fd; + dev_t dev; + struct filp *f; + struct dmap *dp; + struct vnode *vp; + struct selectentry *se; + + if (status == 0) { + printf("VFS (%s:%d): weird status (%d) to report\n", + __FILE__, __LINE__, status); + return; + } + + /* Figure out which device is replying */ + if ((dp = get_dmap(driver_e)) == NULL) { + printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n", + __FILE__, __LINE__, driver_e); + return; + } + major = dp-dmap; + dev = makedev(major, minor); + + /* Find all file descriptors selecting for this device */ + for (slot = 0; slot < MAXSELECTS; slot++) { + se = &selecttab[slot]; + if (se->requestor == NULL) continue; /* empty slot */ + + for (fd = 0; fd < se->nfds; fd++) { + if ((f = se->filps[fd]) == NULL) continue; + if ((vp = f->filp_vno) == NULL) continue; + if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue; + if (vp->v_sdev != dev) continue; + + select_lock_filp(f, f->filp_select_ops); + if (status > 0) { /* Operations ready */ + /* Clear the replied bits from the request + * mask unless FSF_UPDATE is set. 
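+			 * E.g., if filp_select_ops was SEL_RD|SEL_WR and the
+			 * driver reports SEL_RD ready, only SEL_WR stays
+			 * pending.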
+			 */
+			if (!(f->filp_select_flags & FSF_UPDATE))
+				f->filp_select_ops &= ~status;
+			if (status & SEL_RD)
+				f->filp_select_flags &= ~FSF_RD_BLOCK;
+			if (status & SEL_WR)
+				f->filp_select_flags &= ~FSF_WR_BLOCK;
+			if (status & SEL_ERR)
+				f->filp_select_flags &= ~FSF_ERR_BLOCK;
+
+			ops2tab(status, fd, se);
+		} else {
+			f->filp_select_flags &= ~FSF_BLOCKED;
+			ops2tab(SEL_RD|SEL_WR|SEL_ERR, fd, se);
+		}
+		unlock_filp(f);
+		if (se->nreadyfds > 0) restart_proc(se);
+	}
+  }
+
+  select_restart_filps();
+}
+
+/*===========================================================================*
+ *				select_restart_filps			     *
+ *===========================================================================*/
+PRIVATE void select_restart_filps()
+{
+  int fd, slot;
+  struct filp *f;
+  struct vnode *vp;
+  struct selectentry *se;
+
+  /* Locate filps that can be restarted */
+  for (slot = 0; slot < MAXSELECTS; slot++) {
+	se = &selecttab[slot];
+	if (se->requestor == NULL) continue; /* empty slot */
+
+	/* Only 'deferred' processes are eligible to restart */
+	if (!is_deferred(se)) continue;
+
+	/* Find filps that are not waiting for a reply, but have an updated
+	 * status (i.e., another select on the same filp with possibly a
+	 * different set of operations is to be done) and thus require the
+	 * select request to be sent again.
+	 */
+	for (fd = 0; fd < se->nfds; fd++) {
+		int r, wantops, ops;
+		if ((f = se->filps[fd]) == NULL) continue;
+		if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
+			continue;		     /* initial reply */
+		if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */
+			continue;			  /* 'update' state */
+
+		wantops = ops = f->filp_select_ops;
+		select_lock_filp(f, ops);
+		vp = f->filp_vno;
+		assert((vp->v_mode & I_TYPE) == I_CHAR_SPECIAL);
+		r = do_select_request(se, fd, &wantops);
+		unlock_filp(f);
+		if (r != SEL_OK) {
+			if (r == SEL_DEFERRED) continue;
+			else break; /* Error or bogus return code; abort */
+		}
+		if (wantops & ops) ops2tab(wantops, fd, se);
+	}
+  }
+}
+
+/*===========================================================================*
+ *				do_select_request			     *
+ *===========================================================================*/
+PRIVATE int do_select_request(se, fd, ops)
+struct selectentry *se;
+int fd;
+int *ops;
+{
+/* Perform the actual select request for file descriptor fd */
+
+  int r, type;
+  struct filp *f;
+
+  type = se->type[fd];
+  f = se->filps[fd];
+  r = fdtypes[type].select_request(f, ops, se->block);
+  if (r != SEL_OK && r != SEL_DEFERRED) {
+	se->error = EINTR;
+	se->block = 0;	/* Stop blocking to return asap */
+	if (!is_deferred(se)) select_cancel_all(se);
+  }
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				filp_status				     *
+ *===========================================================================*/
+PRIVATE void filp_status(f, status)
+struct filp *f;
+int status;
+{
+/* Tell processes that need to know about the status of this filp */
+  int fd, slot;
+  struct selectentry *se;
+
+  for (slot = 0; slot < MAXSELECTS; slot++) {
+	se = &selecttab[slot];
+	if (se->requestor == NULL) continue; /* empty slot */
+
+	for (fd = 0; fd < se->nfds; fd++) {
+		if (se->filps[fd] != f) continue;
+		if (status < 0)
+			ops2tab(SEL_RD|SEL_WR|SEL_ERR, fd, se);
+		else
+			ops2tab(status, fd, se);
+		restart_proc(se);
+	}
+  }
+}
+
+/*===========================================================================*
+ *				restart_proc				     *
+ *===========================================================================*/
+PRIVATE void restart_proc(se)
+struct selectentry *se;
+{
+/* Tell the process about select results (if any) unless there are still
+ * results pending. */
+
+  if ((se->nreadyfds > 0 || !se->block) && !is_deferred(se))
+	select_return(se);
+}
+
+/*===========================================================================*
+ *				wipe_select				     *
+ *===========================================================================*/
+PRIVATE void wipe_select(struct selectentry *se)
+{
+  se->nfds = 0;
+  se->nreadyfds = 0;
+  se->error = OK;
+  se->block = 0;
+  memset(se->filps, 0, sizeof(se->filps));
+
+  FD_ZERO(&se->readfds);
+  FD_ZERO(&se->writefds);
+  FD_ZERO(&se->errorfds);
+  FD_ZERO(&se->ready_readfds);
+  FD_ZERO(&se->ready_writefds);
+  FD_ZERO(&se->ready_errorfds);
+}
+
+/*===========================================================================*
+ *				select_lock_filp			     *
+ *===========================================================================*/
+PRIVATE void select_lock_filp(struct filp *f, int ops)
+{
+/* Lock a filp and vnode based on which operations are requested */
+  tll_access_t locktype;
+
+  locktype = VNODE_READ; /* By default */
+
+  if (ops & (SEL_WR|SEL_ERR))
+	/* Selecting for error or writing requires exclusive access */
+	locktype = VNODE_WRITE;
+
+  lock_filp(f, locktype);
+}
diff --git a/servers/avfs/select.h b/servers/avfs/select.h
new file mode 100644
index 000000000..5215b1a90
--- /dev/null
+++ b/servers/avfs/select.h
@@ -0,0 +1,9 @@
+#ifndef __VFS_SELECT_H__
+#define __VFS_SELECT_H__
+
+/* return codes for select_request_* and select_cancel_* */
+#define SEL_OK		0	/* ready */
+#define SEL_ERROR	1	/* failed */
+#define SEL_DEFERRED	2	/* request is sent to driver */
+
+#endif
diff --git a/servers/avfs/stadir.c b/servers/avfs/stadir.c
new file mode 100644
index 000000000..fbc8fa088
--- /dev/null
+++ b/servers/avfs/stadir.c
@@ -0,0 +1,287 @@
+/* This file contains the code for performing the system calls relating to
+ * status and directories.
+ *
+ * The entry points into this file are
+ *   do_chdir:	  perform the CHDIR system call
+ *   do_fchdir:	  perform the FCHDIR system call
+ *   do_chroot:	  perform the CHROOT system call
+ *   do_lstat:	  perform the LSTAT system call
+ *   do_stat:	  perform the STAT system call
+ *   do_fstat:	  perform the FSTAT system call
+ *   do_fstatfs:  perform the FSTATFS system call
+ *   do_statvfs:  perform the STATVFS system call
+ *   do_fstatvfs: perform the FSTATVFS system call
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include
+#include
+#include "vnode.h"
+#include "vmnt.h"
+
+FORWARD _PROTOTYPE( int change, (struct vnode **iip, char *name_ptr, int len));
+FORWARD _PROTOTYPE( int change_into, (struct vnode **iip, struct vnode *vp));
+
+/*===========================================================================*
+ *				do_fchdir				     *
+ *===========================================================================*/
+PUBLIC int do_fchdir()
+{
+  /* Change directory on an already-opened fd. */
+  struct filp *rfilp;
+  int r;
+
+  /* Is the file descriptor valid? */
+  if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+  r = change_into(&fp->fp_wd, rfilp->filp_vno);
+  unlock_filp(rfilp);
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_chdir				     *
+ *===========================================================================*/
+PUBLIC int do_chdir()
+{
+/* Perform the chdir(name) system call.
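+ * The only difference from do_chroot() below is which per-process vnode
+ * pointer gets replaced: chdir() updates fp->fp_wd, chroot() fp->fp_rd.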
*/ + + return change(&fp->fp_wd, m_in.name, m_in.name_length); +} + +/*===========================================================================* + * do_chroot * + *===========================================================================*/ +PUBLIC int do_chroot() +{ +/* Perform the chroot(name) system call. */ + + if (!super_user) return(EPERM); /* only su may chroot() */ + return change(&fp->fp_rd, m_in.name, m_in.name_length); +} + +/*===========================================================================* + * change * + *===========================================================================*/ +PRIVATE int change(iip, name_ptr, len) +struct vnode **iip; /* pointer to the inode pointer for the dir */ +char *name_ptr; /* pointer to the directory name to change to */ +int len; /* length of the directory name string */ +{ +/* Do the actual work for chdir() and chroot(). */ + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + int r; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + /* Try to open the directory */ + if (fetch_name(name_ptr, len, M3, fullpath) != OK) return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + r = change_into(iip, vp); + unlock_vnode(vp); + unlock_vmnt(vmp); + put_vnode(vp); + return(r); +} + +/*===========================================================================* + * change_into * + *===========================================================================*/ +PRIVATE int change_into(iip, vp) +struct vnode **iip; /* pointer to the inode pointer for the dir */ +struct vnode *vp; /* this is what the inode has to become */ +{ + int r; + + if (*iip == vp) return(OK); /* Nothing to do */ + + /* It must be a directory and also be searchable */ + if ((vp->v_mode & I_TYPE) != I_DIRECTORY) + r = ENOTDIR; + else + r = forbidden(vp, X_BIT); /* Check if dir is searchable*/ + if (r != OK) return(r); + + /* Everything is OK. Make the change. */ + put_vnode(*iip); /* release the old directory */ + dup_vnode(vp); + *iip = vp; /* acquire the new one */ + return(OK); +} + +/*===========================================================================* + * do_stat * + *===========================================================================*/ +PUBLIC int do_stat() +{ +/* Perform the stat(name, buf) system call. */ + int r; + struct vnode *vp; + struct vmnt *vmp; + char fullpath[PATH_MAX+1]; + struct lookup resolve; + int old_stat = 0; + + lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); + resolve.l_vmnt_lock = VMNT_READ; + resolve.l_vnode_lock = VNODE_READ; + + if (call_nr == PREV_STAT) + old_stat = 1; + + if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK) + return(err_code); + if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code); + r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat); + + unlock_vnode(vp); + unlock_vmnt(vmp); + + put_vnode(vp); + return r; +} + +/*===========================================================================* + * do_fstat * + *===========================================================================*/ +PUBLIC int do_fstat() +{ +/* Perform the fstat(fd, buf) system call. */ + register struct filp *rfilp; + int r; + int pipe_pos = 0; + int old_stat = 0; + + if (call_nr == PREV_FSTAT) + old_stat = 1; + + /* Is the file descriptor valid? 
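+   * (If not, get_filp() returns NULL and leaves the reason, e.g. EBADF,
+   * in err_code.)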
*/
+  if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+  /* If we read from a pipe, send the position too */
+  if (rfilp->filp_vno->v_pipe == I_PIPE) {
+	if (rfilp->filp_mode & R_BIT)
+		if (ex64hi(rfilp->filp_pos) != 0) {
+			panic("do_fstat: bad position in pipe");
+		}
+	pipe_pos = ex64lo(rfilp->filp_pos);
+  }
+
+  r = req_stat(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
+	       who_e, m_in.buffer, pipe_pos, old_stat);
+
+  unlock_filp(rfilp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_fstatfs				     *
+ *===========================================================================*/
+PUBLIC int do_fstatfs()
+{
+/* Perform the fstatfs(fd, buf) system call. */
+  struct filp *rfilp;
+  int r;
+
+  /* Is the file descriptor valid? */
+  if( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
+
+  r = req_fstatfs(rfilp->filp_vno->v_fs_e, who_e, m_in.buffer);
+
+  unlock_filp(rfilp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_statvfs				     *
+ *===========================================================================*/
+PUBLIC int do_statvfs()
+{
+/* Perform the statvfs(name, buf) system call. */
+  int r;
+  struct vnode *vp;
+  struct vmnt *vmp;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+
+  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_READ;
+  resolve.l_vnode_lock = VNODE_READ;
+
+  if (fetch_name(m_in.STATVFS_NAME, m_in.STATVFS_LEN, M1, fullpath) != OK)
+	return(err_code);
+  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  r = req_statvfs(vp->v_fs_e, who_e, m_in.STATVFS_BUF);
+
+  unlock_vnode(vp);
+  unlock_vmnt(vmp);
+
+  put_vnode(vp);
+  return r;
+}
+
+/*===========================================================================*
+ *				do_fstatvfs				     *
+ *===========================================================================*/
+PUBLIC int do_fstatvfs()
+{
+/* Perform the fstatvfs(fd, buf) system call. */
+  register struct filp *rfilp;
+  int r;
+
+  /* Is the file descriptor valid? */
+  if ((rfilp = get_filp(m_in.FSTATVFS_FD, VNODE_READ)) == NULL)
+	return(err_code);
+
+  r = req_statvfs(rfilp->filp_vno->v_fs_e, who_e, m_in.FSTATVFS_BUF);
+
+  unlock_filp(rfilp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ *				do_lstat				     *
+ *===========================================================================*/
+PUBLIC int do_lstat()
+{
+/* Perform the lstat(name, buf) system call. */
+  struct vnode *vp;
+  struct vmnt *vmp;
+  int r;
+  char fullpath[PATH_MAX+1];
+  struct lookup resolve;
+  int old_stat = 0;
+
+  lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
+  resolve.l_vmnt_lock = VMNT_READ;
+  resolve.l_vnode_lock = VNODE_READ;
+
+  if (call_nr == PREV_LSTAT)
+	old_stat = 1;
+  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
+	return(err_code);
+
+  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+  r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat);
+
+  unlock_vnode(vp);
+  unlock_vmnt(vmp);
+
+  put_vnode(vp);
+  return(r);
+}
diff --git a/servers/avfs/table.c b/servers/avfs/table.c
new file mode 100644
index 000000000..df9b57df3
--- /dev/null
+++ b/servers/avfs/table.c
@@ -0,0 +1,145 @@
+/* This file contains the table used to map system call numbers onto the
+ * routines that perform them.
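+ *
+ * The incoming call number indexes call_vec; in essence (a sketch, the
+ * actual dispatch happens in the main message loop):
+ *
+ *	if (call_nr < 0 || call_nr >= NCALLS) error = ENOSYS;
+ *	else error = (*call_vec[call_nr])();
+ *
+ * The position of each entry is therefore part of the ABI; note the
+ * "THE MINIX3 ABI ENDS HERE" marker and the compile-time size check
+ * directly after the table.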
+ */
+
+#define _TABLE
+
+#include "fs.h"
+#include
+#include
+#include "file.h"
+#include "fproc.h"
+#include "lock.h"
+#include "vnode.h"
+#include "vmnt.h"
+
+PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = {
+	no_sys,		/*  0 = unused	*/
+	no_sys,		/*  1 = (exit)	*/
+	no_sys,		/*  2 = (fork)	*/
+	do_read,	/*  3 = read	*/
+	do_write,	/*  4 = write	*/
+	do_open,	/*  5 = open	*/
+	do_close,	/*  6 = close	*/
+	no_sys,		/*  7 = wait	*/
+	do_creat,	/*  8 = creat	*/
+	do_link,	/*  9 = link	*/
+	do_unlink,	/* 10 = unlink	*/
+	no_sys,		/* 11 = waitpid	*/
+	do_chdir,	/* 12 = chdir	*/
+	no_sys,		/* 13 = time	*/
+	do_mknod,	/* 14 = mknod	*/
+	do_chmod,	/* 15 = chmod	*/
+	do_chown,	/* 16 = chown	*/
+	no_sys,		/* 17 = break	*/
+	do_stat,	/* 18 = stat (prev)	*/
+	do_lseek,	/* 19 = lseek	*/
+	no_sys,		/* 20 = getpid	*/
+	do_mount,	/* 21 = mount	*/
+	do_umount,	/* 22 = umount	*/
+	no_sys,		/* 23 = (setuid)	*/
+	no_sys,		/* 24 = getuid	*/
+	no_sys,		/* 25 = (stime)	*/
+	no_sys,		/* 26 = ptrace	*/
+	no_sys,		/* 27 = alarm	*/
+	do_fstat,	/* 28 = fstat (prev)	*/
+	no_sys,		/* 29 = pause	*/
+	do_utime,	/* 30 = utime	*/
+	no_sys,		/* 31 = (stty)	*/
+	no_sys,		/* 32 = (gtty)	*/
+	do_access,	/* 33 = access	*/
+	no_sys,		/* 34 = (nice)	*/
+	no_sys,		/* 35 = (ftime)	*/
+	do_sync,	/* 36 = sync	*/
+	no_sys,		/* 37 = kill	*/
+	do_rename,	/* 38 = rename	*/
+	do_mkdir,	/* 39 = mkdir	*/
+	do_unlink,	/* 40 = rmdir	*/
+	do_dup,		/* 41 = dup	*/
+	do_pipe,	/* 42 = pipe	*/
+	no_sys,		/* 43 = times	*/
+	no_sys,		/* 44 = (prof)	*/
+	do_slink,	/* 45 = symlink	*/
+	no_sys,		/* 46 = (setgid)	*/
+	no_sys,		/* 47 = getgid	*/
+	no_sys,		/* 48 = (signal)	*/
+	do_rdlink,	/* 49 = readlink	*/
+	do_lstat,	/* 50 = lstat (prev)	*/
+	no_sys,		/* 51 = (acct)	*/
+	no_sys,		/* 52 = (phys)	*/
+	no_sys,		/* 53 = (lock)	*/
+	do_ioctl,	/* 54 = ioctl	*/
+	do_fcntl,	/* 55 = fcntl	*/
+	no_sys,		/* 56 = (mpx)	*/
+	do_fsready,	/* 57 = FS proc ready	*/
+	no_sys,		/* 58 = unused	*/
+	no_sys,		/* 59 = (execve)	*/
+	do_umask,	/* 60 = umask	*/
+	do_chroot,	/* 61 = chroot	*/
+	no_sys,		/* 62 = (setsid)	*/
+	no_sys,		/* 63 = (getpgrp)	*/
+	no_sys,		/* 64 = (itimer)	*/
+	do_stat,	/* 65 = stat	*/
+	do_fstat,	/* 66 = fstat	*/
+	do_lstat,	/* 67 = lstat	*/
+	no_sys,		/* 68 = unused	*/
+	no_sys,		/* 69 = unused	*/
+	no_sys,		/* 70 = unused	*/
+	no_sys,		/* 71 = (sigaction)	*/
+	no_sys,		/* 72 = (sigsuspend)	*/
+	no_sys,		/* 73 = (sigpending)	*/
+	no_sys,		/* 74 = (sigprocmask)	*/
+	no_sys,		/* 75 = (sigreturn)	*/
+	no_sys,		/* 76 = (reboot)	*/
+	do_svrctl,	/* 77 = svrctl	*/
+	no_sys,		/* 78 = (sysuname)	*/
+	do_getsysinfo,	/* 79 = getsysinfo	*/
+	do_getdents,	/* 80 = getdents	*/
+	do_llseek,	/* 81 = llseek	*/
+	do_fstatfs,	/* 82 = fstatfs	*/
+	do_statvfs,	/* 83 = statvfs	*/
+	do_fstatvfs,	/* 84 = fstatvfs	*/
+	do_select,	/* 85 = select	*/
+	do_fchdir,	/* 86 = fchdir	*/
+	do_fsync,	/* 87 = fsync	*/
+	no_sys,		/* 88 = (getpriority)	*/
+	no_sys,		/* 89 = (setpriority)	*/
+	no_sys,		/* 90 = (gettimeofday)	*/
+	no_sys,		/* 91 = (seteuid)	*/
+	no_sys,		/* 92 = (setegid)	*/
+	do_truncate,	/* 93 = truncate	*/
+	do_ftruncate,	/* 94 = ftruncate	*/
+	do_chmod,	/* 95 = fchmod	*/
+	do_chown,	/* 96 = fchown	*/
+	no_sys,		/* 97 = (getsysinfo_up)	*/
+	no_sys,		/* 98 = (sprofile)	*/
+	no_sys,		/* 99 = (cprofile)	*/
+	/* THE MINIX3 ABI ENDS HERE */
+	no_sys,		/* 100 = (exec_newmem)	*/
+	no_sys,		/* 101 = (srv_fork)	*/
+	no_sys,		/* 102 = (exec_restart)	*/
+	no_sys,		/* 103 = (procstat)	*/
+	no_sys,		/* 104 = (getprocnr)	*/
+	no_sys,		/* 105 = unused	*/
+	no_sys,		/* 106 = unused	*/
+	no_sys,		/* 107 = (getepinfo)	*/
+	no_sys,		/* 108 = (adddma)	*/
+ no_sys, /* 110 = (getdma) */
+ no_sys, /* 111 = (srv_kill) */
+ do_gcov_flush, /* 112 = gcov_flush */
+ no_sys, /* 113 = (getsid) */
+};
+/* This should not fail with "array size is negative": */
+extern int dummy[sizeof(call_vec) == NCALLS * sizeof(call_vec[0]) ? 1 : -1];
+
+PUBLIC _PROTOTYPE (int (*pfs_call_vec[]), (void) ) = {
+
+ no_sys, /* 0 */
+ do_check_perms, /* 1 */
+ do_verify_fd, /* 2 */
+ do_set_filp, /* 3 */
+ do_copy_filp, /* 4 */
+ do_put_filp, /* 5 */
+ do_cancel_fd /* 6 */
+};
diff --git a/servers/avfs/threads.h b/servers/avfs/threads.h
new file mode 100644
index 000000000..02f03cdee
--- /dev/null
+++ b/servers/avfs/threads.h
@@ -0,0 +1,35 @@
+#ifndef __VFS_WORKERS_H__
+#define __VFS_WORKERS_H__
+#include <minix/mthread.h>
+#include "job.h"
+
+#define thread_t mthread_thread_t
+#define mutex_t mthread_mutex_t
+#define cond_t mthread_cond_t
+#define attr_t mthread_attr_t
+
+#define threads_init mthread_init
+#define yield mthread_yield
+#define yield_all mthread_yield_all
+
+#define mutex_init mthread_mutex_init
+#define mutex_destroy mthread_mutex_destroy
+#define mutex_lock mthread_mutex_lock
+#define mutex_trylock mthread_mutex_trylock
+#define mutex_unlock mthread_mutex_unlock
+
+#define cond_init mthread_cond_init
+#define cond_destroy mthread_cond_destroy
+#define cond_wait mthread_cond_wait
+#define cond_signal mthread_cond_signal
+
+struct worker_thread {
+ thread_t w_tid;
+ mutex_t w_event_mutex;
+ cond_t w_event;
+ struct job w_job;
+ struct fproc *w_fp;
+ struct worker_thread *w_next;
+};
+
+#endif
diff --git a/servers/avfs/time.c b/servers/avfs/time.c
new file mode 100644
index 000000000..315fc4b78
--- /dev/null
+++ b/servers/avfs/time.c
@@ -0,0 +1,66 @@
+/* This file takes care of those system calls that deal with time.
+ *
+ * The entry points into this file are
+ * do_utime: perform the UTIME system call
+ */
+
+#include "fs.h"
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include "file.h"
+#include "fproc.h"
+#include "path.h"
+#include "param.h"
+#include "vnode.h"
+#include 
+#include "vmnt.h"
+
+/*===========================================================================*
+ * do_utime *
+ *===========================================================================*/
+PUBLIC int do_utime()
+{
+/* Perform the utime(name, timep) system call. */
+ register int len;
+ int r;
+ time_t actime, modtime;
+ struct vnode *vp;
+ struct vmnt *vmp;
+ char fullpath[PATH_MAX+1];
+ struct lookup resolve;
+
+ lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
+ resolve.l_vmnt_lock = VMNT_WRITE;
+ resolve.l_vnode_lock = VNODE_READ;
+
+ /* Adjust for case of 'timep' being NULL;
+ * utime_strlen then holds the actual size: strlen(name)+1 */
+ len = m_in.utime_length;
+ if (len == 0) len = m_in.utime_strlen;
+
+ /* Temporarily open the file */
+ if (fetch_name(m_in.utime_file, len, M1, fullpath) != OK) return(err_code);
+ if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
+
+ /* Only the owner of a file or the super user can change its timestamps.
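+ * (POSIX: when utime() is called with a NULL 'timep', write permission on
+ * the file is sufficient; the forbidden(vp, W_BIT) fallback below covers
+ * exactly that case, since utime_length == 0 then.)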
*/ + r = OK; + if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) r = EPERM; + if (m_in.utime_length == 0 && r != OK) r = forbidden(vp, W_BIT); + if (read_only(vp) != OK) r = EROFS; /* Not even su can touch if R/O */ + if (r == OK) { + /* Issue request */ + if(m_in.utime_length == 0) { + actime = modtime = clock_time(); + } else { + actime = m_in.utime_actime; + modtime = m_in.utime_modtime; + } + r = req_utime(vp->v_fs_e, vp->v_inode_nr, actime, modtime); + } + + unlock_vnode(vp); + unlock_vmnt(vmp); + + put_vnode(vp); + return(r); +} diff --git a/servers/avfs/tll.c b/servers/avfs/tll.c new file mode 100644 index 000000000..6ce2f2c80 --- /dev/null +++ b/servers/avfs/tll.c @@ -0,0 +1,310 @@ +/* This file contains the implementation of the three-level-lock. */ + +#include "fs.h" +#include "glo.h" +#include "tll.h" +#include "threads.h" +#include + +FORWARD _PROTOTYPE( int tll_append, (tll_t *tllp, tll_access_t locktype)); + +PRIVATE int tll_append(tll_t *tllp, tll_access_t locktype) +{ + struct worker_thread *queue; + + assert(self != NULL); + assert(tllp != NULL); + assert(locktype != TLL_NONE); + + /* Read-only and write-only requests go to the write queue. Read-serialized + * requests go to the serial queue. Then we wait for an event to signal it's + * our turn to go. */ + queue = NULL; + if (locktype == TLL_READ || locktype == TLL_WRITE) { + if (tllp->t_write == NULL) + tllp->t_write = self; + else + queue = tllp->t_write; + } else { + if (tllp->t_serial == NULL) + tllp->t_serial = self; + else + queue = tllp->t_serial; + } + + if (queue != NULL) { /* Traverse to end of queue */ + while (queue->w_next != NULL) queue = queue->w_next; + queue->w_next = self; + } + self->w_next = NULL; /* End of queue */ + + /* Now wait for the event it's our turn */ + worker_wait(); + + tllp->t_current = locktype; + tllp->t_status &= ~TLL_PEND; + tllp->t_owner = self; + + if (tllp->t_current == TLL_READ) { + tllp->t_readonly++; + tllp->t_owner = NULL; + } + + if (verbose) { + printf("got lock on tllp=%p with type %d (self=%p)\n", tllp, + locktype, self); + } + + /* Due to the way upgrading and downgrading works, read-only requests are + * scheduled to run after a downgraded lock is released (because they are + * queued on the write-only queue which has priority). This results from the + * fact that the downgrade operation cannot know whether the next locktype on + * the write-only queue is really write-only or actually read-only. However, + * that means that read-serialized requests stay queued, while they could run + * simultaneously with read-only requests. See if there are any and grant + * the head request access */ + if (tllp->t_current == TLL_READ && tllp->t_serial != NULL) { + tllp->t_owner = tllp->t_serial; + tllp->t_serial = tllp->t_serial->w_next; + tllp->t_owner->w_next = NULL; + assert(!(tllp->t_status & TLL_PEND)); + tllp->t_status |= TLL_PEND; + worker_signal(tllp->t_owner); + } + + return(OK); +} + +PUBLIC void tll_downgrade(tll_t *tllp) +{ +/* Downgrade three-level-lock tll from write-only to read-serialized, or from + * read-serialized to read-only. Caveat: as we can't know whether the next + * lock type on the write queue is actually read-only or write-only, we can't + * grant access to that type. It will be granted access once we unlock. Also, + * because we apply write-bias, we can't grant access to read-serialized + * either, unless nothing is queued on the write-only stack. 
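+ * In short: TLL_WRITE downgrades to TLL_READSER, and TLL_READSER downgrades
+ * to TLL_READ (or hands the lock to the next queued read-serialized
+ * requestor when the write queue is empty).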
*/ + + assert(self != NULL); + assert(tllp != NULL); + assert(tllp->t_owner == self); + + switch(tllp->t_current) { + case TLL_WRITE: tllp->t_current = TLL_READSER; break; + case TLL_READSER: + /* If nothing is queued on write-only, but there is a pending lock + * requesting read-serialized, grant it and keep the lock type. */ + if (tllp->t_write == NULL && tllp->t_serial != NULL) { + tllp->t_owner = tllp->t_serial; + tllp->t_serial = tllp->t_serial->w_next; /* Remove head */ + tllp->t_owner->w_next = NULL; + assert(!(tllp->t_status & TLL_PEND)); + tllp->t_status |= TLL_PEND; + worker_signal(tllp->t_owner); + } else { + tllp->t_current = TLL_READ; + tllp->t_owner = NULL; + } + tllp->t_readonly++; /* Either way, there's one more read-only lock */ + break; + default: panic("VFS: Incorrect lock state"); + } +} + +PUBLIC void tll_init(tll_t *tllp) +{ +/* Initialize three-level-lock tll */ + assert(tllp != NULL); + + tllp->t_current = TLL_NONE; + tllp->t_readonly = 0; + tllp->t_status = TLL_DFLT; + tllp->t_write = NULL; + tllp->t_serial = NULL; + tllp->t_owner = NULL; +} + +PUBLIC int tll_islocked(tll_t *tllp) +{ + return(tllp->t_current != TLL_NONE); +} + +PUBLIC int tll_locked_by_me(tll_t *tllp) +{ + assert(self != NULL); + return(tllp->t_owner == self && !(tllp->t_status & TLL_PEND)); +} + +PUBLIC int tll_lock(tll_t *tllp, tll_access_t locktype) +{ +/* Try to lock three-level-lock tll with type locktype */ + + assert(self != NULL); + assert(tllp != NULL); + assert(locktype != TLL_NONE); + + self->w_next = NULL; + + if (locktype != TLL_READ && locktype != TLL_READSER && locktype != TLL_WRITE) + panic("Invalid lock type %d\n", locktype); + + /* If this locking has pending locks, we wait */ + if (tllp->t_status & TLL_PEND) + return tll_append(tllp, locktype); + + /* If we already own this lock don't lock it again and return immediately */ + if (tllp->t_owner == self) { + assert(tllp->t_status == TLL_DFLT); + return(EBUSY); + } + + /* If this lock is not accessed by anyone, locktype is granted off the bat */ + if (tllp->t_current == TLL_NONE) { + tllp->t_current = locktype; + if (tllp->t_current == TLL_READ) + tllp->t_readonly = 1; + else { /* Record owner if locktype is read-serialized or write-only */ + tllp->t_owner = self; + } + return(OK); + } + + /* If the current lock is write-only, we have to wait for that lock to be + * released (regardless of the value of locktype). */ + if (tllp->t_current == TLL_WRITE) + return tll_append(tllp, locktype); + + /* However, if it's not and we're requesting a write-only lock, we have to + * wait until the last read access is released (additional read requests + * after this write-only requests are to be queued) */ + if (locktype == TLL_WRITE) + return tll_append(tllp, locktype); + + /* We have to queue read and read-serialized requests if we have a write-only + * request queued ("write bias") or when a read-serialized lock is trying to + * upgrade to write-only. The current lock for this tll is either read or + * read-serialized. */ + if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) + return tll_append(tllp, locktype); + + /* If this lock is in read-serialized mode, we can allow read requests and + * queue read-serialized requests */ + if (tllp->t_current == TLL_READSER) { + if (locktype == TLL_READ) { + tllp->t_readonly++; + return(OK); + } else + return tll_append(tllp, locktype); + } + + /* Finally, if the current lock is read-only, we can change it to + * read-serialized if necessary without a problem. 
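+ * (For read-only access no owner is recorded; the t_readonly counter alone
+ * tracks the readers.)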
*/ + tllp->t_current = locktype; /* Either read-only or read-serialized */ + if (tllp->t_current == TLL_READ) { /* We now have an additional reader */ + tllp->t_readonly++; + tllp->t_owner = NULL; + } else { + assert(tllp->t_current != TLL_WRITE); + tllp->t_owner = self; /* We now have a new owner */ + self->w_next = NULL; + } + + return(OK); +} + +PUBLIC int tll_haspendinglock(tll_t *tllp) +{ +/* Is someone trying to obtain a lock? */ + assert(tllp != NULL); + + /* Someone is trying to obtain a lock if either the write/read-only queue or + * the read-serialized queue is not empty. */ + return(tllp->t_write != NULL || tllp->t_serial != NULL); +} + +PUBLIC int tll_unlock(tll_t *tllp) +{ +/* Unlock a previously locked three-level-lock tll */ + int signal_owner = 0; + + assert(self != NULL); + assert(tllp != NULL); + + if (tllp->t_owner == NULL || tllp->t_owner != self) { + /* This unlock must have been done by a read-only lock */ + tllp->t_readonly--; + assert(tllp->t_readonly >= 0); + + /* If a read-serialized lock is trying to upgrade and there are no more + * read-only locks, the lock can now be upgraded to write-only */ + if ((tllp->t_status & TLL_UPGR) && tllp->t_readonly == 0) + signal_owner = 1; + } + + if(tllp->t_owner == self || (tllp->t_owner == NULL && tllp->t_readonly == 0)){ + /* Let another read-serialized or write-only request obtain access. + * Write-only has priority, but only after the last read-only access + * has left. Read-serialized access will only be granted if there is + * no pending write-only access request. */ + struct worker_thread *new_owner; + new_owner = NULL; + tllp->t_owner = NULL; /* Remove owner of lock */ + + if (tllp->t_write != NULL) { + if (tllp->t_readonly == 0) { + new_owner = tllp->t_write; + tllp->t_write = tllp->t_write->w_next; + } + } else if (tllp->t_serial != NULL) { + new_owner = tllp->t_serial; + tllp->t_serial = tllp->t_serial->w_next; + } + + /* New owner is head of queue or NULL if no proc is available */ + if (new_owner != NULL) { + tllp->t_owner = new_owner; + tllp->t_owner->w_next = NULL; + assert(tllp->t_owner != self); + signal_owner = 1; + } + } + + /* If no one is using this lock, mark it as not in use */ + if (tllp->t_owner == NULL && tllp->t_readonly == 0) + tllp->t_current = TLL_NONE; + + if (tllp->t_current == TLL_NONE || tllp->t_current == TLL_READ) { + if (!signal_owner) { + tllp->t_owner = NULL; + } + } + + /* If we have a new owner or the current owner managed to upgrade its lock, + * tell it to start/continue running */ + if (signal_owner) { + assert(!(tllp->t_status & TLL_PEND)); + tllp->t_status |= TLL_PEND; + worker_signal(tllp->t_owner); + } + + return(OK); +} + +PUBLIC void tll_upgrade(tll_t *tllp) +{ +/* Upgrade three-level-lock tll from read-serialized to write-only */ + + assert(self != NULL); + assert(tllp != NULL); + assert(tllp->t_owner == self); + assert(tllp->t_current != TLL_READ); /* i.e., read-serialized or write-only*/ + if (tllp->t_current == TLL_WRITE) return; /* Nothing to do */ + if (tllp->t_readonly != 0) { /* Wait for readers to leave */ + assert(!(tllp->t_status & TLL_UPGR)); + tllp->t_status |= TLL_UPGR; + worker_wait(); + tllp->t_status &= ~TLL_UPGR; + tllp->t_status &= ~TLL_PEND; + assert(tllp->t_readonly == 0); + } + tllp->t_current = TLL_WRITE; +} diff --git a/servers/avfs/tll.h b/servers/avfs/tll.h new file mode 100644 index 000000000..bfca394f3 --- /dev/null +++ b/servers/avfs/tll.h @@ -0,0 +1,20 @@ +#ifndef __VFS_TLL_H__ +#define __VFS_TLL_H__ + +/* Three-level-lock. 
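+ *
+ * A minimal usage sketch (an editorial illustration, not part of the patch;
+ * it assumes the VFS worker-thread context above, where 'self' is set):
+ *
+ *	tll_t lock;
+ *
+ *	tll_init(&lock);		-- state is TLL_NONE
+ *	tll_lock(&lock, TLL_READSER);	-- get read-serialized access
+ *	tll_upgrade(&lock);		-- wait out readers, then TLL_WRITE
+ *	tll_unlock(&lock);		-- wake the next queued requestor
+ *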
Allows read-only, read-serialized, and write-only locks */
+
+typedef enum { TLL_NONE, TLL_READ, TLL_READSER, TLL_WRITE } tll_access_t;
+typedef enum { TLL_DFLT = 0x0, TLL_UPGR = 0x1, TLL_PEND = 0x2 } tll_status_t;
+
+typedef struct {
+ tll_access_t t_current; /* Current type of access to lock */
+ struct worker_thread *t_owner;/* Owner of non-read-only lock */
+ signed int t_readonly; /* No. of current read-only accessors */
+ tll_status_t t_status; /* Lock status; nothing, a pending lock,
+ * or a pending upgrade of read-serialized
+ * to write-only */
+ struct worker_thread *t_write;/* Write/read-only access requestors queue */
+ struct worker_thread *t_serial;/* Read-serialized access requestors queue */
+} tll_t;
+
+#endif
diff --git a/servers/avfs/utility.c b/servers/avfs/utility.c
new file mode 100644
index 000000000..5ff6fd134
--- /dev/null
+++ b/servers/avfs/utility.c
@@ -0,0 +1,153 @@
+/* This file contains a few general purpose utility routines.
+ *
+ * The entry points into this file are
+ * clock_time: ask the clock task for the real time
+ * fetch_name: go get a path name from user space
+ * no_sys: reject a system call that FS does not handle
+ * isokendpt_f: check the validity of a process endpoint
+ * in_group: determine whether group 'grp' is in rfp->fp_sgroups[]
+ */
+
+#include "fs.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "file.h"
+#include "fproc.h"
+#include "param.h"
+#include "vmnt.h"
+
+/*===========================================================================*
+ * fetch_name *
+ *===========================================================================*/
+PUBLIC int fetch_name(path, len, flag, dest)
+char *path; /* pointer to the path in user space */
+int len; /* path length, including 0 byte */
+int flag; /* M3 means path may be in message */
+char *dest; /* pointer to where path is to be stored */
+{
+/* Go get path and put it in 'dest'.
+ * If 'flag' = M3 and 'len' <= M3_STRING, the path is present in 'message'.
+ * If it is not, go copy it from user space.
+ */
+ register char *rpu, *rpm;
+ int r, count;
+
+ if (len > PATH_MAX) {
+ err_code = ENAMETOOLONG;
+ return(EGENERIC);
+ }
+
+ /* Check name length for validity. */
+ if (len <= 0) {
+ err_code = EINVAL;
+ return(EGENERIC);
+ }
+
+ if (flag == M3 && len <= M3_STRING) {
+ /* Just copy the path from the message to 'dest'. */
+ rpu = &dest[0];
+ rpm = m_in.pathname; /* contained in input message */
+ count = len;
+ do { *rpu++ = *rpm++; } while (--count);
+ r = OK;
+ } else {
+ /* String is not contained in the message. Get it from user space.
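+ * (sys_datacopy below copies all 'len' bytes, including the terminating
+ * null, from the caller's address space into 'dest'.)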
*/ + r = sys_datacopy(who_e, (vir_bytes) path, + VFS_PROC_NR, (vir_bytes) dest, (phys_bytes) len); + } + + if (dest[len - 1] != '\0') { + err_code = ENAMETOOLONG; + return(EGENERIC); + } + + return(r); +} + + +/*===========================================================================* + * no_sys * + *===========================================================================*/ +PUBLIC int no_sys() +{ +/* Somebody has used an illegal system call number */ + return(ENOSYS); +} + + +/*===========================================================================* + * isokendpt_f * + *===========================================================================*/ +PUBLIC int isokendpt_f(char *file, int line, endpoint_t endpoint, int *proc, int fatal) +{ + int failed = 0; + endpoint_t ke; + *proc = _ENDPOINT_P(endpoint); + if (endpoint == NONE) { + printf("VFS %s:%d: endpoint is NONE\n", file, line); + failed = 1; + } else if (*proc < 0 || *proc >= NR_PROCS) { + printf("VFS %s:%d: proc (%d) from endpoint (%d) out of range\n", + file, line, *proc, endpoint); + failed = 1; + } else if ((ke = fproc[*proc].fp_endpoint) != endpoint) { + if(ke == NONE) { + printf("VFS %s:%d: endpoint (%d) points to NONE slot (%d)\n", + file, line, endpoint, *proc); + assert(fproc[*proc].fp_pid == PID_FREE); + } else { + printf("VFS %s:%d: proc (%d) from endpoint (%d) doesn't match " + "known endpoint (%d)\n", file, line, *proc, endpoint, + fproc[*proc].fp_endpoint); + assert(fproc[*proc].fp_pid != PID_FREE); + } + failed = 1; + } + + if(failed && fatal) + panic("isokendpt_f failed"); + + return(failed ? EDEADEPT : OK); +} + + +/*===========================================================================* + * clock_time * + *===========================================================================*/ +PUBLIC time_t clock_time() +{ +/* This routine returns the time in seconds since 1.1.1970. MINIX is an + * astrophysically naive system that assumes the earth rotates at a constant + * rate and that such things as leap seconds do not exist. + */ + + register int r; + clock_t uptime; + time_t boottime; + + r = getuptime2(&uptime, &boottime); + if (r != OK) + panic("clock_time err: %d", r); + + return( (time_t) (boottime + (uptime/system_hz))); +} + +/*===========================================================================* + * in_group * + *===========================================================================*/ +PUBLIC int in_group(struct fproc *rfp, gid_t grp) +{ + int i; + + for (i = 0; i < rfp->fp_ngroups; i++) + if (rfp->fp_sgroups[i] == grp) + return(OK); + + return(EINVAL); +} diff --git a/servers/avfs/vmnt.c b/servers/avfs/vmnt.c new file mode 100644 index 000000000..cbb517f7f --- /dev/null +++ b/servers/avfs/vmnt.c @@ -0,0 +1,168 @@ +/* Virtual mount table related routines. + * + */ + +#include "fs.h" +#include "threads.h" +#include "vmnt.h" +#include +#include "fproc.h" + +FORWARD _PROTOTYPE( int is_vmnt_locked, (struct vmnt *vmp) ); + +/* Is vmp pointer reasonable? 
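+ * (It must point into the vmnt[] table; see SANEVMP below.)
+ *
+ * Locking sketch (editorial, not part of the patch; 'fs_e' stands for the
+ * endpoint of some mounted file system):
+ *
+ *	struct vmnt *vmp;
+ *
+ *	if ((vmp = find_vmnt(fs_e)) != NULL &&
+ *	    lock_vmnt(vmp, VMNT_EXCL) == OK) {
+ *		...			-- operate on the mount
+ *		unlock_vmnt(vmp);
+ *	}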
*/ +#define SANEVMP(v) ((((v) >= &vmnt[0] && (v) < &vmnt[NR_MNTS]))) +#define BADVMP(v, f, l) printf("%s:%d: bad vmp %p\n", f, l, v) +/* vp check that panics */ +#define ASSERTVMP(v) if(!SANEVMP(v)) { \ + BADVMP(v, __FILE__, __LINE__); panic("bad vmp"); } + +#if LOCK_DEBUG +/*===========================================================================* + * check_vmnt_locks_by_me * + *===========================================================================*/ +PUBLIC void check_vmnt_locks_by_me(struct fproc *rfp) +{ +/* Check whether this thread still has locks held on vmnts */ + struct vmnt *vmp; + + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) { + if (tll_locked_by_me(&vmp->m_lock)) + panic("Thread %d still holds vmnt lock on vmp %p call_nr=%d\n", + mthread_self(), vmp, call_nr); + } + + if (rfp->fp_vmnt_rdlocks != 0) + panic("Thread %d still holds read locks on a vmnt (%d) call_nr=%d\n", + mthread_self(), rfp->fp_vmnt_rdlocks, call_nr); +} +#endif + +/*===========================================================================* + * check_vmnt_locks * + *===========================================================================*/ +PUBLIC void check_vmnt_locks() +{ + struct vmnt *vmp; + int count = 0; + + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; vmp++) + if (is_vmnt_locked(vmp)) { + count++; + printf("vmnt %p is %s, fs_e=%d dev=%d\n", vmp, (tll_islocked(&vmp->m_lock) ? "locked":"pending locked"), vmp->m_fs_e, vmp->m_dev); + } + + if (count) panic("%d locked vmnts\n", count); +#if 0 + printf("check_vmnt_locks OK\n"); +#endif +} + +/*===========================================================================* + * get_free_vmnt * + *===========================================================================*/ +PUBLIC struct vmnt *get_free_vmnt(void) +{ + struct vmnt *vp; + + for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; ++vp) + if (vp->m_dev == NO_DEV) return(vp); + + return(NULL); +} + +/*===========================================================================* + * find_vmnt * + *===========================================================================*/ +PUBLIC struct vmnt *find_vmnt(endpoint_t fs_e) +{ +/* Find the vmnt belonging to an FS with endpoint 'fs_e' iff it's in use */ + struct vmnt *vp; + + for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; ++vp) + if (vp->m_fs_e == fs_e && vp->m_dev != NO_DEV) + return(vp); + + return(NULL); +} + +/*===========================================================================* + * init_vmnts * + *===========================================================================*/ +PUBLIC void init_vmnts(void) +{ +/* Initialize vmnt table */ + struct vmnt *vp; + + for (vp = &vmnt[0]; vp < &vmnt[NR_MNTS]; vp++) { + vp->m_fs_e = NONE; + vp->m_dev = NO_DEV; + vp->m_flags = 0; + vp->m_mounted_on = NULL; + vp->m_root_node = NULL; + vp->m_label[0] = '\0'; + vp->m_comm.c_max_reqs = 1; + vp->m_comm.c_cur_reqs = 0; + vp->m_comm.c_req_queue = NULL; + tll_init(&vp->m_lock); + } +} + +/*===========================================================================* + * is_vmnt_locked * + *===========================================================================*/ +PRIVATE int is_vmnt_locked(struct vmnt *vmp) +{ + ASSERTVMP(vmp); + return(tll_islocked(&vmp->m_lock) || tll_haspendinglock(&vmp->m_lock)); +} + +/*===========================================================================* + * lock_vmnt * + *===========================================================================*/ +PUBLIC int lock_vmnt(struct vmnt *vmp, tll_access_t locktype) +{ + int r; + tll_access_t 
initial_locktype; + + ASSERTVMP(vmp); + + initial_locktype = (locktype == VMNT_EXCL) ? VMNT_WRITE : locktype; + + r = tll_lock(&vmp->m_lock, initial_locktype); + + if (r == EBUSY) return(r); + + if (initial_locktype != locktype) { + tll_upgrade(&vmp->m_lock); + } + +#if LOCK_DEBUG + if (locktype == VMNT_READ) + fp->fp_vmnt_rdlocks++; +#endif + + return(OK); +} + +/*===========================================================================* + * unlock_vmnt * + *===========================================================================*/ +PUBLIC void unlock_vmnt(struct vmnt *vmp) +{ + ASSERTVMP(vmp); + +#if LOCK_DEBUG + /* Decrease read-only lock counter when not locked as VMNT_WRITE or + * VMNT_EXCL */ + if (!tll_locked_by_me(&vmp->m_lock)) + fp->fp_vmnt_rdlocks--; +#endif + + tll_unlock(&vmp->m_lock); + +#if LOCK_DEBUG + assert(!tll_locked_by_me(&vmp->m_lock)); +#endif + +} diff --git a/servers/avfs/vmnt.h b/servers/avfs/vmnt.h new file mode 100644 index 000000000..3143f045e --- /dev/null +++ b/servers/avfs/vmnt.h @@ -0,0 +1,24 @@ +#ifndef __VFS_VMNT_H__ +#define __VFS_VMNT_H__ + +EXTERN struct vmnt { + int m_fs_e; /* FS process' kernel endpoint */ + tll_t m_lock; + comm_t m_comm; + dev_t m_dev; /* device number */ + unsigned int m_flags; /* mount flags */ + struct vnode *m_mounted_on; /* vnode on which the partition is mounted */ + struct vnode *m_root_node; /* root vnode */ + char m_label[LABEL_MAX]; /* label of the file system process */ +} vmnt[NR_MNTS]; + +/* vmnt flags */ +#define VMNT_READONLY 01 /* Device mounted readonly */ +#define VMNT_BACKCALL 02 /* FS did back call */ + +/* vmnt lock types mapping */ +#define VMNT_READ TLL_READ +#define VMNT_WRITE TLL_READSER +#define VMNT_EXCL TLL_WRITE + +#endif diff --git a/servers/avfs/vnode.c b/servers/avfs/vnode.c new file mode 100644 index 000000000..5cd641780 --- /dev/null +++ b/servers/avfs/vnode.c @@ -0,0 +1,387 @@ +/* This file contains the routines related to vnodes. + * The entry points are: + * + * get_vnode - increase counter and get details of an inode + * get_free_vnode - get a pointer to a free vnode obj + * find_vnode - find a vnode according to the FS endpoint and the inode num. + * dup_vnode - duplicate vnode (i.e. increase counter) + * put_vnode - drop vnode (i.e. decrease counter) + */ + +#include "fs.h" +#include "threads.h" +#include "vnode.h" +#include "vmnt.h" +#include "fproc.h" +#include "file.h" +#include +#include + +/* Is vnode pointer reasonable? 
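+ * (That is, does it point into the vnode[] table? The macros below compile
+ * away when NDEBUG is set.)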
*/ +#if NDEBUG +#define SANEVP(v) +#define CHECKVN(v) +#define ASSERTVP(v) +#else +#define SANEVP(v) ((((v) >= &vnode[0] && (v) < &vnode[NR_VNODES]))) + +#define BADVP(v, f, l) printf("%s:%d: bad vp %p\n", f, l, v) + +/* vp check that returns 0 for use in check_vrefs() */ +#define CHECKVN(v) if(!SANEVP(v)) { \ + BADVP(v, __FILE__, __LINE__); \ + return 0; \ +} + +/* vp check that panics */ +#define ASSERTVP(v) if(!SANEVP(v)) { \ + BADVP(v, __FILE__, __LINE__); panic("bad vp"); } +#endif + +#if LOCK_DEBUG +/*===========================================================================* + * check_vnode_locks_by_me * + *===========================================================================*/ +PUBLIC void check_vnode_locks_by_me(struct fproc *rfp) +{ +/* Check whether this thread still has locks held on vnodes */ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) { + if (tll_locked_by_me(&vp->v_lock)) { + panic("Thread %d still holds vnode lock on vp %x call_nr=%d\n", + mthread_self(), vp, call_nr); + } + } + + if (rfp->fp_vp_rdlocks != 0) + panic("Thread %d still holds read locks on a vnode (%d) call_nr=%d\n", + mthread_self(), rfp->fp_vp_rdlocks, call_nr); +} +#endif + +/*===========================================================================* + * check_vnode_locks * + *===========================================================================*/ +PUBLIC void check_vnode_locks() +{ + struct vnode *vp; + int count = 0; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) + if (is_vnode_locked(vp)) { + count++; + } + + if (count) panic("%d locked vnodes\n", count); +#if 0 + printf("check_vnode_locks OK\n"); +#endif +} + +/*===========================================================================* + * get_free_vnode * + *===========================================================================*/ +PUBLIC struct vnode *get_free_vnode() +{ +/* Find a free vnode slot in the vnode table (it's not actually allocated) */ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) { + if (vp->v_ref_count == 0 && !is_vnode_locked(vp)) { + vp->v_pipe = NO_PIPE; + vp->v_uid = -1; + vp->v_gid = -1; + vp->v_sdev = NO_DEV; + vp->v_mapfs_e = NONE; + vp->v_mapfs_count = 0; + vp->v_mapinode_nr = 0; + return(vp); + } + } + + err_code = ENFILE; + return(NULL); +} + + +/*===========================================================================* + * find_vnode * + *===========================================================================*/ +PUBLIC struct vnode *find_vnode(int fs_e, int ino) +{ +/* Find a specified (FS endpoint and inode number) vnode in the + * vnode table */ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) + if (vp->v_ref_count > 0 && vp->v_inode_nr == ino && vp->v_fs_e == fs_e) + return(vp); + + return(NULL); +} + +/*===========================================================================* + * is_vnode_locked * + *===========================================================================*/ +PUBLIC int is_vnode_locked(struct vnode *vp) +{ +/* Find out whether a thread holds a lock on this vnode or is trying to obtain + * a lock. 
*/ + ASSERTVP(vp); + + return(tll_islocked(&vp->v_lock) || tll_haspendinglock(&vp->v_lock)); +} + +/*===========================================================================* + * init_vnodes * + *===========================================================================*/ +PUBLIC void init_vnodes(void) +{ + struct vnode *vp; + + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) { + vp->v_fs_e = NONE; + vp->v_mapfs_e = NONE; + vp->v_inode_nr = 0; + vp->v_ref_count = 0; + vp->v_fs_count = 0; + vp->v_mapfs_count = 0; + tll_init(&vp->v_lock); + } +} + +/*===========================================================================* + * lock_vnode * + *===========================================================================*/ +PUBLIC int lock_vnode(struct vnode *vp, tll_access_t locktype) +{ + int r; + + ASSERTVP(vp); + + r = tll_lock(&vp->v_lock, locktype); + +#if LOCK_DEBUG + if (locktype == VNODE_READ) { + fp->fp_vp_rdlocks++; + } +#endif + + if (r == EBUSY) return(r); + return(OK); +} + +/*===========================================================================* + * unlock_vnode * + *===========================================================================*/ +PUBLIC void unlock_vnode(struct vnode *vp) +{ + int i; + register struct vnode *rvp; + struct worker_thread *w; + ASSERTVP(vp); + +#if LOCK_DEBUG + /* Decrease read-only lock counter when not locked as VNODE_OPCL or + * VNODE_WRITE */ + if (!tll_locked_by_me(&vp->v_lock)) { + fp->fp_vp_rdlocks--; + } + + for (i = 0; i < NR_VNODES; i++) { + rvp = &vnode[i]; + + w = rvp->v_lock.t_write; + assert(w != self); + while (w && w->w_next != NULL) { + w = w->w_next; + assert(w != self); + } + + w = rvp->v_lock.t_serial; + assert(w != self); + while (w && w->w_next != NULL) { + w = w->w_next; + assert(w != self); + } + } +#endif + + tll_unlock(&vp->v_lock); +} + +/*===========================================================================* + * dup_vnode * + *===========================================================================*/ +PUBLIC void dup_vnode(struct vnode *vp) +{ +/* dup_vnode() is called to increment the vnode and therefore the + * referred inode's counter. + */ + ASSERTVP(vp); + vp->v_ref_count++; +} + + +/*===========================================================================* + * put_vnode * + *===========================================================================*/ +PUBLIC void put_vnode(struct vnode *vp) +{ +/* Decrease vnode's usage counter and decrease inode's usage counter in the + * corresponding FS process. Decreasing the fs_count each time we decrease the + * ref count would lead to poor performance. Instead, only decrease fs_count + * when the ref count hits zero. However, this could lead to fs_count to wrap. + * To prevent this, we drop the counter to 1 when the counter hits 256. + * We maintain fs_count as a sanity check to make sure VFS and the FS are in + * sync. + */ + int r, lock_vp; + + ASSERTVP(vp); + + /* Lock vnode. It's quite possible this thread already has a lock on this + * vnode. That's no problem, because the reference counter will not decrease + * to zero in that case. However, if the counter does decrease to zero *and* + * is already locked, we have a consistency problem somewhere. 
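+ * (Note that the final put_vnode() issues a single req_putnode() that drops
+ * all v_fs_count references at the FS in one go; see the end of this
+ * function.)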
*/ + lock_vp = lock_vnode(vp, VNODE_OPCL); + + if (vp->v_ref_count > 1) { + /* Decrease counter */ + vp->v_ref_count--; + if (vp->v_fs_count > 256) + vnode_clean_refs(vp); + if (lock_vp != EBUSY) unlock_vnode(vp); + return; + } + + /* If we already had a lock, there is a consistency problem */ + assert(lock_vp != EBUSY); + tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */ + + /* A vnode that's not in use can't be put back. */ + if (vp->v_ref_count <= 0) + panic("put_vnode failed: bad v_ref_count %d\n", vp->v_ref_count); + + /* fs_count should indicate that the file is in use. */ + if (vp->v_fs_count <= 0) + panic("put_vnode failed: bad v_fs_count %d\n", vp->v_fs_count); + + /* Tell FS we don't need this inode to be open anymore. */ + r = req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count); + + if (r != OK) { + printf("VFS: putnode failed: %d\n", r); + util_stacktrace(); + } + + /* This inode could've been mapped. If so, tell mapped FS to close it as + * well. If mapped onto same FS, this putnode is not needed. */ + if (vp->v_mapfs_e != NONE && vp->v_mapfs_e != vp->v_fs_e) + req_putnode(vp->v_mapfs_e, vp->v_mapinode_nr, vp->v_mapfs_count); + + vp->v_fs_count = 0; + vp->v_ref_count = 0; + vp->v_mapfs_count = 0; + + unlock_vnode(vp); +} + + +/*===========================================================================* + * vnode_clean_refs * + *===========================================================================*/ +PUBLIC void vnode_clean_refs(struct vnode *vp) +{ +/* Tell the underlying FS to drop all reference but one. */ + + if (vp == NULL) return; + if (vp->v_fs_count <= 1) return; /* Nothing to do */ + + /* Drop all references except one */ + req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count - 1); + vp->v_fs_count = 1; +} + + +#define REFVP(v) { vp = (v); CHECKVN(v); vp->v_ref_check++; } + +#if DO_SANITYCHECKS +/*===========================================================================* + * check_vrefs * + *===========================================================================*/ +PUBLIC int check_vrefs() +{ + int i, bad; + int ispipe_flag, ispipe_mode; + struct vnode *vp; + struct vmnt *vmp; + struct fproc *rfp; + struct filp *f; + + /* Clear v_ref_check */ + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) + vp->v_ref_check= 0; + + /* Count reference for processes */ + for (rfp=&fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if (rfp->fp_pid == PID_FREE) + continue; + if(rfp->fp_rd) REFVP(rfp->fp_rd); + if(rfp->fp_wd) REFVP(rfp->fp_wd); + } + + /* Count references from filedescriptors */ + for (f = &filp[0]; f < &filp[NR_FILPS]; f++) + { + if (f->filp_count == 0) + continue; + REFVP(f->filp_vno); + } + + /* Count references to mount points */ + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) + { + if (vmp->m_dev == NO_DEV) + continue; + REFVP(vmp->m_root_node); + if(vmp->m_mounted_on) + REFVP(vmp->m_mounted_on); + } + + /* Check references */ + bad= 0; + for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) + { + if (vp->v_ref_count != vp->v_ref_check) + { + printf( +"Bad reference count for inode %d on device 0x%x: found %d, listed %d\n", + vp->v_inode_nr, vp->v_dev, vp->v_ref_check, + vp->v_ref_count); + printf("last marked at %s, %d\n", + vp->v_file, vp->v_line); + bad= 1; + } + + /* Also check v_pipe */ + if (vp->v_ref_count != 0) + { + ispipe_flag= (vp->v_pipe == I_PIPE); + ispipe_mode= ((vp->v_mode & I_TYPE) == I_NAMED_PIPE); + if (ispipe_flag != ispipe_mode) + { + printf( +"Bad v_pipe for inode %d on device 0x%x: found %d, mode 0%o\n", 
+ vp->v_inode_nr, vp->v_dev, vp->v_pipe, + vp->v_mode); + printf("last marked at %s, %d\n", + vp->v_file, vp->v_line); + bad= 1; + } + } + } + return !bad; +} +#endif diff --git a/servers/avfs/vnode.h b/servers/avfs/vnode.h new file mode 100644 index 000000000..babb6aab4 --- /dev/null +++ b/servers/avfs/vnode.h @@ -0,0 +1,40 @@ +#ifndef __VFS_VNODE_H__ +#define __VFS_VNODE_H__ + +EXTERN struct vnode { + endpoint_t v_fs_e; /* FS process' endpoint number */ + endpoint_t v_mapfs_e; /* mapped FS process' endpoint number */ + ino_t v_inode_nr; /* inode number on its (minor) device */ + ino_t v_mapinode_nr; /* mapped inode number of mapped FS. */ + mode_t v_mode; /* file type, protection, etc. */ + uid_t v_uid; /* uid of inode. */ + gid_t v_gid; /* gid of inode. */ + off_t v_size; /* current file size in bytes */ + int v_ref_count; /* # times vnode used; 0 means slot is free */ + int v_fs_count; /* # reference at the underlying FS */ + int v_mapfs_count; /* # reference at the underlying mapped FS */ +#if 0 + int v_ref_check; /* for consistency checks */ +#endif + char v_pipe; /* set to I_PIPE if pipe */ + off_t v_pipe_rd_pos; + off_t v_pipe_wr_pos; + endpoint_t v_bfs_e; /* endpoint number for the FS proces in case + of a block special file */ + dev_t v_dev; /* device number on which the corresponding + inode resides */ + dev_t v_sdev; /* device number for special files */ + struct vmnt *v_vmnt; /* vmnt object of the partition */ + tll_t v_lock; /* three-level-lock */ +} vnode[NR_VNODES]; + + +/* Field values. */ +#define NO_PIPE 0 /* i_pipe is NO_PIPE if inode is not a pipe */ +#define I_PIPE 1 /* i_pipe is I_PIPE if inode is a pipe */ + +/* vnode lock types mapping */ +#define VNODE_READ TLL_READ +#define VNODE_OPCL TLL_READSER +#define VNODE_WRITE TLL_WRITE +#endif diff --git a/servers/avfs/worker.c b/servers/avfs/worker.c new file mode 100644 index 000000000..f8449109e --- /dev/null +++ b/servers/avfs/worker.c @@ -0,0 +1,336 @@ +#include "fs.h" +#include "glo.h" +#include "fproc.h" +#include "threads.h" +#include "job.h" +#include + +FORWARD _PROTOTYPE( void append_job, (struct job *job, + void *(*func)(void *arg)) ); +FORWARD _PROTOTYPE( void get_work, (struct worker_thread *worker) ); +FORWARD _PROTOTYPE( void *worker_main, (void *arg) ); +FORWARD _PROTOTYPE( void worker_sleep, (struct worker_thread *worker) ); +FORWARD _PROTOTYPE( void worker_wake, (struct worker_thread *worker) ); +PRIVATE int init = 0; +PRIVATE mthread_attr_t tattr; + +#ifdef MKCOVERAGE +# define TH_STACKSIZE (10 * 1024) +#else +# define TH_STACKSIZE (6 * 1024) +#endif + +#define ASSERTW(w) assert((w) == &sys_worker || (w) == &dl_worker || \ + ((w) >= &workers[0] && (w) < &workers[NR_WTHREADS])); + +/*===========================================================================* + * worker_init * + *===========================================================================*/ +PUBLIC void worker_init(struct worker_thread *worker) +{ +/* Initialize worker thread */ + if (!init) { + threads_init(); + assert(mthread_attr_init(&tattr) == 0); + if (mthread_attr_setstacksize(&tattr, TH_STACKSIZE) != 0) + panic("couldn't set default thread stack size"); + if (mthread_attr_setdetachstate(&tattr, MTHREAD_CREATE_DETACHED) != 0) + panic("couldn't set default thread detach state"); + pending = 0; + init = 1; + } + + ASSERTW(worker); + + worker->w_job.j_func = NULL; /* Mark not in use */ + worker->w_next = NULL; + assert(mutex_init(&worker->w_event_mutex, NULL) == 0); + assert(cond_init(&worker->w_event, NULL) == 0); + 
assert(mthread_create(&worker->w_tid, &tattr, worker_main, (void *) worker) == 0); + yield(); +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work(struct worker_thread *worker) +{ +/* Find new work to do. Work can be 'queued', 'pending', or absent. In the + * latter case wait for new work to come in. */ + + struct job *new_job; + struct fproc *rfp; + + ASSERTW(worker); + self = worker; + + /* Do we have queued work to do? */ + if ((new_job = worker->w_job.j_next) != NULL) { + worker->w_job = *new_job; + free(new_job); + return; + } else if (worker != &sys_worker && worker != &dl_worker && pending > 0) { + /* Find pending work */ + for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) { + if (rfp->fp_flags & FP_PENDING) { + worker->w_job = rfp->fp_job; + rfp->fp_job.j_func = NULL; + rfp->fp_flags &= ~FP_PENDING; /* No longer pending */ + pending--; + assert(pending >= 0); + return; + } + } + panic("Pending work inconsistency"); + } + + /* Wait for work to come to us */ + worker_sleep(worker); +} + +/*===========================================================================* + * worker_available * + *===========================================================================*/ +PUBLIC int worker_available(void) +{ + int busy, i; + + busy = 0; + for (i = 0; i < NR_WTHREADS; i++) { + if (workers[i].w_job.j_func != NULL) + busy++; + } + + return(NR_WTHREADS - busy); +} + +/*===========================================================================* + * worker_main * + *===========================================================================*/ +PRIVATE void *worker_main(void *arg) +{ +/* Worker thread main loop */ + struct worker_thread *me; + + me = (struct worker_thread *) arg; + ASSERTW(me); + + while(TRUE) { + get_work(me); + + /* Register ourselves in fproc table if possible */ + if (me->w_job.j_fp != NULL) { + me->w_job.j_fp->fp_wtid = me->w_tid; + } + + /* Carry out work */ + me->w_job.j_func(&me->w_job); + + /* Mark ourselves as done */ + me->w_job.j_func = NULL; + } + + return(NULL); /* Unreachable */ +} + +/*===========================================================================* + * dl_worker_start * + *===========================================================================*/ +PUBLIC void dl_worker_start(void *(*func)(void *arg)) +{ +/* Start the deadlock resolving worker. This worker is reserved to run in case + * all other workers are busy and we have to have an additional worker to come + * to the rescue. */ + assert(dl_worker.w_job.j_func == NULL); + + if (dl_worker.w_job.j_func == NULL) { + dl_worker.w_job.j_fp = fp; + dl_worker.w_job.j_m_in = m_in; + dl_worker.w_job.j_func = func; + worker_wake(&dl_worker); + } +} + +/*===========================================================================* + * sys_worker_start * + *===========================================================================*/ +PUBLIC void sys_worker_start(void *(*func)(void *arg)) +{ +/* Carry out work for the system (i.e., kernel or PM). If this thread is idle + * do it right away, else create new job and append it to the queue. 
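+ *
+ * Dispatch overview (an editorial sketch, not part of the patch; the handler
+ * names are hypothetical void *(*)(void *) functions):
+ *
+ *	worker_start(do_job);		-- any worker from the pool
+ *	sys_worker_start(do_sys_job);	-- this reserved system worker
+ *	dl_worker_start(do_unblock);	-- the deadlock-resolution worker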
*/ + + if (sys_worker.w_job.j_func == NULL) { + sys_worker.w_job.j_fp = fp; + sys_worker.w_job.j_m_in = m_in; + sys_worker.w_job.j_func = func; + worker_wake(&sys_worker); + } else { + append_job(&sys_worker.w_job, func); + } +} + +/*===========================================================================* + * append_job * + *===========================================================================*/ +PRIVATE void append_job(struct job *job, void *(*func)(void *arg)) +{ +/* Append a job */ + + struct job *new_job, *tail; + + /* Create new job */ + new_job = calloc(1, sizeof(struct job)); + assert(new_job != NULL); + new_job->j_fp = fp; + new_job->j_m_in = m_in; + new_job->j_func = func; + new_job->j_next = NULL; + + /* Append to queue */ + tail = job; + while (tail->j_next != NULL) tail = tail->j_next; + tail->j_next = new_job; +} + +/*===========================================================================* + * worker_start * + *===========================================================================*/ +PUBLIC void worker_start(void *(*func)(void *arg)) +{ +/* Find an available worker or wait for one */ + int i; + struct worker_thread *worker; + + worker = NULL; + for (i = 0; i < NR_WTHREADS; i++) { + if (workers[i].w_job.j_func == NULL) { + worker = &workers[i]; + break; + } + } + + if (worker != NULL) { + worker->w_job.j_fp = fp; + worker->w_job.j_m_in = m_in; + worker->w_job.j_func = func; + worker->w_job.j_next = NULL; + worker_wake(worker); + return; + } + + /* No worker threads available, let's wait for one to finish. */ + /* If this process already has a job scheduled, forget about this new + * job; + * - the new job is do_dummy and we have already scheduled an actual job + * - the new job is an actual job and we have already scheduled do_dummy in + * order to exit this proc, so doing the new job is pointless. 
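+ * (Otherwise the job is parked in fp->fp_job and flagged FP_PENDING; an
+ * idle worker will pick it up later in get_work().)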
*/ + if (fp->fp_job.j_func == NULL) { + assert(!(fp->fp_flags & FP_PENDING)); + fp->fp_job.j_fp = fp; + fp->fp_job.j_m_in = m_in; + fp->fp_job.j_func = func; + fp->fp_job.j_next = NULL; + fp->fp_flags |= FP_PENDING; + pending++; + } +} + +/*===========================================================================* + * worker_sleep * + *===========================================================================*/ +PRIVATE void worker_sleep(struct worker_thread *worker) +{ + ASSERTW(worker); + assert(self == worker); + assert(mutex_lock(&worker->w_event_mutex) == 0); + assert(cond_wait(&worker->w_event, &worker->w_event_mutex) == 0); + assert(mutex_unlock(&worker->w_event_mutex) == 0); + self = worker; +} + +/*===========================================================================* + * worker_wake * + *===========================================================================*/ +PRIVATE void worker_wake(struct worker_thread *worker) +{ +/* Signal a worker to wake up */ + ASSERTW(worker); + assert(mutex_lock(&worker->w_event_mutex) == 0); + assert(cond_signal(&worker->w_event) == 0); + assert(mutex_unlock(&worker->w_event_mutex) == 0); +} + +/*===========================================================================* + * worker_wait * + *===========================================================================*/ +PUBLIC void worker_wait(void) +{ + struct worker_thread *worker; + + worker = worker_self(); + worker->w_job.j_m_in = m_in; /* Store important global data */ + assert(fp == worker->w_job.j_fp); + worker_sleep(worker); + /* We continue here after waking up */ + fp = worker->w_job.j_fp; /* Restore global data */ + m_in = worker->w_job.j_m_in; + assert(worker->w_next == NULL); +} + +/*===========================================================================* + * worker_signal * + *===========================================================================*/ +PUBLIC void worker_signal(struct worker_thread *worker) +{ + ASSERTW(worker); /* Make sure we have a valid thread */ + worker_wake(worker); +} + +/*===========================================================================* + * worker_self * + *===========================================================================*/ +PUBLIC struct worker_thread *worker_self(void) +{ + struct worker_thread *worker; + worker = worker_get(mthread_self()); + assert(worker != NULL); + return(worker); +} + +/*===========================================================================* + * worker_get * + *===========================================================================*/ +PUBLIC struct worker_thread *worker_get(thread_t worker_tid) +{ + int i; + struct worker_thread *worker; + + worker = NULL; + if (worker_tid == sys_worker.w_tid) + worker = &sys_worker; + else if (worker_tid == dl_worker.w_tid) + worker = &dl_worker; + else { + for (i = 0; i < NR_WTHREADS; i++) { + if (workers[i].w_tid == worker_tid) { + worker = &workers[i]; + break; + } + } + } + + return(worker); +} + +/*===========================================================================* + * worker_getjob * + *===========================================================================*/ +PUBLIC struct job *worker_getjob(thread_t worker_tid) +{ + struct worker_thread *worker; + + if ((worker = worker_get(worker_tid)) != NULL) + return(&worker->w_job); + + return(NULL); +} diff --git a/servers/avfs/write.c b/servers/avfs/write.c new file mode 100644 index 000000000..b602eff1e --- /dev/null +++ b/servers/avfs/write.c @@ -0,0 +1,19 @@ +/* This file is the counterpart 
of "read.c". It contains the code for writing
+ * insofar as this is not contained in read_write().
+ *
+ * The entry points into this file are
+ * do_write: call read_write to perform the WRITE system call
+ */
+
+#include "fs.h"
+#include "file.h"
+
+
+/*===========================================================================*
+ * do_write *
+ *===========================================================================*/
+PUBLIC int do_write()
+{
+/* Perform the write(fd, buffer, nbytes) system call. */
+ return(read_write(WRITING));
+}
diff --git a/servers/is/Makefile b/servers/is/Makefile
index 811068304..225689c9d 100644
--- a/servers/is/Makefile
+++ b/servers/is/Makefile
@@ -1,4 +1,8 @@
 # Makefile for Information Server (IS)
+#
+
+.include <bsd.own.mk>
+
 PROG= is
 SRCS= main.c dmp.c dmp_kernel.c dmp_pm.c dmp_fs.c dmp_rs.c dmp_ds.c dmp_vm.c
@@ -13,4 +17,8 @@ CPPFLAGS.dmp_kernel.c+= -I${MINIXSRCDIR}
 CPPFLAGS.dmp_rs.c+= -I${MINIXSRCDIR}
 CPPFLAGS.dmp_vm.c+= -I${MINIXSRCDIR}
 
+.if ${BUILDAVFS} == "yes"
+CFLAGS+= -D_USEAVFS
+.endif
+
 .include 
diff --git a/servers/is/dmp_fs.c b/servers/is/dmp_fs.c
index aba188dd9..ae3691070 100644
--- a/servers/is/dmp_fs.c
+++ b/servers/is/dmp_fs.c
@@ -10,9 +10,15 @@
 #include "inc.h"
 #include "../mfs/const.h"
-#include "../vfs/const.h"
-#include "../vfs/fproc.h"
-#include "../vfs/dmap.h"
+#if defined(_USEAVFS)
+# include "../avfs/const.h"
+# include "../avfs/fproc.h"
+# include "../avfs/dmap.h"
+#else
+# include "../vfs/const.h"
+# include "../vfs/fproc.h"
+# include "../vfs/dmap.h"
+#endif
 #include 
 PUBLIC struct fproc fproc[NR_PROCS];
@@ -35,6 +41,7 @@ PUBLIC void fproc_dmp()
 fp = &fproc[i];
 if (fp->fp_pid <= 0) continue;
 if (++n > 22) break;
+#if !defined(_USEAVFS)
 printf("%3d %4d %2d/%d 0x%05x %2d (%2d) %2d (%2d) %3d %3d %3d ",
 i, fp->fp_pid,
 ((fp->fp_tty>>MAJOR)&BYTE), ((fp->fp_tty>>MINOR)&BYTE),
 fp->fp_umask,
 fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
 fp->fp_sesldr,
 fp->fp_blocked_on, !!fp->fp_revived
 );
+#else
+ printf("%3d %4d %2d/%d 0x%05x %2d (%2d) %2d (%2d) %3d %3d %3d ",
+ i, fp->fp_pid,
+ major(fp->fp_tty), minor(fp->fp_tty),
+ fp->fp_umask,
+ fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
+ !!(fp->fp_flags & FP_SESLDR),
+ fp->fp_blocked_on, !!(fp->fp_flags & FP_REVIVED)
+ );
+#endif
 if (fp->fp_blocked_on == FP_BLOCKED_ON_OTHER)
 printf("%4d\n", fp->fp_task);
 else
diff --git a/servers/procfs/Makefile b/servers/procfs/Makefile
index 04356f9d1..5836e8dab 100644
--- a/servers/procfs/Makefile
+++ b/servers/procfs/Makefile
@@ -1,9 +1,17 @@
 # Makefile for ProcFS server
+#
+
+.include <bsd.own.mk>
+
 PROG= procfs
 SRCS= buf.c main.c pid.c root.c tree.c util.c cpuinfo.c
 
 CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers
 
+.if ${BUILDAVFS} == "yes"
+CFLAGS+= -D_USEAVFS
+.endif
+
 DPADD+= ${LIBVTREEFS} ${LIBSYS}
 LDADD+= -lvtreefs -lsys
diff --git a/servers/procfs/inc.h b/servers/procfs/inc.h
index 1a51668ea..4240e076a 100644
--- a/servers/procfs/inc.h
+++ b/servers/procfs/inc.h
@@ -50,8 +50,13 @@
 #include "kernel/type.h"
 #include "kernel/proc.h"
 #include "pm/mproc.h"
-#include "vfs/const.h"
-#include "vfs/fproc.h"
+#if defined(_USEAVFS)
+# include "avfs/const.h"
+# include "avfs/fproc.h"
+#else
+# include "vfs/const.h"
+# include "vfs/fproc.h"
+#endif
 
 #include 
 #include 
diff --git a/share/mk/bsd.own.mk b/share/mk/bsd.own.mk
index 770dfcf34..c4192bfd3 100644
--- a/share/mk/bsd.own.mk
+++ b/share/mk/bsd.own.mk
@@ -12,6 +12,8 @@ SMP_FLAGS += -DCONFIG_MAX_CPUS=${CONFIG_MAX_CPUS}
 
 CPPFLAGS+= ${SMP_FLAGS}
 
+BUILDAVFS?= "no"
+
 MAKECONF?= /etc/make.conf
 .-include "${MAKECONF}"
diff --git
a/tools/Makefile b/tools/Makefile index ce5c27621..a359ff748 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -9,19 +9,26 @@ MDEC= /usr/mdec GEN_FILES= *.bak image kernel *.iso *.iso.gz cdfdimage rootimage src # Specify the programs that are part of the system image. +.if ${BUILDAVFS} == "yes" +VFS= "../servers/avfs/vfs" +PFS= "../servers/apfs/pfs" +.else +VFS= "../servers/vfs/vfs" +PFS= "../servers/pfs/pfs" +.endif KERNEL= kernel PROGRAMS= \ ../servers/ds/ds \ ../servers/rs/rs \ ../servers/pm/pm \ ../servers/sched/sched \ - ../servers/vfs/vfs \ + ${VFS} \ ../drivers/memory/memory \ ../drivers/log/log \ ../drivers/tty/tty \ ../servers/mfs/mfs \ ../servers/vm/vm \ - ../servers/pfs/pfs \ + ${PFS} \ ../servers/init/init usage:
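
A closing build-configuration note (editorial, not part of the patch):
bsd.own.mk above defaults BUILDAVFS to "no", so the merged AVFS/APFS servers
are opt-in. Presumably the switch is flipped in /etc/make.conf (which
bsd.own.mk includes) or on the make command line; the exact setting below is
an assumption, only the variable test itself appears in the patch:

	# /etc/make.conf
	BUILDAVFS="yes"

With that set, servers/Makefile descends into avfs and apfs instead of vfs
and pfs, and tools/Makefile packs ../servers/avfs/vfs and ../servers/apfs/pfs
into the boot image in their place.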