From: David van Moolenbroek <david@minix3.org>
Date: Mon, 4 Jan 2016 18:42:13 +0000 (+0000)
Subject: VFS: store process suspension state as union
X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/zpipe.c?a=commitdiff_plain;h=refs%2Fchanges%2F45%2F3345%2F1;p=minix.git

VFS: store process suspension state as union

Previously, VFS would use various subsets of a number of fproc
structure fields to store state when the process is blocked
(suspended) for various reasons.  As a result, there was a fair
amount of abuse of fields, hidden state, and confusion as to
which fields were used with which suspension states.

Instead, the suspension state is now split into per-state
structures, which are then stored in a union.  Each of the union's
structures should be accessed only while the fp_blocked_on field is
set to the corresponding blocking type, or right before and right
after that period.  As a result, it is now very clear which fields
are in use at which times, and we even save a bit of memory as well.

Change-Id: I5c24e353b6cb0c32eb41c70f89c5cfb23f6c93df
---

diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h
index 721c3cc86..5e1f4b985 100644
--- a/minix/include/minix/ipc.h
+++ b/minix/include/minix/ipc.h
@@ -795,8 +795,9 @@ typedef struct {
 	int fd;
 	vir_bytes buf;
 	size_t len;
+	size_t cum_io;		/* reserved/internal, set to 0 */
 
-	uint8_t padding[44];
+	uint8_t padding[40];
 } mess_lc_vfs_readwrite;
 _ASSERT_MSG_SIZE(mess_lc_vfs_readwrite);
 
diff --git a/minix/lib/libc/sys/getdents.c b/minix/lib/libc/sys/getdents.c
index bda030593..175c615fb 100644
--- a/minix/lib/libc/sys/getdents.c
+++ b/minix/lib/libc/sys/getdents.c
@@ -13,6 +13,7 @@ ssize_t getdents(int fd, char *buffer, size_t nbytes)
   m.m_lc_vfs_readwrite.fd = fd;
   m.m_lc_vfs_readwrite.len = nbytes;
   m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer;
+  m.m_lc_vfs_readwrite.cum_io = 0;
   return _syscall(VFS_PROC_NR, VFS_GETDENTS, &m);
 }
 
diff --git a/minix/lib/libc/sys/read.c b/minix/lib/libc/sys/read.c
index b8eeb72be..e23f4eb36 100644
--- a/minix/lib/libc/sys/read.c
+++ b/minix/lib/libc/sys/read.c
@@ -17,5 +17,6 @@ ssize_t read(int fd, void *buffer, size_t nbytes)
   m.m_lc_vfs_readwrite.fd = fd;
   m.m_lc_vfs_readwrite.len = nbytes;
   m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer;
+  m.m_lc_vfs_readwrite.cum_io = 0;
   return(_syscall(VFS_PROC_NR, VFS_READ, &m));
 }
diff --git a/minix/lib/libc/sys/write.c b/minix/lib/libc/sys/write.c
index 3accd022f..5b40a1e30 100644
--- a/minix/lib/libc/sys/write.c
+++ b/minix/lib/libc/sys/write.c
@@ -13,6 +13,7 @@ ssize_t write(int fd, const void *buffer, size_t nbytes)
   m.m_lc_vfs_readwrite.fd = fd;
   m.m_lc_vfs_readwrite.len = nbytes;
   m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer;
+  m.m_lc_vfs_readwrite.cum_io = 0;	/* reserved for VFS-internal use */
   return(_syscall(VFS_PROC_NR, VFS_WRITE, &m));
 }
 
diff --git a/minix/servers/is/dmp_fs.c b/minix/servers/is/dmp_fs.c
index e30489cc9..5a9ad8fb3 100644
--- a/minix/servers/is/dmp_fs.c
+++ b/minix/servers/is/dmp_fs.c
@@ -48,8 +48,8 @@ void fproc_dmp()
 		!!(fp->fp_flags & FP_SESLDR), nfds,
 		fp->fp_blocked_on, !!(fp->fp_flags & FP_REVIVED)
 	);
-	if (fp->fp_blocked_on == FP_BLOCKED_ON_OTHER)
-		printf("%4d\n", fp->fp_task);
+	if (fp->fp_blocked_on == FP_BLOCKED_ON_CDEV)
+		printf("%4d\n", fp->fp_cdev.endpt);
 	else
 		printf(" nil\n");
   }
diff --git a/minix/servers/mib/proc.c b/minix/servers/mib/proc.c
index d2c63782b..60a901785 100644
--- a/minix/servers/mib/proc.c
+++ b/minix/servers/mib/proc.c
@@ -296,8 +296,8 @@ get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
 		case FP_BLOCKED_ON_PIPE:
 			wmesg = "pipe";
 			break;
-		case FP_BLOCKED_ON_LOCK:
-			wmesg = "lock";
+		case FP_BLOCKED_ON_FLOCK:
+			wmesg = "flock";
 			break;
 		case FP_BLOCKED_ON_POPEN:
 			wmesg = "popen";
@@ -305,7 +305,7 @@ get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
 		case FP_BLOCKED_ON_SELECT:
 			wmesg = "select";
 			break;
-		case FP_BLOCKED_ON_OTHER:
+		case FP_BLOCKED_ON_CDEV:
 			/*
 			 * Add the task (= character driver) endpoint to the
 			 * wchan value, and use the driver's process name,
diff --git a/minix/servers/vfs/const.h b/minix/servers/vfs/const.h
index 0cce3cf1b..4385350ed 100644
--- a/minix/servers/vfs/const.h
+++ b/minix/servers/vfs/const.h
@@ -17,11 +17,10 @@
 
 #define FP_BLOCKED_ON_NONE	0 /* not blocked */
 #define FP_BLOCKED_ON_PIPE	1 /* susp'd on pipe */
-#define FP_BLOCKED_ON_LOCK	2 /* susp'd on lock */
+#define FP_BLOCKED_ON_FLOCK	2 /* susp'd on file lock */
 #define FP_BLOCKED_ON_POPEN	3 /* susp'd on pipe open */
 #define FP_BLOCKED_ON_SELECT	4 /* susp'd on select */
-#define FP_BLOCKED_ON_OTHER	5 /* blocked on other process, check
-				     fp_task to find out */
+#define FP_BLOCKED_ON_CDEV	5 /* blocked on character device I/O */
 
 /* test if the process is blocked on something */
 #define fp_is_blocked(fp)	((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE)
diff --git a/minix/servers/vfs/coredump.c b/minix/servers/vfs/coredump.c
index 91ebf5d5f..1139798ac 100644
--- a/minix/servers/vfs/coredump.c
+++ b/minix/servers/vfs/coredump.c
@@ -175,7 +175,14 @@ static void adjust_offsets(Elf_Phdr phdrs[], int phnum)
  *===========================================================================*/
 static void write_buf(struct filp *f, char *buf, size_t size)
 {
-  read_write(fp, WRITING, f, (vir_bytes)buf, size, VFS_PROC_NR);
+  /*
+   * TODO: pass in the proper file descriptor number.  It really doesn't matter
+   * what we pass in, because the write target is a regular file.  As such, the
+   * write call will never be suspended, and suspension is the only case that
+   * read_write() could use the file descriptor.  Still, passing in an invalid
+   * value isn't exactly nice.
+   */
+  read_write(fp, WRITING, -1 /*fd*/, f, (vir_bytes)buf, size, VFS_PROC_NR);
 }
 
 /*===========================================================================*
diff --git a/minix/servers/vfs/device.c b/minix/servers/vfs/device.c
index 54a1b0872..e9bec4292 100644
--- a/minix/servers/vfs/device.c
+++ b/minix/servers/vfs/device.c
@@ -32,7 +32,7 @@
 #include "vnode.h"
 #include "vmnt.h"
 
-static int cdev_opcl(int op, dev_t dev, int flags);
+static int cdev_opcl(int op, int fd, dev_t dev, int flags);
 static int block_io(endpoint_t driver_e, message *mess_ptr);
 static cp_grant_id_t make_grant(endpoint_t driver_e, endpoint_t user_e, int op,
 	vir_bytes buf, unsigned long size);
@@ -318,9 +318,10 @@ int cdev_io(
 	panic("VFS: asynsend in cdev_io failed: %d", r);
 
   /* Suspend the calling process until a reply arrives. */
-  wait_for(dp->dmap_driver);
-  assert(!GRANT_VALID(fp->fp_grant));
-  fp->fp_grant = gid;	/* revoke this when unsuspended. */
+  fp->fp_cdev.dev = dev;
+  fp->fp_cdev.endpt = dp->dmap_driver;
+  fp->fp_cdev.grant = gid;	/* revoke this when unsuspended */
+  suspend(FP_BLOCKED_ON_CDEV);
 
   return SUSPEND;
 }
@@ -329,7 +330,7 @@ int cdev_io(
 /*===========================================================================*
  *				cdev_clone				     *
  *===========================================================================*/
-static int cdev_clone(dev_t dev, devminor_t new_minor)
+static int cdev_clone(int fd, dev_t dev, devminor_t new_minor)
 {
 /* A new minor device number has been returned. Request PFS to create a
  * temporary device file to hold it.
@@ -338,6 +339,8 @@ static int cdev_clone(dev_t dev, devminor_t new_minor)
   struct node_details res;
   int r;
 
+  assert(fd != -1);
+
   /* Device number of the new device. */
   dev = makedev(major(dev), new_minor);
 
@@ -345,21 +348,21 @@ static int cdev_clone(dev_t dev, devminor_t new_minor)
   r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid,
       RWX_MODES | I_CHAR_SPECIAL, dev, &res);
   if (r != OK) {
-	(void) cdev_opcl(CDEV_CLOSE, dev, 0);
+	(void)cdev_opcl(CDEV_CLOSE, -1, dev, 0);
 	return r;
   }
 
   /* Drop old node and use the new values */
   if ((vp = get_free_vnode()) == NULL) {
 	req_putnode(PFS_PROC_NR, res.inode_nr, 1); /* is this right? */
-	(void) cdev_opcl(CDEV_CLOSE, dev, 0);
+	(void)cdev_opcl(CDEV_CLOSE, -1, dev, 0);
 	return(err_code);
   }
   lock_vnode(vp, VNODE_OPCL);
 
-  assert(fp->fp_filp[fp->fp_fd] != NULL);
-  unlock_vnode(fp->fp_filp[fp->fp_fd]->filp_vno);
-  put_vnode(fp->fp_filp[fp->fp_fd]->filp_vno);
+  assert(fp->fp_filp[fd] != NULL);
+  unlock_vnode(fp->fp_filp[fd]->filp_vno);
+  put_vnode(fp->fp_filp[fd]->filp_vno);
 
   vp->v_fs_e = res.fs_e;
   vp->v_vmnt = NULL;
@@ -370,7 +373,7 @@ static int cdev_clone(dev_t dev, devminor_t new_minor)
   vp->v_sdev = dev;
   vp->v_fs_count = 1;
   vp->v_ref_count = 1;
-  fp->fp_filp[fp->fp_fd]->filp_vno = vp;
+  fp->fp_filp[fd]->filp_vno = vp;
 
   return OK;
 }
@@ -381,6 +384,7 @@ static int cdev_clone(dev_t dev, devminor_t new_minor)
  *===========================================================================*/
 static int cdev_opcl(
   int op,			/* operation, CDEV_OPEN or CDEV_CLOSE */
+  int fd,			/* file descriptor (open) or -1 (close) */
   dev_t dev,			/* device to open or close */
   int flags			/* mode bits and flags */
 )
@@ -392,7 +396,14 @@ static int cdev_opcl(
   message dev_mess;
   int r, r2;
 
+  /*
+   * We need a file descriptor for CDEV_OPEN, because if the driver returns a
+   * cloned device, we need to replace what the fd points to.  For CDEV_CLOSE
+   * however, we may be closing a device for which the calling process has no
+   * file descriptor, and thus we expect no meaningful fd value in that case.
+   */
   assert(op == CDEV_OPEN || op == CDEV_CLOSE);
+  assert(fd != -1 || op == CDEV_CLOSE);
 
   /* Determine task dmap. */
   if ((dp = cdev_get(dev, &minor_dev)) == NULL)
@@ -439,7 +450,6 @@ static int cdev_opcl(
 	panic("VFS: asynsend in cdev_opcl failed: %d", r);
 
   /* Block the thread waiting for a reply. */
-  fp->fp_task = dp->dmap_driver;
   self->w_task = dp->dmap_driver;
   self->w_drv_sendrec = &dev_mess;
 
@@ -460,7 +470,7 @@ static int cdev_opcl(
 	 */
 	if (r & CDEV_CLONED) {
 		new_minor = r & ~(CDEV_CLONED | CDEV_CTTY);
-		if ((r2 = cdev_clone(dev, new_minor)) < 0)
+		if ((r2 = cdev_clone(fd, dev, new_minor)) < 0)
 			return(r2);
 	}
 
@@ -481,11 +491,11 @@ static int cdev_opcl(
 /*===========================================================================*
  *				cdev_open				     *
  *===========================================================================*/
-int cdev_open(dev_t dev, int flags)
+int cdev_open(int fd, dev_t dev, int flags)
 {
 /* Open a character device. */
 
-  return cdev_opcl(CDEV_OPEN, dev, flags);
+  return cdev_opcl(CDEV_OPEN, fd, dev, flags);
 }
 
 
@@ -496,7 +506,7 @@ int cdev_close(dev_t dev)
 {
 /* Close a character device. */
 
-  return cdev_opcl(CDEV_CLOSE, dev, 0);
+  return cdev_opcl(CDEV_CLOSE, -1, dev, 0);
 }
 
 
@@ -507,17 +517,17 @@ int do_ioctl(void)
 {
 /* Perform the ioctl(2) system call. */
   unsigned long ioctlrequest;
-  int r = OK;
+  int fd, r = OK;
   struct filp *f;
   register struct vnode *vp;
   dev_t dev;
   vir_bytes argx;
 
-  fp->fp_fd = job_m_in.m_lc_vfs_ioctl.fd;
+  fd = job_m_in.m_lc_vfs_ioctl.fd;
   ioctlrequest = job_m_in.m_lc_vfs_ioctl.req;
   argx = (vir_bytes)job_m_in.m_lc_vfs_ioctl.arg;
 
-  if ((f = get_filp(fp->fp_fd, VNODE_READ)) == NULL)
+  if ((f = get_filp(fd, VNODE_READ)) == NULL)
 	return(err_code);
   vp = f->filp_vno;		/* get vnode pointer */
   if (!S_ISCHR(vp->v_mode) && !S_ISBLK(vp->v_mode)) {
@@ -535,7 +545,7 @@ int do_ioctl(void)
 		f->filp_ioctl_fp = NULL;
 	} else
 		r = cdev_io(CDEV_IOCTL, dev, who_e, argx, 0, ioctlrequest,
-			f->filp_flags);
+		    f->filp_flags);
   }
 
   unlock_filp(f);
@@ -583,7 +593,7 @@ int cdev_select(dev_t dev, int ops)
 /*===========================================================================*
  *				cdev_cancel				     *
  *===========================================================================*/
-int cdev_cancel(dev_t dev)
+int cdev_cancel(dev_t dev, endpoint_t endpt __unused, cp_grant_id_t grant)
 {
 /* Cancel an I/O request, blocking until it has been cancelled. */
   devminor_t minor_dev;
@@ -607,7 +617,6 @@ int cdev_cancel(dev_t dev)
 	panic("VFS: asynsend in cdev_cancel failed: %d", r);
 
   /* Suspend this thread until we have received the response. */
-  fp->fp_task = dp->dmap_driver;
   self->w_task = dp->dmap_driver;
   self->w_drv_sendrec = &dev_mess;
 
@@ -616,12 +625,11 @@ int cdev_cancel(dev_t dev)
   self->w_task = NONE;
   assert(self->w_drv_sendrec == NULL);
 
-  /* Clean up and return the result (note: the request may have completed). */
-  if (GRANT_VALID(fp->fp_grant)) {
-	(void) cpf_revoke(fp->fp_grant);
-	fp->fp_grant = GRANT_INVALID;
-  }
+  /* Clean up. */
+  if (GRANT_VALID(grant))
+	(void)cpf_revoke(grant);
 
+  /* Return the result (note: the request may have completed). */
   r = dev_mess.m_lchardriver_vfs_reply.status;
   return (r == EAGAIN) ? EINTR : r;
 }
@@ -771,8 +779,8 @@ static void cdev_generic_reply(message *m_ptr)
 	*wp->w_drv_sendrec = *m_ptr;
 	wp->w_drv_sendrec = NULL;
 	worker_signal(wp);	/* Continue open/close/cancel */
-  } else if (rfp->fp_blocked_on != FP_BLOCKED_ON_OTHER ||
-		rfp->fp_task != m_ptr->m_source) {
+  } else if (rfp->fp_blocked_on != FP_BLOCKED_ON_CDEV ||
+    rfp->fp_cdev.endpt != m_ptr->m_source) {
 	/* This would typically be caused by a protocol error, i.e. a driver
 	 * not properly following the character driver protocol rules.
 	 */
diff --git a/minix/servers/vfs/fproc.h b/minix/servers/vfs/fproc.h
index 28bc56d3c..13c0cf61c 100644
--- a/minix/servers/vfs/fproc.h
+++ b/minix/servers/vfs/fproc.h
@@ -27,14 +27,29 @@ EXTERN struct fproc {
   dev_t fp_tty;			/* major/minor of controlling tty */
 
   int fp_blocked_on;		/* what is it blocked on */
-  int fp_block_callnr;		/* blocked call if rd/wr can't finish */
-  size_t fp_cum_io_partial;	/* partial byte count if write can't finish */
-  endpoint_t fp_task;		/* which task is proc suspended on */
-  cp_grant_id_t fp_grant;	/* revoke this grant on unsuspend if > -1 */
-
-  int fp_fd;			/* file descriptor for blocking call */
-  vir_bytes fp_io_buffer;	/* user buffer address for ongoing I/O */
-  size_t fp_io_nbytes;		/* number of bytes left for ongoing I/O */
+  union ixfer_fp_u {		/* state per blocking type */
+	struct {			/* FP_BLOCKED_ON_PIPE */
+		int callnr;		/* user call: VFS_READ or VFS_WRITE */
+		int fd;			/* file descriptor for blocking call */
+		vir_bytes buf;		/* user buffer address */
+		size_t nbytes;		/* number of bytes left */
+		size_t cum_io;		/* partial (write) result byte count */
+	} u_pipe;
+	struct {			/* FP_BLOCKED_ON_POPEN */
+		int fd;			/* file descriptor for blocking call */
+	} u_popen;
+	struct {			/* FP_BLOCKED_ON_FLOCK */
+		int fd;			/* file descriptor for blocking call */
+		int cmd;		/* fcntl command, always F_SETLKW */
+		vir_bytes arg;		/* user address of flock structure */
+	} u_flock;
+	/* nothing for FP_BLOCKED_ON_SELECT for now */
+	struct {			/* FP_BLOCKED_ON_CDEV */
+		dev_t dev;		/* device number for blocking call */
+		endpoint_t endpt;	/* driver endpoint */
+		cp_grant_id_t grant;	/* data grant */
+	} u_cdev;
+  } fp_u;
 
   uid_t fp_realuid;		/* real user id */
   uid_t fp_effuid;		/* effective user id */
@@ -57,6 +72,12 @@ EXTERN struct fproc {
 #endif
 } fproc[NR_PROCS];
 
+/* Shortcuts for block state union substructures. */
+#define fp_pipe		fp_u.u_pipe
+#define fp_popen	fp_u.u_popen
+#define fp_flock	fp_u.u_flock
+#define fp_cdev		fp_u.u_cdev
+
 /* fp_flags */
 #define FP_NOFLAGS	 0000
 #define FP_SRV_PROC	 0001	/* Set if process is a service */
diff --git a/minix/servers/vfs/link.c b/minix/servers/vfs/link.c
index 9d699a03c..ea6124c39 100644
--- a/minix/servers/vfs/link.c
+++ b/minix/servers/vfs/link.c
@@ -329,16 +329,16 @@ int do_ftruncate(void)
 /* As with do_truncate(), truncate_vnode() does the actual work. */
   struct filp *rfilp;
   struct vnode *vp;
-  int r;
+  int r, fd;
   off_t length;
 
-  fp->fp_fd = job_m_in.m_lc_vfs_truncate.fd;
+  fd = job_m_in.m_lc_vfs_truncate.fd;
 
   length = job_m_in.m_lc_vfs_truncate.offset;
   if (length < 0) return(EINVAL);
 
   /* File is already opened; get a vnode pointer from filp */
-  if ((rfilp = get_filp(fp->fp_fd, VNODE_WRITE)) == NULL)
+  if ((rfilp = get_filp(fd, VNODE_WRITE)) == NULL)
 	return(err_code);
 
   vp = rfilp->filp_vno;
diff --git a/minix/servers/vfs/lock.c b/minix/servers/vfs/lock.c
index b7aefda3a..8cf341934 100644
--- a/minix/servers/vfs/lock.c
+++ b/minix/servers/vfs/lock.c
@@ -10,6 +10,7 @@
 #include <minix/u64.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <assert.h>
 #include "file.h"
 #include "lock.h"
 #include "vnode.h"
@@ -17,21 +18,24 @@
 /*===========================================================================*
  *				lock_op					     *
  *===========================================================================*/
-int lock_op(f, req)
-struct filp *f;
-int req;			/* either F_SETLK or F_SETLKW */
+int lock_op(int fd, int req, vir_bytes arg)
 {
 /* Perform the advisory locking required by POSIX. */
-
   int r, ltype, i, conflict = 0, unlocking = 0;
   mode_t mo;
   off_t first, last;
+  struct filp *f;
   struct flock flock;
   struct file_lock *flp, *flp2, *empty;
 
+  assert(req == F_GETLK || req == F_SETLK || req == F_SETLKW);
+
+  f = fp->fp_filp[fd];
+  assert(f != NULL);
+
   /* Fetch the flock structure from user space. */
-  r = sys_datacopy_wrapper(who_e, fp->fp_io_buffer, VFS_PROC_NR,
-		   (vir_bytes) &flock, sizeof(flock));
+  r = sys_datacopy_wrapper(who_e, arg, VFS_PROC_NR, (vir_bytes)&flock,
+      sizeof(flock));
   if (r != OK) return(EINVAL);
 
   /* Make some error checks. */
@@ -86,7 +90,10 @@ int req;			/* either F_SETLK or F_SETLKW */
 			return(EAGAIN);
 		} else {
 			/* For F_SETLKW, suspend the process. */
-			suspend(FP_BLOCKED_ON_LOCK);
+			fp->fp_flock.fd = fd;
+			fp->fp_flock.cmd = req;
+			fp->fp_flock.arg = arg;
+			suspend(FP_BLOCKED_ON_FLOCK);
 			return(SUSPEND);
 		}
 	}
@@ -140,8 +147,8 @@ int req;			/* either F_SETLK or F_SETLKW */
 	}
 
 	/* Copy the flock structure back to the caller. */
-	r = sys_datacopy_wrapper(VFS_PROC_NR, (vir_bytes) &flock, who_e,
-		fp->fp_io_buffer, sizeof(flock));
+	r = sys_datacopy_wrapper(VFS_PROC_NR, (vir_bytes)&flock, who_e, arg,
+	    sizeof(flock));
 	return(r);
   }
 
@@ -177,7 +184,7 @@ void lock_revive()
 
   for (fptr = &fproc[0]; fptr < &fproc[NR_PROCS]; fptr++){
 	if (fptr->fp_pid == PID_FREE) continue;
-	if (fptr->fp_blocked_on == FP_BLOCKED_ON_LOCK) {
+	if (fptr->fp_blocked_on == FP_BLOCKED_ON_FLOCK) {
 		revive(fptr->fp_endpoint, 0);
 	}
   }
diff --git a/minix/servers/vfs/main.c b/minix/servers/vfs/main.c
index 9c70e952e..b690ffd33 100644
--- a/minix/servers/vfs/main.c
+++ b/minix/servers/vfs/main.c
@@ -213,18 +213,32 @@ static void do_reply(struct worker_thread *wp)
  *===========================================================================*/
 static void do_pending_pipe(void)
 {
-  int r, op;
+  vir_bytes buf;
+  size_t nbytes, cum_io;
+  int r, op, fd;
   struct filp *f;
   tll_access_t locktype;
 
-  f = fp->fp_filp[fp->fp_fd];
+  assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
+
+  /*
+   * We take all our needed resumption state from the m_in message, which is
+   * filled by unblock().  Since this is an internal resumption, there is no
+   * need to perform extensive checks on the message fields.
+   */
+  fd = job_m_in.m_lc_vfs_readwrite.fd;
+  buf = job_m_in.m_lc_vfs_readwrite.buf;
+  nbytes = job_m_in.m_lc_vfs_readwrite.len;
+  cum_io = job_m_in.m_lc_vfs_readwrite.cum_io;
+
+  f = fp->fp_filp[fd];
   assert(f != NULL);
 
   locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE;
   op = (job_call_nr == VFS_READ) ? READING : WRITING;
   lock_filp(f, locktype);
 
-  r = rw_pipe(op, who_e, f, fp->fp_io_buffer, fp->fp_io_nbytes);
+  r = rw_pipe(op, who_e, f, job_call_nr, fd, buf, nbytes, cum_io);
 
   if (r != SUSPEND) { /* Do we have results to report? */
 	/* Process is writing, but there is no reader. Send a SIGPIPE signal.
@@ -409,7 +423,6 @@ static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
 	rfp->fp_flags = FP_NOFLAGS;
 	rfp->fp_pid = mess.VFS_PM_PID;
 	rfp->fp_endpoint = mess.VFS_PM_ENDPT;
-	rfp->fp_grant = GRANT_INVALID;
 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
 	rfp->fp_realuid = (uid_t) SYS_UID;
 	rfp->fp_effuid = (uid_t) SYS_UID;
@@ -917,24 +930,25 @@ struct fproc *rfp;
   /* Reconstruct the original request from the saved data. */
   memset(&m_in, 0, sizeof(m_in));
   m_in.m_source = rfp->fp_endpoint;
-  m_in.m_type = rfp->fp_block_callnr;
-  switch (m_in.m_type) {
-  case VFS_READ:
-  case VFS_WRITE:
-	assert(blocked_on == FP_BLOCKED_ON_PIPE);
-	m_in.m_lc_vfs_readwrite.fd = rfp->fp_fd;
-	m_in.m_lc_vfs_readwrite.buf = rfp->fp_io_buffer;
-	m_in.m_lc_vfs_readwrite.len = rfp->fp_io_nbytes;
+  switch (blocked_on) {
+  case FP_BLOCKED_ON_PIPE:
+	assert(rfp->fp_pipe.callnr == VFS_READ ||
+	    rfp->fp_pipe.callnr == VFS_WRITE);
+	m_in.m_type = rfp->fp_pipe.callnr;
+	m_in.m_lc_vfs_readwrite.fd = rfp->fp_pipe.fd;
+	m_in.m_lc_vfs_readwrite.buf = rfp->fp_pipe.buf;
+	m_in.m_lc_vfs_readwrite.len = rfp->fp_pipe.nbytes;
+	m_in.m_lc_vfs_readwrite.cum_io = rfp->fp_pipe.cum_io;
 	break;
-  case VFS_FCNTL:
-	assert(blocked_on == FP_BLOCKED_ON_LOCK);
-	m_in.m_lc_vfs_fcntl.fd = rfp->fp_fd;
-	m_in.m_lc_vfs_fcntl.cmd = rfp->fp_io_nbytes;
-	m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_io_buffer;
-	assert(m_in.m_lc_vfs_fcntl.cmd == F_SETLKW);
+  case FP_BLOCKED_ON_FLOCK:
+	assert(rfp->fp_flock.cmd == F_SETLKW);
+	m_in.m_type = VFS_FCNTL;
+	m_in.m_lc_vfs_fcntl.fd = rfp->fp_flock.fd;
+	m_in.m_lc_vfs_fcntl.cmd = rfp->fp_flock.cmd;
+	m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_flock.arg;
 	break;
   default:
-	panic("unblocking call %d blocked on %d ??", m_in.m_type, blocked_on);
+	panic("unblocking call blocked on %d ??", blocked_on);
   }
 
   rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;	/* no longer blocked */
@@ -942,9 +956,6 @@ struct fproc *rfp;
   reviving--;
   assert(reviving >= 0);
 
-  /* This should not be device I/O. If it is, it'll 'leak' grants. */
-  assert(!GRANT_VALID(rfp->fp_grant));
-
   /* Pending pipe reads/writes cannot be repeated as is, and thus require a
    * special resumption procedure.
    */
diff --git a/minix/servers/vfs/misc.c b/minix/servers/vfs/misc.c
index d02b99534..d7e30bb94 100644
--- a/minix/servers/vfs/misc.c
+++ b/minix/servers/vfs/misc.c
@@ -83,7 +83,10 @@ int do_getsysinfo(void)
 	for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++, rfpl++) {
 		rfpl->fpl_tty = rfp->fp_tty;
 		rfpl->fpl_blocked_on = rfp->fp_blocked_on;
-		rfpl->fpl_task = rfp->fp_task;
+		if (rfp->fp_blocked_on == FP_BLOCKED_ON_CDEV)
+			rfpl->fpl_task = rfp->fp_cdev.endpt;
+		else
+			rfpl->fpl_task = NONE;
 	}
 	src_addr = (vir_bytes) fproc_light;
 	len = sizeof(fproc_light);
@@ -110,20 +113,19 @@ int do_getsysinfo(void)
 int do_fcntl(void)
 {
 /* Perform the fcntl(fd, cmd, ...) system call. */
-
-  register struct filp *f;
-  int new_fd, fl, r = OK, fcntl_req, fcntl_argx;
+  struct filp *f;
+  int fd, new_fd, fl, r = OK, fcntl_req, fcntl_argx;
+  vir_bytes addr;
   tll_access_t locktype;
 
-  fp->fp_fd = job_m_in.m_lc_vfs_fcntl.fd;
-  fp->fp_io_buffer = job_m_in.m_lc_vfs_fcntl.arg_ptr;
-  fp->fp_io_nbytes = job_m_in.m_lc_vfs_fcntl.cmd;
+  fd = job_m_in.m_lc_vfs_fcntl.fd;
   fcntl_req = job_m_in.m_lc_vfs_fcntl.cmd;
   fcntl_argx = job_m_in.m_lc_vfs_fcntl.arg_int;
+  addr = job_m_in.m_lc_vfs_fcntl.arg_ptr;
 
   /* Is the file descriptor valid? */
   locktype = (fcntl_req == F_FREESP) ? VNODE_WRITE : VNODE_READ;
-  if ((f = get_filp(fp->fp_fd, locktype)) == NULL)
+  if ((f = get_filp(fd, locktype)) == NULL)
 	return(err_code);
 
   switch (fcntl_req) {
@@ -144,16 +146,16 @@ int do_fcntl(void)
     case F_GETFD:
 	/* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
 	r = 0;
-	if (FD_ISSET(fp->fp_fd, &fp->fp_cloexec_set))
+	if (FD_ISSET(fd, &fp->fp_cloexec_set))
 		r = FD_CLOEXEC;
 	break;
 
     case F_SETFD:
 	/* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
 	if (fcntl_argx & FD_CLOEXEC)
-		FD_SET(fp->fp_fd, &fp->fp_cloexec_set);
+		FD_SET(fd, &fp->fp_cloexec_set);
 	else
-		FD_CLR(fp->fp_fd, &fp->fp_cloexec_set);
+		FD_CLR(fd, &fp->fp_cloexec_set);
 	break;
 
     case F_GETFL:
@@ -172,7 +174,7 @@ int do_fcntl(void)
     case F_SETLK:
     case F_SETLKW:
 	/* Set or clear a file lock. */
-	r = lock_op(f, fcntl_req);
+	r = lock_op(fd, fcntl_req, addr);
 	break;
 
     case F_FREESP:
@@ -186,8 +188,8 @@ int do_fcntl(void)
 	else if (!(f->filp_mode & W_BIT)) r = EBADF;
 	else {
 		/* Copy flock data from userspace. */
-		r = sys_datacopy_wrapper(who_e, fp->fp_io_buffer,
-			SELF, (vir_bytes) &flock_arg, sizeof(flock_arg));
+		r = sys_datacopy_wrapper(who_e, addr, SELF,
+		    (vir_bytes)&flock_arg, sizeof(flock_arg));
 	}
 
 	if (r != OK) break;
@@ -294,11 +296,11 @@ int do_fsync(void)
   struct filp *rfilp;
   struct vmnt *vmp;
   dev_t dev;
-  int r = OK;
+  int fd, r = OK;
 
-  fp->fp_fd = job_m_in.m_lc_vfs_fsync.fd;
+  fd = job_m_in.m_lc_vfs_fsync.fd;
 
-  if ((rfilp = get_filp(fp->fp_fd, VNODE_READ)) == NULL)
+  if ((rfilp = get_filp(fd, VNODE_READ)) == NULL)
 	return(err_code);
 
   dev = rfilp->filp_vno->v_dev;
@@ -574,7 +576,6 @@ void pm_fork(endpoint_t pproc, endpoint_t cproc, pid_t cpid)
  * The parent and child parameters tell who forked off whom. The file
  * system uses the same slot numbers as the kernel.  Only PM makes this call.
  */
-
   struct fproc *cp, *pp;
   int i, parentno, childno;
   mutex_t c_fp_lock;
@@ -609,16 +610,8 @@ void pm_fork(endpoint_t pproc, endpoint_t cproc, pid_t cpid)
   cp->fp_pid = cpid;
   cp->fp_endpoint = cproc;
 
-  /* A forking process never has an outstanding grant, as it isn't blocking on
-   * I/O. */
-  if (GRANT_VALID(pp->fp_grant)) {
-	panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp->fp_endpoint,
-	       pp->fp_grant);
-  }
-  if (GRANT_VALID(cp->fp_grant)) {
-	panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp->fp_endpoint,
-	       cp->fp_grant);
-  }
+  /* A forking process cannot possibly be suspended on anything. */
+  assert(pp->fp_blocked_on == FP_BLOCKED_ON_NONE);
 
   /* A child is not a process leader, not being revived, etc. */
   cp->fp_flags = FP_NOFLAGS;
@@ -907,10 +900,12 @@ int pm_dumpcore(int csig, vir_bytes exe_name)
   char core_path[PATH_MAX];
   char proc_name[PROC_NAME_LEN];
 
-  /* If a process is blocked, fp->fp_fd holds the fd it's blocked on. Free it
-   * up for use by common_open(). This step is the reason we cannot use this
-   * function to generate a core dump of a process while it is still running
-   * (i.e., without terminating it), as it changes the state of the process.
+  /* In effect, the coredump is generated through the use of calls as if made
+   * by the process itself.  As such, the process must not be doing anything
+   * else.  Therefore, if the process was blocked on anything, unblock it
+   * first.  This step is the reason we cannot use this function to generate a
+   * core dump of a process while it is still running (i.e., without
+   * terminating it), as it changes the state of the process.
    */
   if (fp_is_blocked(fp))
           unpause();
diff --git a/minix/servers/vfs/open.c b/minix/servers/vfs/open.c
index 7dc9e7222..4ace3b985 100644
--- a/minix/servers/vfs/open.c
+++ b/minix/servers/vfs/open.c
@@ -30,7 +30,7 @@ static char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0};
 
 static struct vnode *new_node(struct lookup *resolve, int oflags,
 	mode_t bits);
-static int pipe_open(struct vnode *vp, mode_t bits, int oflags);
+static int pipe_open(int fd, struct vnode *vp, mode_t bits, int oflags);
 
 /*===========================================================================*
  *				do_open					     *
@@ -92,14 +92,14 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec)
   struct vmnt *vmp;
   struct dmap *dp;
   struct lookup resolve;
-  int start = 0;
+  int fd, start = 0;
 
   /* Remap the bottom two bits of oflags. */
   bits = (mode_t) mode_map[oflags & O_ACCMODE];
   if (!bits) return(EINVAL);
 
   /* See if file descriptor and filp slots are available. */
-  if ((r = get_fd(fp, start, bits, &fp->fp_fd, &filp)) != OK)
+  if ((r = get_fd(fp, start, bits, &fd, &filp)) != OK)
 	return(r);
 
   lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp);
@@ -130,12 +130,12 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec)
   }
 
   /* Claim the file descriptor and filp slot and fill them in. */
-  fp->fp_filp[fp->fp_fd] = filp;
+  fp->fp_filp[fd] = filp;
   filp->filp_count = 1;
   filp->filp_vno = vp;
   filp->filp_flags = oflags;
   if (oflags & O_CLOEXEC)
-	FD_SET(fp->fp_fd, &fp->fp_cloexec_set);
+	FD_SET(fd, &fp->fp_cloexec_set);
 
   /* Only do the normal open code if we didn't just create the file. */
   if (exist) {
@@ -163,7 +163,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec)
 			/* Invoke the driver for special processing. */
 			dev = vp->v_sdev;
 			/* TTY needs to know about the O_NOCTTY flag. */
-			r = cdev_open(dev, bits | (oflags & O_NOCTTY));
+			r = cdev_open(fd, dev, bits | (oflags & O_NOCTTY));
 			vp = filp->filp_vno;	/* Might be updated by
 						 * cdev_open after cloning */
 			break;
@@ -233,7 +233,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec)
 				filp->filp_flags = oflags;
 			}
 			if (r == OK) {
-				r = pipe_open(vp, bits, oflags);
+				r = pipe_open(fd, vp, bits, oflags);
 			}
 			if (r != ENXIO) {
 				/* See if someone else is doing a rd or wt on
@@ -244,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec)
 				filp->filp_count = 0; /* don't find self */
 				if ((filp2 = find_filp(vp, b)) != NULL) {
 				    /* Co-reader or writer found. Use it.*/
-				    fp->fp_filp[fp->fp_fd] = filp2;
+				    fp->fp_filp[fd] = filp2;
 				    filp2->filp_count++;
 				    filp2->filp_vno = vp;
 				    filp2->filp_flags = oflags;
@@ -280,13 +280,13 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec)
   /* If error, release inode. */
   if (r != OK) {
 	if (r != SUSPEND) {
-		fp->fp_filp[fp->fp_fd] = NULL;
+		fp->fp_filp[fd] = NULL;
 		filp->filp_count = 0;
 		filp->filp_vno = NULL;
 		put_vnode(vp);
 	}
   } else {
-	r = fp->fp_fd;
+	r = fd;
   }
 
   return(r);
@@ -480,7 +480,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
 /*===========================================================================*
  *				pipe_open				     *
  *===========================================================================*/
-static int pipe_open(struct vnode *vp, mode_t bits, int oflags)
+static int pipe_open(int fd, struct vnode *vp, mode_t bits, int oflags)
 {
 /*  This function is called from common_open. It checks if
  *  there is at least one reader/writer pair for the pipe, if not
@@ -497,6 +497,7 @@ static int pipe_open(struct vnode *vp, mode_t bits, int oflags)
 		if (bits & W_BIT) return(ENXIO);
 	} else {
 		/* Let's wait for the other side to show up */
+		fp->fp_popen.fd = fd;
 		suspend(FP_BLOCKED_ON_POPEN);
 		return(SUSPEND);
 	}
diff --git a/minix/servers/vfs/pipe.c b/minix/servers/vfs/pipe.c
index 795023fe1..0b50c838a 100644
--- a/minix/servers/vfs/pipe.c
+++ b/minix/servers/vfs/pipe.c
@@ -294,53 +294,37 @@ int notouch		/* check only */
  *===========================================================================*/
 void suspend(int why)
 {
-/* Take measures to suspend the processing of the present system call.
- * Store the parameters to be used upon resuming in the process table.
- * (Actually they are not used when a process is waiting for an I/O device,
- * but they are needed for pipes, and it is not worth making the distinction.)
- * The SUSPEND pseudo error should be returned after calling suspend().
+/* Take measures to suspend the processing of the present system call.  The
+ * caller must store the parameters to be used upon resuming in the process
+ * table as appropriate.  The SUSPEND pseudo error should be returned after
+ * calling suspend().
  */
 
+  assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
+
   if (why == FP_BLOCKED_ON_POPEN || why == FP_BLOCKED_ON_PIPE)
 	/* #procs susp'ed on pipe*/
 	susp_count++;
 
   fp->fp_blocked_on = why;
-  assert(fp->fp_grant == GRANT_INVALID || !GRANT_VALID(fp->fp_grant));
-  fp->fp_block_callnr = job_call_nr;
-}
-
-/*===========================================================================*
- *				wait_for				     *
- *===========================================================================*/
-void wait_for(endpoint_t who)
-{
-  if(who == NONE || who == ANY)
-	panic("suspend on NONE or ANY");
-  suspend(FP_BLOCKED_ON_OTHER);
-  fp->fp_task = who;
 }
 
 
 /*===========================================================================*
  *				pipe_suspend				     *
  *===========================================================================*/
-void pipe_suspend(struct filp * filp __unused, vir_bytes buf, size_t size)
+void pipe_suspend(int callnr, int fd, vir_bytes buf, size_t size,
+	size_t cum_io)
 {
 /* Take measures to suspend the processing of the present system call.
  * Store the parameters to be used upon resuming in the process table.
  */
 
-  /* We can only get here through an I/O call, which comes with a file
-   * descriptor, and that file descriptor must therefore correspond to the
-   * target file pointer of the I/O request. The process is blocked on the I/O
-   * call, and thus, the file descriptor will remain valid. Therefore, we can,
-   * and will, use the file descriptor to get the file pointer again later.
-   */
-  assert(fp->fp_filp[fp->fp_fd] == filp);
-
-  fp->fp_io_buffer = buf;
-  fp->fp_io_nbytes = size;
+  fp->fp_pipe.callnr = callnr;
+  fp->fp_pipe.fd = fd;
+  fp->fp_pipe.buf = buf;
+  fp->fp_pipe.nbytes = size;
+  fp->fp_pipe.cum_io = cum_io;
   suspend(FP_BLOCKED_ON_PIPE);
 }
 
@@ -350,14 +334,15 @@ void pipe_suspend(struct filp * filp __unused, vir_bytes buf, size_t size)
  *===========================================================================*/
 void unsuspend_by_endpt(endpoint_t proc_e)
 {
-/* Revive processes waiting for drivers (SUSPENDed) that have disappeared with
- * return code EAGAIN.
+/* Revive processes waiting for drivers (SUSPENDed) that have disappeared, with
+ * return code EIO.
  */
   struct fproc *rp;
 
   for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) {
 	if (rp->fp_pid == PID_FREE) continue;
-	if (rp->fp_blocked_on == FP_BLOCKED_ON_OTHER && rp->fp_task == proc_e)
+	if (rp->fp_blocked_on == FP_BLOCKED_ON_CDEV &&
+	    rp->fp_cdev.endpt == proc_e)
 		revive(rp->fp_endpoint, EIO);
   }
 
@@ -371,18 +356,18 @@ void unsuspend_by_endpt(endpoint_t proc_e)
 /*===========================================================================*
  *				release					     *
  *===========================================================================*/
-void release(vp, op, count)
-register struct vnode *vp;	/* inode of pipe */
-int op;				/* VFS_READ, VFS_WRITE, or VFS_OPEN */
-int count;			/* max number of processes to release */
+void release(struct vnode * vp, int op, int count)
 {
-/* Check to see if any process is hanging on vnode 'vp'. If one is, and it
- * was trying to perform the call indicated by 'op', release it.
+/* Check to see if any process is hanging on pipe vnode 'vp'. If one is, and it
+ * was trying to perform the call indicated by 'op' - one of VFS_OPEN,
+ * VFS_READ, or VFS_WRITE - release it.  The 'count' parameter indicates the
+ * maximum number of processes to release, which allows us to stop searching
+ * early in some cases.
  */
 
   register struct fproc *rp;
   struct filp *f;
-  int selop;
+  int fd, selop;
 
   /* Trying to perform the call also includes SELECTing on it with that
    * operation.
@@ -406,22 +391,27 @@ int count;			/* max number of processes to release */
 
   /* Search the proc table. */
   for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) {
+	/* Just to make sure:
+	 * - FP_BLOCKED_ON_POPEN implies the original request was VFS_OPEN;
+	 * - FP_BLOCKED_ON_PIPE may be the result of VFS_READ and VFS_WRITE,
+	 *   and one of those two numbers is stored in fp_pipe.callnr.
+	 */
 	if (rp->fp_pid != PID_FREE && fp_is_blocked(rp) &&
-	    !(rp->fp_flags & FP_REVIVED) && rp->fp_block_callnr == op) {
+	    !(rp->fp_flags & FP_REVIVED) &&
+	    ((op == VFS_OPEN && rp->fp_blocked_on == FP_BLOCKED_ON_POPEN) ||
+	     (op != VFS_OPEN && rp->fp_blocked_on == FP_BLOCKED_ON_PIPE &&
+	      op == rp->fp_pipe.callnr))) {
 		/* Find the vnode. Depending on the reason the process was
 		 * suspended, there are different ways of finding it.
 		 */
-
-		if (rp->fp_blocked_on == FP_BLOCKED_ON_POPEN ||
-		    rp->fp_blocked_on == FP_BLOCKED_ON_PIPE ||
-		    rp->fp_blocked_on == FP_BLOCKED_ON_LOCK ||
-		    rp->fp_blocked_on == FP_BLOCKED_ON_OTHER) {
-			f = rp->fp_filp[rp->fp_fd];
-			if (f == NULL || f->filp_mode == FILP_CLOSED)
-				continue;
-			if (f->filp_vno != vp)
-				continue;
-		} else
+		if (rp->fp_blocked_on == FP_BLOCKED_ON_POPEN)
+			fd = rp->fp_popen.fd;
+		else
+			fd = rp->fp_pipe.fd;
+		f = rp->fp_filp[fd];
+		if (f == NULL || f->filp_mode == FILP_CLOSED)
+			continue;
+		if (f->filp_vno != vp)
 			continue;
 
 		/* We found the vnode. Revive process. */
@@ -442,53 +432,44 @@ void revive(endpoint_t proc_e, int returned)
 {
 /* Revive a previously blocked process. When a process hangs on tty, this
  * is the way it is eventually released. For processes blocked on _SELECT and
- * _OTHER, this function MUST NOT block its calling thread.
+ * _CDEV, this function MUST NOT block its calling thread.
  */
   struct fproc *rfp;
   int blocked_on;
-  int fd_nr, slot;
+  int slot;
 
   if (proc_e == NONE || isokendpt(proc_e, &slot) != OK) return;
 
   rfp = &fproc[slot];
   if (!fp_is_blocked(rfp) || (rfp->fp_flags & FP_REVIVED)) return;
 
-  /* The 'reviving' flag only applies to pipes.  Processes waiting for TTY get
-   * a message right away.  The revival process is different for TTY and pipes.
-   * For select and TTY revival, the work is already done, for pipes it is not:
-   * the proc must be restarted so it can try again.
+  /* The 'reviving' flag applies to pipe I/O and file locks.  Processes waiting
+   * on those suspension types need more processing, and will be unblocked from
+   * the main loop later.  Processes suspended for other reasons get a reply
+   * right away, and as such, have their suspension cleared right here as well.
    */
   blocked_on = rfp->fp_blocked_on;
-  fd_nr = rfp->fp_fd;
-  if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_LOCK) {
+  if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_FLOCK) {
 	/* Revive a process suspended on a pipe or lock. */
 	rfp->fp_flags |= FP_REVIVED;
 	reviving++;		/* process was waiting on pipe or lock */
   } else {
 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
-	/* TODO: we could reset rfp->fp_fd to (e.g.) -1 here, but since its
-	 * value is not always bounds checked elsewhere, this might do more
-	 * harm than good right now.
-	 */
 	if (blocked_on == FP_BLOCKED_ON_POPEN) {
 		/* process blocked in open or create */
-		replycode(proc_e, fd_nr);
+		replycode(proc_e, rfp->fp_popen.fd);
 	} else if (blocked_on == FP_BLOCKED_ON_SELECT) {
 		replycode(proc_e, returned);
 	} else {
-		/* Revive a process suspended on TTY or other device.
-		 * Pretend it wants only what there is.
-		 */
-		rfp->fp_io_nbytes = returned;
+		assert(blocked_on == FP_BLOCKED_ON_CDEV);
 		/* If a grant has been issued by FS for this I/O, revoke
 		 * it again now that I/O is done.
 		 */
-		if (GRANT_VALID(rfp->fp_grant)) {
-			if(cpf_revoke(rfp->fp_grant) == -1) {
+		if (GRANT_VALID(rfp->fp_cdev.grant)) {
+			if (cpf_revoke(rfp->fp_cdev.grant) == -1) {
 				panic("VFS: revoke failed for grant: %d",
-					rfp->fp_grant);
+				    rfp->fp_cdev.grant);
 			}
-			rfp->fp_grant = GRANT_INVALID;
 		}
 		replycode(proc_e, returned);/* unblock the process */
 	}
@@ -504,9 +485,7 @@ void unpause(void)
 /* A signal has been sent to a user who is paused on the file system.
  * Abort the system call with the EINTR error message.
  */
-  int blocked_on, fild, status = EINTR;
-  struct filp *f;
-  dev_t dev;
+  int blocked_on, status = EINTR;
   int wasreviving = 0;
 
   if (!fp_is_blocked(fp)) return;
@@ -527,17 +506,13 @@ void unpause(void)
   switch (blocked_on) {
 	case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */
 		/* If the operation succeeded partially, return the bytes
-		 * processed so far, and clear the remembered state. Otherwise,
-		 * return EINTR as usual.
+		 * processed so far.  Otherwise, return EINTR as usual.
 		 */
-		if (fp->fp_cum_io_partial > 0) {
-			status = fp->fp_cum_io_partial;
-
-			fp->fp_cum_io_partial = 0;
-		}
+		if (fp->fp_pipe.cum_io > 0)
+			status = fp->fp_pipe.cum_io;
 		break;
 
-	case FP_BLOCKED_ON_LOCK:/* process trying to set a lock with FCNTL */
+	case FP_BLOCKED_ON_FLOCK:/* process trying to set a lock with FCNTL */
 		break;
 
 	case FP_BLOCKED_ON_SELECT:/* process blocking on select() */
@@ -547,19 +522,9 @@ void unpause(void)
 	case FP_BLOCKED_ON_POPEN:	/* process trying to open a fifo */
 		break;
 
-	case FP_BLOCKED_ON_OTHER:/* process trying to do device I/O (e.g. tty)*/
-		fild = fp->fp_fd;
-		if (fild < 0 || fild >= OPEN_MAX)
-			panic("file descriptor out-of-range");
-		f = fp->fp_filp[fild];
-		if(!f) {
-			sys_diagctl_stacktrace(fp->fp_endpoint);
-			panic("process %d blocked on empty fd %d",
-				fp->fp_endpoint, fild);
-		}
-		dev = f->filp_vno->v_sdev;	/* device hung on */
-
-		status = cdev_cancel(dev);
+	case FP_BLOCKED_ON_CDEV: /* process blocked on character device I/O */
+		status = cdev_cancel(fp->fp_cdev.dev, fp->fp_cdev.endpt,
+		    fp->fp_cdev.grant);
 
 		break;
 	default :
diff --git a/minix/servers/vfs/proto.h b/minix/servers/vfs/proto.h
index d031282d4..f077a251d 100644
--- a/minix/servers/vfs/proto.h
+++ b/minix/servers/vfs/proto.h
@@ -30,13 +30,13 @@ void send_work(void);
 int vm_vfs_procctl_handlemem(endpoint_t ep, vir_bytes mem, vir_bytes len, int flags);
 
 /* device.c */
-int cdev_open(dev_t dev, int flags);
+int cdev_open(int fd, dev_t dev, int flags);
 int cdev_close(dev_t dev);
 int cdev_io(int op, dev_t dev, endpoint_t proc_e, vir_bytes buf, off_t pos,
 	unsigned long bytes, int flags);
 dev_t cdev_map(dev_t dev, struct fproc *rfp);
 int cdev_select(dev_t dev, int ops);
-int cdev_cancel(dev_t dev);
+int cdev_cancel(dev_t dev, endpoint_t endpt, cp_grant_id_t grant);
 void cdev_reply(void);
 int bdev_open(dev_t dev, int access);
 int bdev_close(dev_t dev);
@@ -94,7 +94,7 @@ int truncate_vnode(struct vnode *vp, off_t newsize);
 int rdlink_direct(char *orig_path, char *link_path, struct fproc *rfp);
 
 /* lock.c */
-int lock_op(struct filp *f, int req);
+int lock_op(int fd, int req, vir_bytes arg);
 void lock_revive(void);
 
 /* main.c */
@@ -169,9 +169,9 @@ int pipe_check(struct filp *filp, int rw_flag, int oflags, int bytes,
 void release(struct vnode *vp, int op, int count);
 void revive(endpoint_t proc_e, int returned);
 void suspend(int why);
-void pipe_suspend(struct filp *rfilp, vir_bytes buf, size_t size);
+void pipe_suspend(int callnr, int fd, vir_bytes buf, size_t size,
+	size_t cum_io);
 void unsuspend_by_endpt(endpoint_t proc_e);
-void wait_for(endpoint_t proc_e);
 
 /* protect.c */
 int do_access(void);
@@ -189,12 +189,12 @@ void lock_bsf(void);
 void unlock_bsf(void);
 void check_bsf_lock(void);
 int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t bytes);
-int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd, vir_bytes buf,
-	size_t bytes);
-int read_write(struct fproc *rfp, int rw_flag, struct filp *f, vir_bytes buffer,
-	size_t nbytes, endpoint_t for_e);
-int rw_pipe(int rw_flag, endpoint_t usr, struct filp *f, vir_bytes buf,
-	size_t req_size);
+int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd,
+	vir_bytes buf, size_t bytes);
+int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
+	vir_bytes buffer, size_t nbytes, endpoint_t for_e);
+int rw_pipe(int rw_flag, endpoint_t usr, struct filp *f, int callnr, int fd,
+	vir_bytes buf, size_t nbytes, size_t cum_io);
 
 /* request.c */
 int req_breadwrite(endpoint_t fs_e, endpoint_t user_e, dev_t dev, off_t pos,
diff --git a/minix/servers/vfs/read.c b/minix/servers/vfs/read.c
index 5f0a5e783..c28831e6e 100644
--- a/minix/servers/vfs/read.c
+++ b/minix/servers/vfs/read.c
@@ -29,8 +29,17 @@
  *===========================================================================*/
 int do_read(void)
 {
+
+  /*
+   * The cum_io field is currently reserved for internal use only, and must be
+   * set to zero by the caller.  We may use it for future SA_RESTART support,
+   * just as we already use it internally now.
+   */
+  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
+	return(EINVAL);
+
   return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
-          job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
+	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
 }
 
 
@@ -80,8 +89,8 @@ void check_bsf_lock(void)
 /*===========================================================================*
  *				actual_read_write_peek			     *
  *===========================================================================*/
-int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
-	vir_bytes io_buf, size_t io_nbytes)
+int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd,
+	vir_bytes buf, size_t nbytes)
 {
 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
   struct filp *f;
@@ -91,12 +100,8 @@ int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
 
   if(rw_flag == WRITING) ro = 0;
 
-  rfp->fp_fd = io_fd;
-  rfp->fp_io_buffer = io_buf;
-  rfp->fp_io_nbytes = io_nbytes;
-
   locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
-  if ((f = get_filp2(rfp, rfp->fp_fd, locktype)) == NULL)
+  if ((f = get_filp2(rfp, fd, locktype)) == NULL)
 	return(err_code);
 
   assert(f->filp_count > 0);
@@ -105,12 +110,12 @@ int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
 	unlock_filp(f);
 	return(EBADF);
   }
-  if (rfp->fp_io_nbytes == 0) {
+  if (nbytes == 0) {
 	unlock_filp(f);
 	return(0);	/* so char special files need not check for 0*/
   }
 
-  r = read_write(rfp, rw_flag, f, rfp->fp_io_buffer, rfp->fp_io_nbytes, who_e);
+  r = read_write(rfp, rw_flag, fd, f, buf, nbytes, who_e);
 
   unlock_filp(f);
   return(r);
@@ -119,15 +124,15 @@ int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
 /*===========================================================================*
  *				do_read_write_peek			     *
  *===========================================================================*/
-int do_read_write_peek(int rw_flag, int io_fd, vir_bytes io_buf, size_t io_nbytes)
+int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t nbytes)
 {
-	return actual_read_write_peek(fp, rw_flag, io_fd, io_buf, io_nbytes);
+	return actual_read_write_peek(fp, rw_flag, fd, buf, nbytes);
 }
 
 /*===========================================================================*
  *				read_write				     *
  *===========================================================================*/
-int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
+int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
 	vir_bytes buf, size_t size, endpoint_t for_e)
 {
   register struct vnode *vp;
@@ -146,17 +151,14 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
 
   if (size > SSIZE_MAX) return(EINVAL);
 
-  op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);
-
   if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
-	if (rfp->fp_cum_io_partial != 0) {
-		panic("VFS: read_write: fp_cum_io_partial not clear");
-	}
 	if(rw_flag == PEEKING) {
 	  	printf("read_write: peek on pipe makes no sense\n");
 		return EINVAL;
 	}
-	r = rw_pipe(rw_flag, for_e, f, buf, size);
+	assert(fd != -1);
+	op = (rw_flag == READING ? VFS_READ : VFS_WRITE);
+	r = rw_pipe(rw_flag, for_e, f, op, fd, buf, size, 0 /*cum_io*/);
   } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
 	if(rw_flag == PEEKING) {
 	  	printf("read_write: peek on char device makes no sense\n");
@@ -167,6 +169,7 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
 		panic("VFS: read_write tries to access char dev NO_DEV");
 
 	dev = vp->v_sdev;
+	op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);
 
 	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
 	if (r >= 0) {
@@ -268,16 +271,22 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
 int do_getdents(void)
 {
 /* Perform the getdents(fd, buf, size) system call. */
-  int r = OK;
+  int fd, r = OK;
   off_t new_pos;
+  vir_bytes buf;
+  size_t size;
   register struct filp *rfilp;
 
-  fp->fp_fd = job_m_in.m_lc_vfs_readwrite.fd;
-  fp->fp_io_buffer = job_m_in.m_lc_vfs_readwrite.buf;
-  fp->fp_io_nbytes = job_m_in.m_lc_vfs_readwrite.len;
+  /* The cum_io field must always be set to zero for getdents(). */
+  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
+	return(EINVAL);
+
+  fd = job_m_in.m_lc_vfs_readwrite.fd;
+  buf = job_m_in.m_lc_vfs_readwrite.buf;
+  size = job_m_in.m_lc_vfs_readwrite.len;
 
   /* Is the file descriptor valid? */
-  if ( (rfilp = get_filp(fp->fp_fd, VNODE_READ)) == NULL)
+  if ( (rfilp = get_filp(fd, VNODE_READ)) == NULL)
 	return(err_code);
 
   if (!(rfilp->filp_mode & R_BIT))
@@ -287,8 +296,7 @@ int do_getdents(void)
 
   if (r == OK) {
 	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
-			 rfilp->filp_pos, fp->fp_io_buffer, fp->fp_io_nbytes,
-			 &new_pos, 0);
+	    rfilp->filp_pos, buf, size, &new_pos, 0);
 
 	if (r > 0) rfilp->filp_pos = new_pos;
   }
@@ -301,15 +309,11 @@ int do_getdents(void)
 /*===========================================================================*
  *				rw_pipe					     *
  *===========================================================================*/
-int rw_pipe(rw_flag, usr_e, f, buf, req_size)
-int rw_flag;			/* READING or WRITING */
-endpoint_t usr_e;
-struct filp *f;
-vir_bytes buf;
-size_t req_size;
+int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, int callnr, int fd,
+	vir_bytes buf, size_t nbytes, size_t cum_io)
 {
-  int r, oflags, partial_pipe = 0;
-  size_t size, cum_io;
+  int r, oflags, partial_pipe = FALSE;
+  size_t size;
   size_t cum_io_incr;
   struct vnode *vp;
   off_t  position, new_pos;
@@ -324,19 +328,11 @@ size_t req_size;
 
   assert(rw_flag == READING || rw_flag == WRITING);
 
-  /* fp->fp_cum_io_partial is only nonzero when doing partial writes.
-   * We clear the field immediately here because we expect completion or error;
-   * its value must be (re)assigned if we end up suspending the write (again).
-   */
-  cum_io = fp->fp_cum_io_partial;
-  fp->fp_cum_io_partial = 0;
-
-  r = pipe_check(f, rw_flag, oflags, req_size, 0);
+  r = pipe_check(f, rw_flag, oflags, nbytes, 0);
   if (r <= 0) {
-	if (r == SUSPEND) {
-		fp->fp_cum_io_partial = cum_io;
-		pipe_suspend(f, buf, req_size);
-	}
+	if (r == SUSPEND)
+		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
+
 	/* If pipe_check returns an error instead of suspending the call, we
 	 * return that error, even if we are resuming a partially completed
 	 * operation (ie, a large blocking write), to match NetBSD's behavior.
@@ -345,7 +341,7 @@ size_t req_size;
   }
 
   size = r;
-  if (size < req_size) partial_pipe = 1;
+  if (size < nbytes) partial_pipe = TRUE;
 
   /* Truncate read request at size. */
   if (rw_flag == READING && size > vp->v_size) {
@@ -365,7 +361,7 @@ size_t req_size;
 
   cum_io += cum_io_incr;
   buf += cum_io_incr;
-  req_size -= cum_io_incr;
+  nbytes -= cum_io_incr;
 
   if (rw_flag == READING)
 	vp->v_size -= cum_io_incr;
@@ -376,16 +372,11 @@ size_t req_size;
 	/* partial write on pipe with */
 	/* O_NONBLOCK, return write count */
 	if (!(oflags & O_NONBLOCK)) {
-		/* partial write on pipe with req_size > PIPE_BUF,
-		 * non-atomic
-		 */
-		fp->fp_cum_io_partial = cum_io;
-		pipe_suspend(f, buf, req_size);
+		/* partial write on pipe with nbytes > PIPE_BUF, non-atomic */
+		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
 		return(SUSPEND);
 	}
   }
 
-  assert(fp->fp_cum_io_partial == 0);
-
   return(cum_io);
 }
diff --git a/minix/servers/vfs/write.c b/minix/servers/vfs/write.c
index 951e8ad25..eeb04b60d 100644
--- a/minix/servers/vfs/write.c
+++ b/minix/servers/vfs/write.c
@@ -15,6 +15,11 @@
 int do_write(void)
 {
 /* Perform the write(fd, buffer, nbytes) system call. */
+
+  /* See the comment in do_read(). */
+  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
+	return(EINVAL);
+
   return(do_read_write_peek(WRITING, job_m_in.m_lc_vfs_readwrite.fd,
 	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
 }