From: David van Moolenbroek Date: Mon, 4 Jan 2016 18:42:13 +0000 (+0000) Subject: VFS: store process suspension state as union X-Git-Url: http://zhaoyanbai.com/repos/Bv9ARM.pdf?a=commitdiff_plain;h=232819dd499bd777897c6a4ee3a77009ce269db9;p=minix.git VFS: store process suspension state as union Previously, VFS would use various subsets of a number of fproc structure fields to store state when the process is blocked (suspended) for various reasons. As a result, there was a fair amount of abuse of fields, hidden state, and confusion as to which fields were used with which suspension states. Instead, the suspension state is now split into per-state structures, which are then stored in a union. Each of the union's structures should be accessed only right before, during, and right after the fp_blocked_on field is set to the corresponding blocking type. As a result, it is now very clear which fields are in use at which times, and we even save a bit of memory as a side effect. Change-Id: I5c24e353b6cb0c32eb41c70f89c5cfb23f6c93df --- diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h index 721c3cc86..5e1f4b985 100644 --- a/minix/include/minix/ipc.h +++ b/minix/include/minix/ipc.h @@ -795,8 +795,9 @@ typedef struct { int fd; vir_bytes buf; size_t len; + size_t cum_io; /* reserved/internal, set to 0 */ - uint8_t padding[44]; + uint8_t padding[40]; } mess_lc_vfs_readwrite; _ASSERT_MSG_SIZE(mess_lc_vfs_readwrite); diff --git a/minix/lib/libc/sys/getdents.c b/minix/lib/libc/sys/getdents.c index bda030593..175c615fb 100644 --- a/minix/lib/libc/sys/getdents.c +++ b/minix/lib/libc/sys/getdents.c @@ -13,6 +13,7 @@ ssize_t getdents(int fd, char *buffer, size_t nbytes) m.m_lc_vfs_readwrite.fd = fd; m.m_lc_vfs_readwrite.len = nbytes; m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer; + m.m_lc_vfs_readwrite.cum_io = 0; return _syscall(VFS_PROC_NR, VFS_GETDENTS, &m); } diff --git a/minix/lib/libc/sys/read.c b/minix/lib/libc/sys/read.c index b8eeb72be..e23f4eb36 100644 --- a/minix/lib/libc/sys/read.c +++ b/minix/lib/libc/sys/read.c @@ -17,5 +17,6 @@ ssize_t read(int fd, void *buffer, size_t nbytes) m.m_lc_vfs_readwrite.fd = fd; m.m_lc_vfs_readwrite.len = nbytes; m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer; + m.m_lc_vfs_readwrite.cum_io = 0; return(_syscall(VFS_PROC_NR, VFS_READ, &m)); } diff --git a/minix/lib/libc/sys/write.c b/minix/lib/libc/sys/write.c index 3accd022f..5b40a1e30 100644 --- a/minix/lib/libc/sys/write.c +++ b/minix/lib/libc/sys/write.c @@ -13,6 +13,7 @@ ssize_t write(int fd, const void *buffer, size_t nbytes) m.m_lc_vfs_readwrite.fd = fd; m.m_lc_vfs_readwrite.len = nbytes; m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer; + m.m_lc_vfs_readwrite.cum_io = 0; /* reserved for future use */ return(_syscall(VFS_PROC_NR, VFS_WRITE, &m)); } diff --git a/minix/servers/is/dmp_fs.c b/minix/servers/is/dmp_fs.c index e30489cc9..5a9ad8fb3 100644 --- a/minix/servers/is/dmp_fs.c +++ b/minix/servers/is/dmp_fs.c @@ -48,8 +48,8 @@ void fproc_dmp() !!(fp->fp_flags & FP_SESLDR), nfds, fp->fp_blocked_on, !!(fp->fp_flags & FP_REVIVED) ); - if (fp->fp_blocked_on == FP_BLOCKED_ON_OTHER) - printf("%4d\n", fp->fp_task); + if (fp->fp_blocked_on == FP_BLOCKED_ON_CDEV) + printf("%4d\n", fp->fp_cdev.endpt); else printf(" nil\n"); } diff --git a/minix/servers/mib/proc.c b/minix/servers/mib/proc.c index d2c63782b..60a901785 100644 --- a/minix/servers/mib/proc.c +++ b/minix/servers/mib/proc.c @@ -296,8 +296,8 @@ get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz, case FP_BLOCKED_ON_PIPE: wmesg = "pipe"; 
break; - case FP_BLOCKED_ON_LOCK: - wmesg = "lock"; + case FP_BLOCKED_ON_FLOCK: + wmesg = "flock"; break; case FP_BLOCKED_ON_POPEN: wmesg = "popen"; @@ -305,7 +305,7 @@ get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz, case FP_BLOCKED_ON_SELECT: wmesg = "select"; break; - case FP_BLOCKED_ON_OTHER: + case FP_BLOCKED_ON_CDEV: /* * Add the task (= character driver) endpoint to the * wchan value, and use the driver's process name, diff --git a/minix/servers/vfs/const.h b/minix/servers/vfs/const.h index 0cce3cf1b..4385350ed 100644 --- a/minix/servers/vfs/const.h +++ b/minix/servers/vfs/const.h @@ -17,11 +17,10 @@ #define FP_BLOCKED_ON_NONE 0 /* not blocked */ #define FP_BLOCKED_ON_PIPE 1 /* susp'd on pipe */ -#define FP_BLOCKED_ON_LOCK 2 /* susp'd on lock */ +#define FP_BLOCKED_ON_FLOCK 2 /* susp'd on file lock */ #define FP_BLOCKED_ON_POPEN 3 /* susp'd on pipe open */ #define FP_BLOCKED_ON_SELECT 4 /* susp'd on select */ -#define FP_BLOCKED_ON_OTHER 5 /* blocked on other process, check - fp_task to find out */ +#define FP_BLOCKED_ON_CDEV 5 /* blocked on character device I/O */ /* test if the process is blocked on something */ #define fp_is_blocked(fp) ((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE) diff --git a/minix/servers/vfs/coredump.c b/minix/servers/vfs/coredump.c index 91ebf5d5f..1139798ac 100644 --- a/minix/servers/vfs/coredump.c +++ b/minix/servers/vfs/coredump.c @@ -175,7 +175,14 @@ static void adjust_offsets(Elf_Phdr phdrs[], int phnum) *===========================================================================*/ static void write_buf(struct filp *f, char *buf, size_t size) { - read_write(fp, WRITING, f, (vir_bytes)buf, size, VFS_PROC_NR); + /* + * TODO: pass in the proper file descriptor number. It really doesn't matter + * what we pass in, because the write target is a regular file. As such, the + * write call will never be suspended, and suspension is the only case that + * read_write() could use the file descriptor. Still, passing in an invalid + * value isn't exactly nice. + */ + read_write(fp, WRITING, -1 /*fd*/, f, (vir_bytes)buf, size, VFS_PROC_NR); } /*===========================================================================* diff --git a/minix/servers/vfs/device.c b/minix/servers/vfs/device.c index 54a1b0872..e9bec4292 100644 --- a/minix/servers/vfs/device.c +++ b/minix/servers/vfs/device.c @@ -32,7 +32,7 @@ #include "vnode.h" #include "vmnt.h" -static int cdev_opcl(int op, dev_t dev, int flags); +static int cdev_opcl(int op, int fd, dev_t dev, int flags); static int block_io(endpoint_t driver_e, message *mess_ptr); static cp_grant_id_t make_grant(endpoint_t driver_e, endpoint_t user_e, int op, vir_bytes buf, unsigned long size); @@ -318,9 +318,10 @@ int cdev_io( panic("VFS: asynsend in cdev_io failed: %d", r); /* Suspend the calling process until a reply arrives. */ - wait_for(dp->dmap_driver); - assert(!GRANT_VALID(fp->fp_grant)); - fp->fp_grant = gid; /* revoke this when unsuspended. */ + fp->fp_cdev.dev = dev; + fp->fp_cdev.endpt = dp->dmap_driver; + fp->fp_cdev.grant = gid; /* revoke this when unsuspended */ + suspend(FP_BLOCKED_ON_CDEV); return SUSPEND; } @@ -329,7 +330,7 @@ int cdev_io( /*===========================================================================* * cdev_clone * *===========================================================================*/ -static int cdev_clone(dev_t dev, devminor_t new_minor) +static int cdev_clone(int fd, dev_t dev, devminor_t new_minor) { /* A new minor device number has been returned. 
Request PFS to create a * temporary device file to hold it. @@ -338,6 +339,8 @@ static int cdev_clone(dev_t dev, devminor_t new_minor) struct node_details res; int r; + assert(fd != -1); + /* Device number of the new device. */ dev = makedev(major(dev), new_minor); @@ -345,21 +348,21 @@ static int cdev_clone(dev_t dev, devminor_t new_minor) r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid, RWX_MODES | I_CHAR_SPECIAL, dev, &res); if (r != OK) { - (void) cdev_opcl(CDEV_CLOSE, dev, 0); + (void)cdev_opcl(CDEV_CLOSE, -1, dev, 0); return r; } /* Drop old node and use the new values */ if ((vp = get_free_vnode()) == NULL) { req_putnode(PFS_PROC_NR, res.inode_nr, 1); /* is this right? */ - (void) cdev_opcl(CDEV_CLOSE, dev, 0); + (void)cdev_opcl(CDEV_CLOSE, -1, dev, 0); return(err_code); } lock_vnode(vp, VNODE_OPCL); - assert(fp->fp_filp[fp->fp_fd] != NULL); - unlock_vnode(fp->fp_filp[fp->fp_fd]->filp_vno); - put_vnode(fp->fp_filp[fp->fp_fd]->filp_vno); + assert(fp->fp_filp[fd] != NULL); + unlock_vnode(fp->fp_filp[fd]->filp_vno); + put_vnode(fp->fp_filp[fd]->filp_vno); vp->v_fs_e = res.fs_e; vp->v_vmnt = NULL; @@ -370,7 +373,7 @@ static int cdev_clone(dev_t dev, devminor_t new_minor) vp->v_sdev = dev; vp->v_fs_count = 1; vp->v_ref_count = 1; - fp->fp_filp[fp->fp_fd]->filp_vno = vp; + fp->fp_filp[fd]->filp_vno = vp; return OK; } @@ -381,6 +384,7 @@ static int cdev_clone(dev_t dev, devminor_t new_minor) *===========================================================================*/ static int cdev_opcl( int op, /* operation, CDEV_OPEN or CDEV_CLOSE */ + int fd, /* file descriptor (open) or -1 (close) */ dev_t dev, /* device to open or close */ int flags /* mode bits and flags */ ) @@ -392,7 +396,14 @@ static int cdev_opcl( message dev_mess; int r, r2; + /* + * We need the a descriptor for CDEV_OPEN, because if the driver returns a + * cloned device, we need to replace what the fd points to. For CDEV_CLOSE + * however, we may be closing a device for which the calling process has no + * file descriptor, and thus we expect no meaningful fd value in that case. + */ assert(op == CDEV_OPEN || op == CDEV_CLOSE); + assert(fd != -1 || op == CDEV_CLOSE); /* Determine task dmap. */ if ((dp = cdev_get(dev, &minor_dev)) == NULL) @@ -439,7 +450,6 @@ static int cdev_opcl( panic("VFS: asynsend in cdev_opcl failed: %d", r); /* Block the thread waiting for a reply. */ - fp->fp_task = dp->dmap_driver; self->w_task = dp->dmap_driver; self->w_drv_sendrec = &dev_mess; @@ -460,7 +470,7 @@ static int cdev_opcl( */ if (r & CDEV_CLONED) { new_minor = r & ~(CDEV_CLONED | CDEV_CTTY); - if ((r2 = cdev_clone(dev, new_minor)) < 0) + if ((r2 = cdev_clone(fd, dev, new_minor)) < 0) return(r2); } @@ -481,11 +491,11 @@ static int cdev_opcl( /*===========================================================================* * cdev_open * *===========================================================================*/ -int cdev_open(dev_t dev, int flags) +int cdev_open(int fd, dev_t dev, int flags) { /* Open a character device. */ - return cdev_opcl(CDEV_OPEN, dev, flags); + return cdev_opcl(CDEV_OPEN, fd, dev, flags); } @@ -496,7 +506,7 @@ int cdev_close(dev_t dev) { /* Close a character device. */ - return cdev_opcl(CDEV_CLOSE, dev, 0); + return cdev_opcl(CDEV_CLOSE, -1, dev, 0); } @@ -507,17 +517,17 @@ int do_ioctl(void) { /* Perform the ioctl(2) system call. 
*/ unsigned long ioctlrequest; - int r = OK; + int fd, r = OK; struct filp *f; register struct vnode *vp; dev_t dev; vir_bytes argx; - fp->fp_fd = job_m_in.m_lc_vfs_ioctl.fd; + fd = job_m_in.m_lc_vfs_ioctl.fd; ioctlrequest = job_m_in.m_lc_vfs_ioctl.req; argx = (vir_bytes)job_m_in.m_lc_vfs_ioctl.arg; - if ((f = get_filp(fp->fp_fd, VNODE_READ)) == NULL) + if ((f = get_filp(fd, VNODE_READ)) == NULL) return(err_code); vp = f->filp_vno; /* get vnode pointer */ if (!S_ISCHR(vp->v_mode) && !S_ISBLK(vp->v_mode)) { @@ -535,7 +545,7 @@ int do_ioctl(void) f->filp_ioctl_fp = NULL; } else r = cdev_io(CDEV_IOCTL, dev, who_e, argx, 0, ioctlrequest, - f->filp_flags); + f->filp_flags); } unlock_filp(f); @@ -583,7 +593,7 @@ int cdev_select(dev_t dev, int ops) /*===========================================================================* * cdev_cancel * *===========================================================================*/ -int cdev_cancel(dev_t dev) +int cdev_cancel(dev_t dev, endpoint_t endpt __unused, cp_grant_id_t grant) { /* Cancel an I/O request, blocking until it has been cancelled. */ devminor_t minor_dev; @@ -607,7 +617,6 @@ int cdev_cancel(dev_t dev) panic("VFS: asynsend in cdev_cancel failed: %d", r); /* Suspend this thread until we have received the response. */ - fp->fp_task = dp->dmap_driver; self->w_task = dp->dmap_driver; self->w_drv_sendrec = &dev_mess; @@ -616,12 +625,11 @@ int cdev_cancel(dev_t dev) self->w_task = NONE; assert(self->w_drv_sendrec == NULL); - /* Clean up and return the result (note: the request may have completed). */ - if (GRANT_VALID(fp->fp_grant)) { - (void) cpf_revoke(fp->fp_grant); - fp->fp_grant = GRANT_INVALID; - } + /* Clean up. */ + if (GRANT_VALID(grant)) + (void)cpf_revoke(grant); + /* Return the result (note: the request may have completed). */ r = dev_mess.m_lchardriver_vfs_reply.status; return (r == EAGAIN) ? EINTR : r; } @@ -771,8 +779,8 @@ static void cdev_generic_reply(message *m_ptr) *wp->w_drv_sendrec = *m_ptr; wp->w_drv_sendrec = NULL; worker_signal(wp); /* Continue open/close/cancel */ - } else if (rfp->fp_blocked_on != FP_BLOCKED_ON_OTHER || - rfp->fp_task != m_ptr->m_source) { + } else if (rfp->fp_blocked_on != FP_BLOCKED_ON_CDEV || + rfp->fp_cdev.endpt != m_ptr->m_source) { /* This would typically be caused by a protocol error, i.e. a driver * not properly following the character driver protocol rules. 
*/ diff --git a/minix/servers/vfs/fproc.h b/minix/servers/vfs/fproc.h index 28bc56d3c..13c0cf61c 100644 --- a/minix/servers/vfs/fproc.h +++ b/minix/servers/vfs/fproc.h @@ -27,14 +27,29 @@ EXTERN struct fproc { dev_t fp_tty; /* major/minor of controlling tty */ int fp_blocked_on; /* what is it blocked on */ - int fp_block_callnr; /* blocked call if rd/wr can't finish */ - size_t fp_cum_io_partial; /* partial byte count if write can't finish */ - endpoint_t fp_task; /* which task is proc suspended on */ - cp_grant_id_t fp_grant; /* revoke this grant on unsuspend if > -1 */ - - int fp_fd; /* file descriptor for blocking call */ - vir_bytes fp_io_buffer; /* user buffer address for ongoing I/O */ - size_t fp_io_nbytes; /* number of bytes left for ongoing I/O */ + union ixfer_fp_u { /* state per blocking type */ + struct { /* FP_BLOCKED_ON_PIPE */ + int callnr; /* user call: VFS_READ or VFS_WRITE */ + int fd; /* file descriptor for blocking call */ + vir_bytes buf; /* user buffer address */ + size_t nbytes; /* number of bytes left */ + size_t cum_io; /* partial (write) result byte count */ + } u_pipe; + struct { /* FP_BLOCKED_ON_POPEN */ + int fd; /* file descriptor for blocking call */ + } u_popen; + struct { /* FP_BLOCKED_ON_FLOCK */ + int fd; /* file descriptor for blocking call */ + int cmd; /* fcntl command, always F_SETLKW */ + vir_bytes arg; /* user address of flock structure */ + } u_flock; + /* nothing for FP_BLOCKED_ON_SELECT for now */ + struct { /* FP_BLOCKED_ON_CDEV */ + dev_t dev; /* device number for blocking call */ + endpoint_t endpt; /* driver endpoint */ + cp_grant_id_t grant; /* data grant */ + } u_cdev; + } fp_u; uid_t fp_realuid; /* real user id */ uid_t fp_effuid; /* effective user id */ @@ -57,6 +72,12 @@ EXTERN struct fproc { #endif } fproc[NR_PROCS]; +/* Shortcuts for block state union substructures. */ +#define fp_pipe fp_u.u_pipe +#define fp_popen fp_u.u_popen +#define fp_flock fp_u.u_flock +#define fp_cdev fp_u.u_cdev + /* fp_flags */ #define FP_NOFLAGS 0000 #define FP_SRV_PROC 0001 /* Set if process is a service */ diff --git a/minix/servers/vfs/link.c b/minix/servers/vfs/link.c index 9d699a03c..ea6124c39 100644 --- a/minix/servers/vfs/link.c +++ b/minix/servers/vfs/link.c @@ -329,16 +329,16 @@ int do_ftruncate(void) /* As with do_truncate(), truncate_vnode() does the actual work. */ struct filp *rfilp; struct vnode *vp; - int r; + int r, fd; off_t length; - fp->fp_fd = job_m_in.m_lc_vfs_truncate.fd; + fd = job_m_in.m_lc_vfs_truncate.fd; length = job_m_in.m_lc_vfs_truncate.offset; if (length < 0) return(EINVAL); /* File is already opened; get a vnode pointer from filp */ - if ((rfilp = get_filp(fp->fp_fd, VNODE_WRITE)) == NULL) + if ((rfilp = get_filp(fd, VNODE_WRITE)) == NULL) return(err_code); vp = rfilp->filp_vno; diff --git a/minix/servers/vfs/lock.c b/minix/servers/vfs/lock.c index b7aefda3a..8cf341934 100644 --- a/minix/servers/vfs/lock.c +++ b/minix/servers/vfs/lock.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "file.h" #include "lock.h" #include "vnode.h" @@ -17,21 +18,24 @@ /*===========================================================================* * lock_op * *===========================================================================*/ -int lock_op(f, req) -struct filp *f; -int req; /* either F_SETLK or F_SETLKW */ +int lock_op(int fd, int req, vir_bytes arg) { /* Perform the advisory locking required by POSIX. 
*/ - int r, ltype, i, conflict = 0, unlocking = 0; mode_t mo; off_t first, last; + struct filp *f; struct flock flock; struct file_lock *flp, *flp2, *empty; + assert(req == F_GETLK || req == F_SETLK || req == F_SETLKW); + + f = fp->fp_filp[fd]; + assert(f != NULL); + /* Fetch the flock structure from user space. */ - r = sys_datacopy_wrapper(who_e, fp->fp_io_buffer, VFS_PROC_NR, - (vir_bytes) &flock, sizeof(flock)); + r = sys_datacopy_wrapper(who_e, arg, VFS_PROC_NR, (vir_bytes)&flock, + sizeof(flock)); if (r != OK) return(EINVAL); /* Make some error checks. */ @@ -86,7 +90,10 @@ int req; /* either F_SETLK or F_SETLKW */ return(EAGAIN); } else { /* For F_SETLKW, suspend the process. */ - suspend(FP_BLOCKED_ON_LOCK); + fp->fp_flock.fd = fd; + fp->fp_flock.cmd = req; + fp->fp_flock.arg = arg; + suspend(FP_BLOCKED_ON_FLOCK); return(SUSPEND); } } @@ -140,8 +147,8 @@ int req; /* either F_SETLK or F_SETLKW */ } /* Copy the flock structure back to the caller. */ - r = sys_datacopy_wrapper(VFS_PROC_NR, (vir_bytes) &flock, who_e, - fp->fp_io_buffer, sizeof(flock)); + r = sys_datacopy_wrapper(VFS_PROC_NR, (vir_bytes)&flock, who_e, arg, + sizeof(flock)); return(r); } @@ -177,7 +184,7 @@ void lock_revive() for (fptr = &fproc[0]; fptr < &fproc[NR_PROCS]; fptr++){ if (fptr->fp_pid == PID_FREE) continue; - if (fptr->fp_blocked_on == FP_BLOCKED_ON_LOCK) { + if (fptr->fp_blocked_on == FP_BLOCKED_ON_FLOCK) { revive(fptr->fp_endpoint, 0); } } diff --git a/minix/servers/vfs/main.c b/minix/servers/vfs/main.c index 9c70e952e..b690ffd33 100644 --- a/minix/servers/vfs/main.c +++ b/minix/servers/vfs/main.c @@ -213,18 +213,32 @@ static void do_reply(struct worker_thread *wp) *===========================================================================*/ static void do_pending_pipe(void) { - int r, op; + vir_bytes buf; + size_t nbytes, cum_io; + int r, op, fd; struct filp *f; tll_access_t locktype; - f = fp->fp_filp[fp->fp_fd]; + assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE); + + /* + * We take all our needed resumption state from the m_in message, which is + * filled by unblock(). Since this is an internal resumption, there is no + * need to perform extensive checks on the message fields. + */ + fd = job_m_in.m_lc_vfs_readwrite.fd; + buf = job_m_in.m_lc_vfs_readwrite.buf; + nbytes = job_m_in.m_lc_vfs_readwrite.len; + cum_io = job_m_in.m_lc_vfs_readwrite.cum_io; + + f = fp->fp_filp[fd]; assert(f != NULL); locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE; op = (job_call_nr == VFS_READ) ? READING : WRITING; lock_filp(f, locktype); - r = rw_pipe(op, who_e, f, fp->fp_io_buffer, fp->fp_io_nbytes); + r = rw_pipe(op, who_e, f, job_call_nr, fd, buf, nbytes, cum_io); if (r != SUSPEND) { /* Do we have results to report? */ /* Process is writing, but there is no reader. Send a SIGPIPE signal. @@ -409,7 +423,6 @@ static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info) rfp->fp_flags = FP_NOFLAGS; rfp->fp_pid = mess.VFS_PM_PID; rfp->fp_endpoint = mess.VFS_PM_ENDPT; - rfp->fp_grant = GRANT_INVALID; rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; rfp->fp_realuid = (uid_t) SYS_UID; rfp->fp_effuid = (uid_t) SYS_UID; @@ -917,24 +930,25 @@ struct fproc *rfp; /* Reconstruct the original request from the saved data. 
*/ memset(&m_in, 0, sizeof(m_in)); m_in.m_source = rfp->fp_endpoint; - m_in.m_type = rfp->fp_block_callnr; - switch (m_in.m_type) { - case VFS_READ: - case VFS_WRITE: - assert(blocked_on == FP_BLOCKED_ON_PIPE); - m_in.m_lc_vfs_readwrite.fd = rfp->fp_fd; - m_in.m_lc_vfs_readwrite.buf = rfp->fp_io_buffer; - m_in.m_lc_vfs_readwrite.len = rfp->fp_io_nbytes; + switch (blocked_on) { + case FP_BLOCKED_ON_PIPE: + assert(rfp->fp_pipe.callnr == VFS_READ || + rfp->fp_pipe.callnr == VFS_WRITE); + m_in.m_type = rfp->fp_pipe.callnr; + m_in.m_lc_vfs_readwrite.fd = rfp->fp_pipe.fd; + m_in.m_lc_vfs_readwrite.buf = rfp->fp_pipe.buf; + m_in.m_lc_vfs_readwrite.len = rfp->fp_pipe.nbytes; + m_in.m_lc_vfs_readwrite.cum_io = rfp->fp_pipe.cum_io; break; - case VFS_FCNTL: - assert(blocked_on == FP_BLOCKED_ON_LOCK); - m_in.m_lc_vfs_fcntl.fd = rfp->fp_fd; - m_in.m_lc_vfs_fcntl.cmd = rfp->fp_io_nbytes; - m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_io_buffer; - assert(m_in.m_lc_vfs_fcntl.cmd == F_SETLKW); + case FP_BLOCKED_ON_FLOCK: + assert(rfp->fp_flock.cmd == F_SETLKW); + m_in.m_type = VFS_FCNTL; + m_in.m_lc_vfs_fcntl.fd = rfp->fp_flock.fd; + m_in.m_lc_vfs_fcntl.cmd = rfp->fp_flock.cmd; + m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_flock.arg; break; default: - panic("unblocking call %d blocked on %d ??", m_in.m_type, blocked_on); + panic("unblocking call blocked on %d ??", blocked_on); } rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* no longer blocked */ @@ -942,9 +956,6 @@ struct fproc *rfp; reviving--; assert(reviving >= 0); - /* This should not be device I/O. If it is, it'll 'leak' grants. */ - assert(!GRANT_VALID(rfp->fp_grant)); - /* Pending pipe reads/writes cannot be repeated as is, and thus require a * special resumption procedure. */ diff --git a/minix/servers/vfs/misc.c b/minix/servers/vfs/misc.c index d02b99534..d7e30bb94 100644 --- a/minix/servers/vfs/misc.c +++ b/minix/servers/vfs/misc.c @@ -83,7 +83,10 @@ int do_getsysinfo(void) for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++, rfpl++) { rfpl->fpl_tty = rfp->fp_tty; rfpl->fpl_blocked_on = rfp->fp_blocked_on; - rfpl->fpl_task = rfp->fp_task; + if (rfp->fp_blocked_on == FP_BLOCKED_ON_CDEV) + rfpl->fpl_task = rfp->fp_cdev.endpt; + else + rfpl->fpl_task = NONE; } src_addr = (vir_bytes) fproc_light; len = sizeof(fproc_light); @@ -110,20 +113,19 @@ int do_getsysinfo(void) int do_fcntl(void) { /* Perform the fcntl(fd, cmd, ...) system call. */ - - register struct filp *f; - int new_fd, fl, r = OK, fcntl_req, fcntl_argx; + struct filp *f; + int fd, new_fd, fl, r = OK, fcntl_req, fcntl_argx; + vir_bytes addr; tll_access_t locktype; - fp->fp_fd = job_m_in.m_lc_vfs_fcntl.fd; - fp->fp_io_buffer = job_m_in.m_lc_vfs_fcntl.arg_ptr; - fp->fp_io_nbytes = job_m_in.m_lc_vfs_fcntl.cmd; + fd = job_m_in.m_lc_vfs_fcntl.fd; fcntl_req = job_m_in.m_lc_vfs_fcntl.cmd; fcntl_argx = job_m_in.m_lc_vfs_fcntl.arg_int; + addr = job_m_in.m_lc_vfs_fcntl.arg_ptr; /* Is the file descriptor valid? */ locktype = (fcntl_req == F_FREESP) ? VNODE_WRITE : VNODE_READ; - if ((f = get_filp(fp->fp_fd, locktype)) == NULL) + if ((f = get_filp(fd, locktype)) == NULL) return(err_code); switch (fcntl_req) { @@ -144,16 +146,16 @@ int do_fcntl(void) case F_GETFD: /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */ r = 0; - if (FD_ISSET(fp->fp_fd, &fp->fp_cloexec_set)) + if (FD_ISSET(fd, &fp->fp_cloexec_set)) r = FD_CLOEXEC; break; case F_SETFD: /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). 
*/ if (fcntl_argx & FD_CLOEXEC) - FD_SET(fp->fp_fd, &fp->fp_cloexec_set); + FD_SET(fd, &fp->fp_cloexec_set); else - FD_CLR(fp->fp_fd, &fp->fp_cloexec_set); + FD_CLR(fd, &fp->fp_cloexec_set); break; case F_GETFL: @@ -172,7 +174,7 @@ int do_fcntl(void) case F_SETLK: case F_SETLKW: /* Set or clear a file lock. */ - r = lock_op(f, fcntl_req); + r = lock_op(fd, fcntl_req, addr); break; case F_FREESP: @@ -186,8 +188,8 @@ int do_fcntl(void) else if (!(f->filp_mode & W_BIT)) r = EBADF; else { /* Copy flock data from userspace. */ - r = sys_datacopy_wrapper(who_e, fp->fp_io_buffer, - SELF, (vir_bytes) &flock_arg, sizeof(flock_arg)); + r = sys_datacopy_wrapper(who_e, addr, SELF, + (vir_bytes)&flock_arg, sizeof(flock_arg)); } if (r != OK) break; @@ -294,11 +296,11 @@ int do_fsync(void) struct filp *rfilp; struct vmnt *vmp; dev_t dev; - int r = OK; + int fd, r = OK; - fp->fp_fd = job_m_in.m_lc_vfs_fsync.fd; + fd = job_m_in.m_lc_vfs_fsync.fd; - if ((rfilp = get_filp(fp->fp_fd, VNODE_READ)) == NULL) + if ((rfilp = get_filp(fd, VNODE_READ)) == NULL) return(err_code); dev = rfilp->filp_vno->v_dev; @@ -574,7 +576,6 @@ void pm_fork(endpoint_t pproc, endpoint_t cproc, pid_t cpid) * The parent and child parameters tell who forked off whom. The file * system uses the same slot numbers as the kernel. Only PM makes this call. */ - struct fproc *cp, *pp; int i, parentno, childno; mutex_t c_fp_lock; @@ -609,16 +610,8 @@ void pm_fork(endpoint_t pproc, endpoint_t cproc, pid_t cpid) cp->fp_pid = cpid; cp->fp_endpoint = cproc; - /* A forking process never has an outstanding grant, as it isn't blocking on - * I/O. */ - if (GRANT_VALID(pp->fp_grant)) { - panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp->fp_endpoint, - pp->fp_grant); - } - if (GRANT_VALID(cp->fp_grant)) { - panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp->fp_endpoint, - cp->fp_grant); - } + /* A forking process cannot possibly be suspended on anything. */ + assert(pp->fp_blocked_on == FP_BLOCKED_ON_NONE); /* A child is not a process leader, not being revived, etc. */ cp->fp_flags = FP_NOFLAGS; @@ -907,10 +900,12 @@ int pm_dumpcore(int csig, vir_bytes exe_name) char core_path[PATH_MAX]; char proc_name[PROC_NAME_LEN]; - /* If a process is blocked, fp->fp_fd holds the fd it's blocked on. Free it - * up for use by common_open(). This step is the reason we cannot use this - * function to generate a core dump of a process while it is still running - * (i.e., without terminating it), as it changes the state of the process. + /* In effect, the coredump is generated through the use of calls as if made + * by the process itself. As such, the process must not be doing anything + * else. Therefore, if the process was blocked on anything, unblock it + * first. This step is the reason we cannot use this function to generate a + * core dump of a process while it is still running (i.e., without + * terminating it), as it changes the state of the process. 
*/ if (fp_is_blocked(fp)) unpause(); diff --git a/minix/servers/vfs/open.c b/minix/servers/vfs/open.c index 7dc9e7222..4ace3b985 100644 --- a/minix/servers/vfs/open.c +++ b/minix/servers/vfs/open.c @@ -30,7 +30,7 @@ static char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0}; static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits); -static int pipe_open(struct vnode *vp, mode_t bits, int oflags); +static int pipe_open(int fd, struct vnode *vp, mode_t bits, int oflags); /*===========================================================================* * do_open * @@ -92,14 +92,14 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec) struct vmnt *vmp; struct dmap *dp; struct lookup resolve; - int start = 0; + int fd, start = 0; /* Remap the bottom two bits of oflags. */ bits = (mode_t) mode_map[oflags & O_ACCMODE]; if (!bits) return(EINVAL); /* See if file descriptor and filp slots are available. */ - if ((r = get_fd(fp, start, bits, &fp->fp_fd, &filp)) != OK) + if ((r = get_fd(fp, start, bits, &fd, &filp)) != OK) return(r); lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp); @@ -130,12 +130,12 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec) } /* Claim the file descriptor and filp slot and fill them in. */ - fp->fp_filp[fp->fp_fd] = filp; + fp->fp_filp[fd] = filp; filp->filp_count = 1; filp->filp_vno = vp; filp->filp_flags = oflags; if (oflags & O_CLOEXEC) - FD_SET(fp->fp_fd, &fp->fp_cloexec_set); + FD_SET(fd, &fp->fp_cloexec_set); /* Only do the normal open code if we didn't just create the file. */ if (exist) { @@ -163,7 +163,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec) /* Invoke the driver for special processing. */ dev = vp->v_sdev; /* TTY needs to know about the O_NOCTTY flag. */ - r = cdev_open(dev, bits | (oflags & O_NOCTTY)); + r = cdev_open(fd, dev, bits | (oflags & O_NOCTTY)); vp = filp->filp_vno; /* Might be updated by * cdev_open after cloning */ break; @@ -233,7 +233,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec) filp->filp_flags = oflags; } if (r == OK) { - r = pipe_open(vp, bits, oflags); + r = pipe_open(fd, vp, bits, oflags); } if (r != ENXIO) { /* See if someone else is doing a rd or wt on @@ -244,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec) filp->filp_count = 0; /* don't find self */ if ((filp2 = find_filp(vp, b)) != NULL) { /* Co-reader or writer found. Use it.*/ - fp->fp_filp[fp->fp_fd] = filp2; + fp->fp_filp[fd] = filp2; filp2->filp_count++; filp2->filp_vno = vp; filp2->filp_flags = oflags; @@ -280,13 +280,13 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec) /* If error, release inode. */ if (r != OK) { if (r != SUSPEND) { - fp->fp_filp[fp->fp_fd] = NULL; + fp->fp_filp[fd] = NULL; filp->filp_count = 0; filp->filp_vno = NULL; put_vnode(vp); } } else { - r = fp->fp_fd; + r = fd; } return(r); @@ -480,7 +480,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) /*===========================================================================* * pipe_open * *===========================================================================*/ -static int pipe_open(struct vnode *vp, mode_t bits, int oflags) +static int pipe_open(int fd, struct vnode *vp, mode_t bits, int oflags) { /* This function is called from common_open. 
It checks if * there is at least one reader/writer pair for the pipe, if not @@ -497,6 +497,7 @@ static int pipe_open(struct vnode *vp, mode_t bits, int oflags) if (bits & W_BIT) return(ENXIO); } else { /* Let's wait for the other side to show up */ + fp->fp_popen.fd = fd; suspend(FP_BLOCKED_ON_POPEN); return(SUSPEND); } diff --git a/minix/servers/vfs/pipe.c b/minix/servers/vfs/pipe.c index 795023fe1..0b50c838a 100644 --- a/minix/servers/vfs/pipe.c +++ b/minix/servers/vfs/pipe.c @@ -294,53 +294,37 @@ int notouch /* check only */ *===========================================================================*/ void suspend(int why) { -/* Take measures to suspend the processing of the present system call. - * Store the parameters to be used upon resuming in the process table. - * (Actually they are not used when a process is waiting for an I/O device, - * but they are needed for pipes, and it is not worth making the distinction.) - * The SUSPEND pseudo error should be returned after calling suspend(). +/* Take measures to suspend the processing of the present system call. The + * caller must store the parameters to be used upon resuming in the process + * table as appropriate. The SUSPEND pseudo error should be returned after + * calling suspend(). */ + assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE); + if (why == FP_BLOCKED_ON_POPEN || why == FP_BLOCKED_ON_PIPE) /* #procs susp'ed on pipe*/ susp_count++; fp->fp_blocked_on = why; - assert(fp->fp_grant == GRANT_INVALID || !GRANT_VALID(fp->fp_grant)); - fp->fp_block_callnr = job_call_nr; -} - -/*===========================================================================* - * wait_for * - *===========================================================================*/ -void wait_for(endpoint_t who) -{ - if(who == NONE || who == ANY) - panic("suspend on NONE or ANY"); - suspend(FP_BLOCKED_ON_OTHER); - fp->fp_task = who; } /*===========================================================================* * pipe_suspend * *===========================================================================*/ -void pipe_suspend(struct filp * filp __unused, vir_bytes buf, size_t size) +void pipe_suspend(int callnr, int fd, vir_bytes buf, size_t size, + size_t cum_io) { /* Take measures to suspend the processing of the present system call. * Store the parameters to be used upon resuming in the process table. */ - /* We can only get here through an I/O call, which comes with a file - * descriptor, and that file descriptor must therefore correspond to the - * target file pointer of the I/O request. The process is blocked on the I/O - * call, and thus, the file descriptor will remain valid. Therefore, we can, - * and will, use the file descriptor to get the file pointer again later. - */ - assert(fp->fp_filp[fp->fp_fd] == filp); - - fp->fp_io_buffer = buf; - fp->fp_io_nbytes = size; + fp->fp_pipe.callnr = callnr; + fp->fp_pipe.fd = fd; + fp->fp_pipe.buf = buf; + fp->fp_pipe.nbytes = size; + fp->fp_pipe.cum_io = cum_io; suspend(FP_BLOCKED_ON_PIPE); } @@ -350,14 +334,15 @@ void pipe_suspend(struct filp * filp __unused, vir_bytes buf, size_t size) *===========================================================================*/ void unsuspend_by_endpt(endpoint_t proc_e) { -/* Revive processes waiting for drivers (SUSPENDed) that have disappeared with - * return code EAGAIN. +/* Revive processes waiting for drivers (SUSPENDed) that have disappeared, with + * return code EIO. 
*/ struct fproc *rp; for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) { if (rp->fp_pid == PID_FREE) continue; - if (rp->fp_blocked_on == FP_BLOCKED_ON_OTHER && rp->fp_task == proc_e) + if (rp->fp_blocked_on == FP_BLOCKED_ON_CDEV && + rp->fp_cdev.endpt == proc_e) revive(rp->fp_endpoint, EIO); } @@ -371,18 +356,18 @@ void unsuspend_by_endpt(endpoint_t proc_e) /*===========================================================================* * release * *===========================================================================*/ -void release(vp, op, count) -register struct vnode *vp; /* inode of pipe */ -int op; /* VFS_READ, VFS_WRITE, or VFS_OPEN */ -int count; /* max number of processes to release */ +void release(struct vnode * vp, int op, int count) { -/* Check to see if any process is hanging on vnode 'vp'. If one is, and it - * was trying to perform the call indicated by 'op', release it. +/* Check to see if any process is hanging on pipe vnode 'vp'. If one is, and it + * was trying to perform the call indicated by 'op' - one of VFS_OPEN, + * VFS_READ, or VFS_WRITE - release it. The 'count' parameter indicates the + * maximum number of processes to release, which allows us to stop searching + * early in some cases. */ register struct fproc *rp; struct filp *f; - int selop; + int fd, selop; /* Trying to perform the call also includes SELECTing on it with that * operation. @@ -406,22 +391,27 @@ int count; /* max number of processes to release */ /* Search the proc table. */ for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) { + /* Just to make sure: + * - FP_BLOCKED_ON_POPEN implies the original request was VFS_OPEN; + * - FP_BLOCKED_ON_PIPE may be the result of VFS_READ and VFS_WRITE, + * and one of those two numbers is stored in fp_pipe.callnr. + */ if (rp->fp_pid != PID_FREE && fp_is_blocked(rp) && - !(rp->fp_flags & FP_REVIVED) && rp->fp_block_callnr == op) { + !(rp->fp_flags & FP_REVIVED) && + ((op == VFS_OPEN && rp->fp_blocked_on == FP_BLOCKED_ON_POPEN) || + (op != VFS_OPEN && rp->fp_blocked_on == FP_BLOCKED_ON_PIPE && + op == rp->fp_pipe.callnr))) { /* Find the vnode. Depending on the reason the process was * suspended, there are different ways of finding it. */ - - if (rp->fp_blocked_on == FP_BLOCKED_ON_POPEN || - rp->fp_blocked_on == FP_BLOCKED_ON_PIPE || - rp->fp_blocked_on == FP_BLOCKED_ON_LOCK || - rp->fp_blocked_on == FP_BLOCKED_ON_OTHER) { - f = rp->fp_filp[rp->fp_fd]; - if (f == NULL || f->filp_mode == FILP_CLOSED) - continue; - if (f->filp_vno != vp) - continue; - } else + if (rp->fp_blocked_on == FP_BLOCKED_ON_POPEN) + fd = rp->fp_popen.fd; + else + fd = rp->fp_pipe.fd; + f = rp->fp_filp[fd]; + if (f == NULL || f->filp_mode == FILP_CLOSED) + continue; + if (f->filp_vno != vp) continue; /* We found the vnode. Revive process. */ @@ -442,53 +432,44 @@ void revive(endpoint_t proc_e, int returned) { /* Revive a previously blocked process. When a process hangs on tty, this * is the way it is eventually released. For processes blocked on _SELECT and - * _OTHER, this function MUST NOT block its calling thread. + * _CDEV, this function MUST NOT block its calling thread. */ struct fproc *rfp; int blocked_on; - int fd_nr, slot; + int slot; if (proc_e == NONE || isokendpt(proc_e, &slot) != OK) return; rfp = &fproc[slot]; if (!fp_is_blocked(rfp) || (rfp->fp_flags & FP_REVIVED)) return; - /* The 'reviving' flag only applies to pipes. Processes waiting for TTY get - * a message right away. The revival process is different for TTY and pipes. 
- * For select and TTY revival, the work is already done, for pipes it is not: - * the proc must be restarted so it can try again. + /* The 'reviving' flag applies to pipe I/O and file locks. Processes waiting + * on those suspension types need more processing, and will be unblocked from + * the main loop later. Processes suspended for other reasons get a reply + * right away, and as such, have their suspension cleared right here as well. */ blocked_on = rfp->fp_blocked_on; - fd_nr = rfp->fp_fd; - if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_LOCK) { + if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_FLOCK) { /* Revive a process suspended on a pipe or lock. */ rfp->fp_flags |= FP_REVIVED; reviving++; /* process was waiting on pipe or lock */ } else { rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; - /* TODO: we could reset rfp->fp_fd to (e.g.) -1 here, but since its - * value is not always bounds checked elsewhere, this might do more - * harm than good right now. - */ if (blocked_on == FP_BLOCKED_ON_POPEN) { /* process blocked in open or create */ - replycode(proc_e, fd_nr); + replycode(proc_e, rfp->fp_popen.fd); } else if (blocked_on == FP_BLOCKED_ON_SELECT) { replycode(proc_e, returned); } else { - /* Revive a process suspended on TTY or other device. - * Pretend it wants only what there is. - */ - rfp->fp_io_nbytes = returned; + assert(blocked_on == FP_BLOCKED_ON_CDEV); /* If a grant has been issued by FS for this I/O, revoke * it again now that I/O is done. */ - if (GRANT_VALID(rfp->fp_grant)) { - if(cpf_revoke(rfp->fp_grant) == -1) { + if (GRANT_VALID(rfp->fp_cdev.grant)) { + if (cpf_revoke(rfp->fp_cdev.grant) == -1) { panic("VFS: revoke failed for grant: %d", - rfp->fp_grant); + rfp->fp_cdev.grant); } - rfp->fp_grant = GRANT_INVALID; } replycode(proc_e, returned);/* unblock the process */ } @@ -504,9 +485,7 @@ void unpause(void) /* A signal has been sent to a user who is paused on the file system. * Abort the system call with the EINTR error message. */ - int blocked_on, fild, status = EINTR; - struct filp *f; - dev_t dev; + int blocked_on, status = EINTR; int wasreviving = 0; if (!fp_is_blocked(fp)) return; @@ -527,17 +506,13 @@ void unpause(void) switch (blocked_on) { case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */ /* If the operation succeeded partially, return the bytes - * processed so far, and clear the remembered state. Otherwise, - * return EINTR as usual. + * processed so far. Otherwise, return EINTR as usual. */ - if (fp->fp_cum_io_partial > 0) { - status = fp->fp_cum_io_partial; - - fp->fp_cum_io_partial = 0; - } + if (fp->fp_pipe.cum_io > 0) + status = fp->fp_pipe.cum_io; break; - case FP_BLOCKED_ON_LOCK:/* process trying to set a lock with FCNTL */ + case FP_BLOCKED_ON_FLOCK:/* process trying to set a lock with FCNTL */ break; case FP_BLOCKED_ON_SELECT:/* process blocking on select() */ @@ -547,19 +522,9 @@ void unpause(void) case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */ break; - case FP_BLOCKED_ON_OTHER:/* process trying to do device I/O (e.g. 
tty)*/ - fild = fp->fp_fd; - if (fild < 0 || fild >= OPEN_MAX) - panic("file descriptor out-of-range"); - f = fp->fp_filp[fild]; - if(!f) { - sys_diagctl_stacktrace(fp->fp_endpoint); - panic("process %d blocked on empty fd %d", - fp->fp_endpoint, fild); - } - dev = f->filp_vno->v_sdev; /* device hung on */ - - status = cdev_cancel(dev); + case FP_BLOCKED_ON_CDEV: /* process blocked on character device I/O */ + status = cdev_cancel(fp->fp_cdev.dev, fp->fp_cdev.endpt, + fp->fp_cdev.grant); break; default : diff --git a/minix/servers/vfs/proto.h b/minix/servers/vfs/proto.h index d031282d4..f077a251d 100644 --- a/minix/servers/vfs/proto.h +++ b/minix/servers/vfs/proto.h @@ -30,13 +30,13 @@ void send_work(void); int vm_vfs_procctl_handlemem(endpoint_t ep, vir_bytes mem, vir_bytes len, int flags); /* device.c */ -int cdev_open(dev_t dev, int flags); +int cdev_open(int fd, dev_t dev, int flags); int cdev_close(dev_t dev); int cdev_io(int op, dev_t dev, endpoint_t proc_e, vir_bytes buf, off_t pos, unsigned long bytes, int flags); dev_t cdev_map(dev_t dev, struct fproc *rfp); int cdev_select(dev_t dev, int ops); -int cdev_cancel(dev_t dev); +int cdev_cancel(dev_t dev, endpoint_t endpt, cp_grant_id_t grant); void cdev_reply(void); int bdev_open(dev_t dev, int access); int bdev_close(dev_t dev); @@ -94,7 +94,7 @@ int truncate_vnode(struct vnode *vp, off_t newsize); int rdlink_direct(char *orig_path, char *link_path, struct fproc *rfp); /* lock.c */ -int lock_op(struct filp *f, int req); +int lock_op(int fd, int req, vir_bytes arg); void lock_revive(void); /* main.c */ @@ -169,9 +169,9 @@ int pipe_check(struct filp *filp, int rw_flag, int oflags, int bytes, void release(struct vnode *vp, int op, int count); void revive(endpoint_t proc_e, int returned); void suspend(int why); -void pipe_suspend(struct filp *rfilp, vir_bytes buf, size_t size); +void pipe_suspend(int callnr, int fd, vir_bytes buf, size_t size, + size_t cum_io); void unsuspend_by_endpt(endpoint_t proc_e); -void wait_for(endpoint_t proc_e); /* protect.c */ int do_access(void); @@ -189,12 +189,12 @@ void lock_bsf(void); void unlock_bsf(void); void check_bsf_lock(void); int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t bytes); -int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd, vir_bytes buf, - size_t bytes); -int read_write(struct fproc *rfp, int rw_flag, struct filp *f, vir_bytes buffer, - size_t nbytes, endpoint_t for_e); -int rw_pipe(int rw_flag, endpoint_t usr, struct filp *f, vir_bytes buf, - size_t req_size); +int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd, + vir_bytes buf, size_t bytes); +int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f, + vir_bytes buffer, size_t nbytes, endpoint_t for_e); +int rw_pipe(int rw_flag, endpoint_t usr, struct filp *f, int callnr, int fd, + vir_bytes buf, size_t nbytes, size_t cum_io); /* request.c */ int req_breadwrite(endpoint_t fs_e, endpoint_t user_e, dev_t dev, off_t pos, diff --git a/minix/servers/vfs/read.c b/minix/servers/vfs/read.c index 5f0a5e783..c28831e6e 100644 --- a/minix/servers/vfs/read.c +++ b/minix/servers/vfs/read.c @@ -29,8 +29,17 @@ *===========================================================================*/ int do_read(void) { + + /* + * This field is currently reserved for internal usage only, and must be set + * to zero by the caller. We may use it for future SA_RESTART support just + * like we are using it internally now. 
+ */ + if (job_m_in.m_lc_vfs_readwrite.cum_io != 0) + return(EINVAL); + return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd, - job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len)); + job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len)); } @@ -80,8 +89,8 @@ void check_bsf_lock(void) /*===========================================================================* * actual_read_write_peek * *===========================================================================*/ -int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd, - vir_bytes io_buf, size_t io_nbytes) +int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd, + vir_bytes buf, size_t nbytes) { /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */ struct filp *f; @@ -91,12 +100,8 @@ int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd, if(rw_flag == WRITING) ro = 0; - rfp->fp_fd = io_fd; - rfp->fp_io_buffer = io_buf; - rfp->fp_io_nbytes = io_nbytes; - locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ; - if ((f = get_filp2(rfp, rfp->fp_fd, locktype)) == NULL) + if ((f = get_filp2(rfp, fd, locktype)) == NULL) return(err_code); assert(f->filp_count > 0); @@ -105,12 +110,12 @@ int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd, unlock_filp(f); return(EBADF); } - if (rfp->fp_io_nbytes == 0) { + if (nbytes == 0) { unlock_filp(f); return(0); /* so char special files need not check for 0*/ } - r = read_write(rfp, rw_flag, f, rfp->fp_io_buffer, rfp->fp_io_nbytes, who_e); + r = read_write(rfp, rw_flag, fd, f, buf, nbytes, who_e); unlock_filp(f); return(r); @@ -119,15 +124,15 @@ int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd, /*===========================================================================* * do_read_write_peek * *===========================================================================*/ -int do_read_write_peek(int rw_flag, int io_fd, vir_bytes io_buf, size_t io_nbytes) +int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t nbytes) { - return actual_read_write_peek(fp, rw_flag, io_fd, io_buf, io_nbytes); + return actual_read_write_peek(fp, rw_flag, fd, buf, nbytes); } /*===========================================================================* * read_write * *===========================================================================*/ -int read_write(struct fproc *rfp, int rw_flag, struct filp *f, +int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f, vir_bytes buf, size_t size, endpoint_t for_e) { register struct vnode *vp; @@ -146,17 +151,14 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f, if (size > SSIZE_MAX) return(EINVAL); - op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE); - if (S_ISFIFO(vp->v_mode)) { /* Pipes */ - if (rfp->fp_cum_io_partial != 0) { - panic("VFS: read_write: fp_cum_io_partial not clear"); - } if(rw_flag == PEEKING) { printf("read_write: peek on pipe makes no sense\n"); return EINVAL; } - r = rw_pipe(rw_flag, for_e, f, buf, size); + assert(fd != -1); + op = (rw_flag == READING ? VFS_READ : VFS_WRITE); + r = rw_pipe(rw_flag, for_e, f, op, fd, buf, size, 0 /*cum_io*/); } else if (S_ISCHR(vp->v_mode)) { /* Character special files. */ if(rw_flag == PEEKING) { printf("read_write: peek on char device makes no sense\n"); @@ -167,6 +169,7 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f, panic("VFS: read_write tries to access char dev NO_DEV"); dev = vp->v_sdev; + op = (rw_flag == READING ? 
CDEV_READ : CDEV_WRITE); r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags); if (r >= 0) { @@ -268,16 +271,22 @@ int read_write(struct fproc *rfp, int rw_flag, struct filp *f, int do_getdents(void) { /* Perform the getdents(fd, buf, size) system call. */ - int r = OK; + int fd, r = OK; off_t new_pos; + vir_bytes buf; + size_t size; register struct filp *rfilp; - fp->fp_fd = job_m_in.m_lc_vfs_readwrite.fd; - fp->fp_io_buffer = job_m_in.m_lc_vfs_readwrite.buf; - fp->fp_io_nbytes = job_m_in.m_lc_vfs_readwrite.len; + /* This field must always be set to zero for getdents(). */ + if (job_m_in.m_lc_vfs_readwrite.cum_io != 0) + return(EINVAL); + + fd = job_m_in.m_lc_vfs_readwrite.fd; + buf = job_m_in.m_lc_vfs_readwrite.buf; + size = job_m_in.m_lc_vfs_readwrite.len; /* Is the file descriptor valid? */ - if ( (rfilp = get_filp(fp->fp_fd, VNODE_READ)) == NULL) + if ( (rfilp = get_filp(fd, VNODE_READ)) == NULL) return(err_code); if (!(rfilp->filp_mode & R_BIT)) @@ -287,8 +296,7 @@ int do_getdents(void) if (r == OK) { r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr, - rfilp->filp_pos, fp->fp_io_buffer, fp->fp_io_nbytes, - &new_pos, 0); + rfilp->filp_pos, buf, size, &new_pos, 0); if (r > 0) rfilp->filp_pos = new_pos; } @@ -301,15 +309,11 @@ int do_getdents(void) /*===========================================================================* * rw_pipe * *===========================================================================*/ -int rw_pipe(rw_flag, usr_e, f, buf, req_size) -int rw_flag; /* READING or WRITING */ -endpoint_t usr_e; -struct filp *f; -vir_bytes buf; -size_t req_size; +int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, int callnr, int fd, + vir_bytes buf, size_t nbytes, size_t cum_io) { - int r, oflags, partial_pipe = 0; - size_t size, cum_io; + int r, oflags, partial_pipe = FALSE; + size_t size; size_t cum_io_incr; struct vnode *vp; off_t position, new_pos; @@ -324,19 +328,11 @@ size_t req_size; assert(rw_flag == READING || rw_flag == WRITING); - /* fp->fp_cum_io_partial is only nonzero when doing partial writes. - * We clear the field immediately here because we expect completion or error; - * its value must be (re)assigned if we end up suspending the write (again). - */ - cum_io = fp->fp_cum_io_partial; - fp->fp_cum_io_partial = 0; - - r = pipe_check(f, rw_flag, oflags, req_size, 0); + r = pipe_check(f, rw_flag, oflags, nbytes, 0); if (r <= 0) { - if (r == SUSPEND) { - fp->fp_cum_io_partial = cum_io; - pipe_suspend(f, buf, req_size); - } + if (r == SUSPEND) + pipe_suspend(callnr, fd, buf, nbytes, cum_io); + /* If pipe_check returns an error instead of suspending the call, we * return that error, even if we are resuming a partially completed * operation (ie, a large blocking write), to match NetBSD's behavior. @@ -345,7 +341,7 @@ size_t req_size; } size = r; - if (size < req_size) partial_pipe = 1; + if (size < nbytes) partial_pipe = TRUE; /* Truncate read request at size. 
*/ if (rw_flag == READING && size > vp->v_size) { @@ -365,7 +361,7 @@ size_t req_size; cum_io += cum_io_incr; buf += cum_io_incr; - req_size -= cum_io_incr; + nbytes -= cum_io_incr; if (rw_flag == READING) vp->v_size -= cum_io_incr; @@ -376,16 +372,11 @@ size_t req_size; /* partial write on pipe with */ /* O_NONBLOCK, return write count */ if (!(oflags & O_NONBLOCK)) { - /* partial write on pipe with req_size > PIPE_BUF, - * non-atomic - */ - fp->fp_cum_io_partial = cum_io; - pipe_suspend(f, buf, req_size); + /* partial write on pipe with nbytes > PIPE_BUF, non-atomic */ + pipe_suspend(callnr, fd, buf, nbytes, cum_io); return(SUSPEND); } } - assert(fp->fp_cum_io_partial == 0); - return(cum_io); } diff --git a/minix/servers/vfs/write.c b/minix/servers/vfs/write.c index 951e8ad25..eeb04b60d 100644 --- a/minix/servers/vfs/write.c +++ b/minix/servers/vfs/write.c @@ -15,6 +15,11 @@ int do_write(void) { /* Perform the write(fd, buffer, nbytes) system call. */ + + /* See the comment in do_read(). */ + if (job_m_in.m_lc_vfs_readwrite.cum_io != 0) + return(EINVAL); + return(do_read_write_peek(WRITING, job_m_in.m_lc_vfs_readwrite.fd, job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len)); }
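
The hunks above establish a simple discipline: a blocking code path fills exactly one member of the new fp_u union, then records the matching FP_BLOCKED_ON_* value via suspend(), and the resumption path consumes that same member before clearing fp_blocked_on again. The condensed C sketch below illustrates that discipline for the character-device case, using the field and constant names from the fproc.h and device.c hunks; it is a simplified stand-in rather than code from the patch — the struct is trimmed to a single union member, the endpoint_t/cp_grant_id_t typedefs replace the real system types, and the suspend_on_cdev()/resume_from_cdev() helpers are hypothetical.

#include <sys/types.h>			/* dev_t */

typedef int endpoint_t;			/* stand-ins for the MINIX system types; */
typedef int cp_grant_id_t;		/* the real ones come from kernel headers */

#define FP_BLOCKED_ON_NONE	0	/* not blocked */
#define FP_BLOCKED_ON_CDEV	5	/* blocked on character device I/O */

struct fproc_sketch {
	int fp_blocked_on;		/* which FP_BLOCKED_ON_* state, if any */
	union {				/* valid only for the matching state */
		struct {		/* FP_BLOCKED_ON_CDEV */
			dev_t dev;		/* device number for blocking call */
			endpoint_t endpt;	/* driver endpoint */
			cp_grant_id_t grant;	/* data grant, revoked on resume */
		} u_cdev;
		/* ...one member per other FP_BLOCKED_ON_* state... */
	} fp_u;
};
#define fp_cdev fp_u.u_cdev

/* Right before suspending: fill the member, then record the blocking type,
 * as cdev_io() does before returning SUSPEND.
 */
static void suspend_on_cdev(struct fproc_sketch *rfp, dev_t dev,
	endpoint_t driver, cp_grant_id_t gid)
{
	rfp->fp_cdev.dev = dev;
	rfp->fp_cdev.endpt = driver;
	rfp->fp_cdev.grant = gid;
	rfp->fp_blocked_on = FP_BLOCKED_ON_CDEV;
}

/* Right after the driver reply (or a cancellation): consume the member and
 * clear the blocking type, as revive() and unpause() do. The caller revokes
 * the returned grant, mirroring cpf_revoke() in the real code.
 */
static cp_grant_id_t resume_from_cdev(struct fproc_sketch *rfp)
{
	cp_grant_id_t gid = rfp->fp_cdev.grant;

	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
	return gid;
}

Because each union member is meaningful only while fp_blocked_on holds the corresponding value, readers of the state must check the blocking type first, which is exactly what the is/dmp_fs.c and servers/vfs/misc.c hunks now do before touching fp_cdev.endpt.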
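
On the userland side, the only visible change is the new cum_io field in mess_lc_vfs_readwrite, which callers must leave at zero: do_read(), do_write(), and do_getdents() now reject a nonzero value with EINVAL, keeping the field reserved for VFS-internal resumption state (and possibly SA_RESTART support later, as the do_read() comment notes). Below is a minimal caller sketch modeled on the patched libc read() stub; the name read_sketch is illustrative, and the includes and the memset are assumptions about the surrounding stub code rather than lines taken from the hunks above.

#include <lib.h>	/* message, _syscall(), VFS_PROC_NR, VFS_READ (assumed) */
#include <string.h>
#include <unistd.h>

ssize_t read_sketch(int fd, void *buffer, size_t nbytes)
{
	message m;

	memset(&m, 0, sizeof(m));
	m.m_lc_vfs_readwrite.fd = fd;
	m.m_lc_vfs_readwrite.len = nbytes;
	m.m_lc_vfs_readwrite.buf = (vir_bytes)buffer;
	m.m_lc_vfs_readwrite.cum_io = 0;	/* reserved/internal, must be 0 */

	return _syscall(VFS_PROC_NR, VFS_READ, &m);
}

Inside VFS, the counterpart is that rw_pipe() now receives cum_io as an explicit parameter (0 for a fresh call, the saved partial count on resumption) instead of pulling it out of the removed fp_cum_io_partial field.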