From: Thomas Veerman Date: Thu, 9 Feb 2012 14:24:28 +0000 (+0000) Subject: AVFS: fix various system call interruption issues X-Git-Tag: v3.2.0~81 X-Git-Url: http://zhaoyanbai.com/repos/man.nsec3hash.html?a=commitdiff_plain;h=abd6043a2f26b3a5ccba994ea87137c696c4e839;p=minix.git AVFS: fix various system call interruption issues - When cancelling ioctls, VFS did not remember which file descriptor to cancel and sent bogus to the driver. - Select state was not cleaned up when select()ing process was interrupted. - Process trying to do a system call at the exact same time as a user trying to interrupt the process, could cause the system call worker thread to overwrite state belonging to the worker thread trying to exit the process. This led to hanging threads and eventual system hang when this happens often enough. --- diff --git a/servers/avfs/device.c b/servers/avfs/device.c index e6e019978..926cf322a 100644 --- a/servers/avfs/device.c +++ b/servers/avfs/device.c @@ -547,7 +547,7 @@ PUBLIC int gen_opcl( if (op == DEV_OPEN && dp->dmap_style == STYLE_DEVA) { fp->fp_task = dp->dmap_driver; - worker_wait(dp->dmap_driver); + worker_wait(); } if (is_bdev) @@ -650,7 +650,10 @@ PUBLIC int do_ioctl() register struct vnode *vp; dev_t dev; - if ((f = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code); + scratch(fp).file.fd_nr = m_in.ls_fd; + + if ((f = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL) + return(err_code); vp = f->filp_vno; /* get vnode pointer */ if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL && (vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) { @@ -860,6 +863,7 @@ PUBLIC int clone_opcl( if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) { /* Wait for reply when driver is asynchronous */ + fp->fp_task = dp->dmap_driver; worker_wait(); } diff --git a/servers/avfs/fproc.h b/servers/avfs/fproc.h index f0ae8419d..79ede32f7 100644 --- a/servers/avfs/fproc.h +++ b/servers/avfs/fproc.h @@ -63,6 +63,7 @@ EXTERN struct fproc { #define FP_EXITING 0020 /* Set if process is exiting */ #define FP_PM_PENDING 0040 /* Set if process has pending PM request */ #define FP_SYS_PROC 0100 /* Set if process is a driver or FS */ +#define FP_DROP_WORK 0200 /* Set if process won't accept new work */ /* Field values. */ #define NOT_REVIVING 0xC0FFEEE /* process is not being revived */ diff --git a/servers/avfs/main.c b/servers/avfs/main.c index 143ce9d4b..68ccda822 100644 --- a/servers/avfs/main.c +++ b/servers/avfs/main.c @@ -272,10 +272,11 @@ PRIVATE void *do_fs_reply(struct job *job) return(NULL); } + if (rfp->fp_task != who_e) + printf("AVFS: expected %d to reply, not %d\n", rfp->fp_task, who_e); *rfp->fp_sendrec = m_in; rfp->fp_task = NONE; vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */ - worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */ return(NULL); } @@ -424,8 +425,6 @@ PRIVATE void *do_work(void *arg) */ if (call_nr < 0 || call_nr >= NCALLS) { error = ENOSYS; - } else if (fp->fp_flags & FP_EXITING) { - error = SUSPEND; } else if (fp->fp_pid == PID_FREE) { /* Process vanished before we were able to handle request. * Replying has no use. Just drop it. */ @@ -643,8 +642,8 @@ PUBLIC void lock_proc(struct fproc *rfp, int force_lock) org_m_in = m_in; org_fp = fp; org_self = self; - if (mutex_lock(&rfp->fp_lock) != 0) - panic("unable to lock fproc lock"); + if ((r = mutex_lock(&rfp->fp_lock)) != 0) + panic("unable to lock fproc lock: %d", r); m_in = org_m_in; fp = org_fp; self = org_self; @@ -696,7 +695,10 @@ PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l) } #endif - if (rfp != NULL) unlock_proc(rfp); + if (rfp != NULL) { + rfp->fp_flags &= ~FP_DROP_WORK; + unlock_proc(rfp); + } #if 0 mthread_exit(NULL); @@ -903,6 +905,7 @@ PRIVATE void service_pm() if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) { mutex_unlock(&fp->fp_lock); worker_start(do_dummy); + fp->fp_flags |= FP_DROP_WORK; } fp->fp_job.j_m_in = m_in; diff --git a/servers/avfs/misc.c b/servers/avfs/misc.c index ed866016b..5497bf76e 100644 --- a/servers/avfs/misc.c +++ b/servers/avfs/misc.c @@ -457,6 +457,8 @@ PRIVATE void free_proc(struct fproc *exiter, int flags) /* The rest of these actions is only done when processes actually exit. */ if (!(flags & FP_EXITING)) return; + exiter->fp_flags |= FP_EXITING; + /* Check if any process is SUSPENDed on this driver. * If a driver exits, unmap its entries in the dmap table. * (unmapping has to be done after the first step, because the @@ -471,7 +473,6 @@ PRIVATE void free_proc(struct fproc *exiter, int flags) /* Invalidate endpoint number for error and sanity checks. */ exiter->fp_endpoint = NONE; - exiter->fp_flags |= FP_EXITING; /* If a session leader exits and it has a controlling tty, then revoke * access to its controlling tty from all other processes using it. diff --git a/servers/avfs/pipe.c b/servers/avfs/pipe.c index b339f6baf..0a6d6f49d 100644 --- a/servers/avfs/pipe.c +++ b/servers/avfs/pipe.c @@ -515,8 +515,7 @@ int returned; /* if hanging on task, how many bytes read */ /*===========================================================================* * unpause * *===========================================================================*/ -PUBLIC void unpause(proc_nr_e) -int proc_nr_e; +PUBLIC void unpause(endpoint_t proc_e) { /* A signal has been sent to a user who is paused on the file system. * Abort the system call with the EINTR error message. @@ -529,8 +528,8 @@ int proc_nr_e; message mess; int wasreviving = 0; - if (isokendpt(proc_nr_e, &slot) != OK) { - printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e); + if (isokendpt(proc_e, &slot) != OK) { + printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_e); return; } @@ -552,7 +551,7 @@ int proc_nr_e; break; case FP_BLOCKED_ON_SELECT:/* process blocking on select() */ - select_forget(proc_nr_e); + select_forget(proc_e); break; case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */ @@ -614,7 +613,7 @@ int proc_nr_e; susp_count--; } - reply(proc_nr_e, status); /* signal interrupted call */ + reply(proc_e, status); /* signal interrupted call */ } #if DO_SANITYCHECKS diff --git a/servers/avfs/select.c b/servers/avfs/select.c index b59fe6b1f..90e23ab06 100644 --- a/servers/avfs/select.c +++ b/servers/avfs/select.c @@ -593,9 +593,6 @@ PRIVATE void select_cancel_all(struct selectentry *se) int fd; struct filp *f; - /* Always await results of asynchronous requests */ - assert(!is_deferred(se)); - for (fd = 0; fd < se->nfds; fd++) { if ((f = se->filps[fd]) == NULL) continue; se->filps[fd] = NULL; @@ -620,6 +617,7 @@ PRIVATE void select_cancel_filp(struct filp *f) assert(f); assert(f->filp_selectors >= 0); if (f->filp_selectors == 0) return; + if (f->filp_count == 0) return; select_lock_filp(f, f->filp_select_ops); @@ -644,6 +642,7 @@ PRIVATE void select_return(struct selectentry *se) assert(!is_deferred(se)); /* Not done yet, first wait for async reply */ select_cancel_all(se); + r1 = copy_fdsets(se, se->nfds, TO_PROC); if (r1 != OK) r = r1; @@ -737,6 +736,11 @@ PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e) int wakehim = 0; se = &selecttab[s]; if (se->requestor == NULL) continue; + if (se->requestor->fp_endpoint == proc_e) { + assert(se->requestor->fp_flags & FP_EXITING); + select_cancel_all(se); + continue; + } for (fd = 0; fd < se->nfds; fd++) { if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL) @@ -756,7 +760,6 @@ PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e) } } - /*===========================================================================* * select_reply1 * *===========================================================================*/ @@ -800,39 +803,51 @@ int status; } } - select_lock_filp(f, f->filp_select_ops); - /* No longer waiting for a reply from this device */ - f->filp_select_flags &= ~FSF_BUSY; dp->dmap_sel_filp = NULL; - /* The select call is done now, except when - * - another process started a select on the same filp with possibly a - * different set of operations. - * - a process does a select on the same filp but using different file - * descriptors. - * - the select has a timeout. Upon receiving this reply the operations might - * not be ready yet, so we want to wait for that to ultimately happen. - * Therefore we need to keep remembering what the operations are. */ - if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED))) - f->filp_select_ops = 0; /* done selecting */ - else if (!(f->filp_select_flags & FSF_UPDATE)) - f->filp_select_ops &= ~status; /* there may be operations pending */ - - /* Tell filp owners about result unless we need to wait longer */ - if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) { - if (status > 0) { /* operations ready */ - if (status & SEL_RD) f->filp_select_flags &= ~FSF_RD_BLOCK; - if (status & SEL_WR) f->filp_select_flags &= ~FSF_WR_BLOCK; - if (status & SEL_ERR) f->filp_select_flags &= ~FSF_ERR_BLOCK; - } else if (status < 0) { /* error */ - f->filp_select_flags &= ~FSF_BLOCKED; /* No longer blocking */ - } + /* Process select result only if requestor is still around. That is, the + * corresponding filp is still in use. + */ + if (f->filp_count >= 1) { + select_lock_filp(f, f->filp_select_ops); + f->filp_select_flags &= ~FSF_BUSY; + + /* The select call is done now, except when + * - another process started a select on the same filp with possibly a + * different set of operations. + * - a process does a select on the same filp but using different file + * descriptors. + * - the select has a timeout. Upon receiving this reply the operations + * might not be ready yet, so we want to wait for that to ultimately + * happen. + * Therefore we need to keep remembering what the operations are. + */ + if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED))) + f->filp_select_ops = 0; /* done selecting */ + else if (!(f->filp_select_flags & FSF_UPDATE)) + /* there may be operations pending */ + f->filp_select_ops &= ~status; + + /* Tell filp owners about result unless we need to wait longer */ + if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) { + if (status > 0) { /* operations ready */ + if (status & SEL_RD) + f->filp_select_flags &= ~FSF_RD_BLOCK; + if (status & SEL_WR) + f->filp_select_flags &= ~FSF_WR_BLOCK; + if (status & SEL_ERR) + f->filp_select_flags &= ~FSF_ERR_BLOCK; + } else if (status < 0) { /* error */ + /* Always unblock upon error */ + f->filp_select_flags &= ~FSF_BLOCKED; + } - unlock_filp(f); - filp_status(f, status); /* Tell filp owners about the results */ - } else { - unlock_filp(f); + unlock_filp(f); + filp_status(f, status); /* Tell filp owners about the results */ + } else { + unlock_filp(f); + } } select_restart_filps(); diff --git a/servers/avfs/worker.c b/servers/avfs/worker.c index 879363471..9c6324171 100644 --- a/servers/avfs/worker.c +++ b/servers/avfs/worker.c @@ -208,6 +208,10 @@ PUBLIC void worker_start(void *(*func)(void *arg)) int i; struct worker_thread *worker; + if (fp->fp_flags & FP_DROP_WORK) { + return; /* This process is not allowed to accept new work */ + } + worker = NULL; for (i = 0; i < NR_WTHREADS; i++) { if (workers[i].w_job.j_func == NULL) {