From: David van Moolenbroek Date: Mon, 25 Jul 2016 11:11:37 +0000 (+0000) Subject: VFS: support for suspending close(2) for sockets X-Git-Url: http://zhaoyanbai.com/repos/dnssec-keygen.html?a=commitdiff_plain;h=491d647a3b6c5e781017a8842cade543fc0b74a3;p=minix.git VFS: support for suspending close(2) for sockets This change effectively adds the VFS side of support for the SO_LINGER socket option, by allowing file descriptor close operations to be suspended (and later resumed) by socket drivers. Currently, support is limited to the close(2) system call--in all other cases where file descriptors are closed (dup2, close-on-exec, process exit..), the close operation still completes instantly. As a general policy, the close(2) return value will always indicate that the file descriptor has been closed: either 0, or -1 with errno set to EINPROGRESS. The latter error may be thrown only when a suspended close is interrupted by a signal. As necessary for UDS, this change also introduces a closenb(2) system call extension, allowing the caller to bypass blocking SO_LINGER close behavior. This extension allows UDS to avoid blocking on closing the last reference to an in-flight file descriptor, in an atomic fashion. The extension is currently part of libsys, but there is no reason why userland would not be allowed to make this call, so it is deliberately not protected from use by userland. Change-Id: Iec77d6665232110346180017fc1300b1614910b7 --- diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h index 7a3507106..5bfac14e7 100644 --- a/minix/include/minix/ipc.h +++ b/minix/include/minix/ipc.h @@ -621,8 +621,9 @@ _ASSERT_MSG_SIZE(mess_lc_vfs_chown); typedef struct { int fd; + int nblock; - uint8_t padding[52]; + uint8_t padding[48]; } mess_lc_vfs_close; _ASSERT_MSG_SIZE(mess_lc_vfs_close); diff --git a/minix/include/minix/syslib.h b/minix/include/minix/syslib.h index a6a80a6c6..b5f573863 100644 --- a/minix/include/minix/syslib.h +++ b/minix/include/minix/syslib.h @@ -274,6 +274,7 @@ int copyfd(endpoint_t endpt, int fd, int what); #define COPYFD_FROM 0 /* copy file descriptor from remote process */ #define COPYFD_TO 1 /* copy file descriptor to remote process */ #define COPYFD_CLOSE 2 /* close file descriptor in remote process */ +int closenb(int fd); /* * These are also the event numbers used in PROC_EVENT messages, but in order diff --git a/minix/lib/libc/sys/close.c b/minix/lib/libc/sys/close.c index f74571bde..ac8c7f54c 100644 --- a/minix/lib/libc/sys/close.c +++ b/minix/lib/libc/sys/close.c @@ -5,12 +5,14 @@ #include #include -int close(fd) -int fd; +int +close(int fd) { - message m; + message m; - memset(&m, 0, sizeof(m)); - m.m_lc_vfs_close.fd = fd; - return(_syscall(VFS_PROC_NR, VFS_CLOSE, &m)); + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_close.fd = fd; + m.m_lc_vfs_close.nblock = 0; + + return _syscall(VFS_PROC_NR, VFS_CLOSE, &m); } diff --git a/minix/lib/libsys/Makefile b/minix/lib/libsys/Makefile index c4ebc869b..84a5b3aa0 100644 --- a/minix/lib/libsys/Makefile +++ b/minix/lib/libsys/Makefile @@ -15,6 +15,7 @@ SRCS+= \ asynsend.c \ checkperms.c \ clock_time.c \ + closenb.c \ copyfd.c \ cpuavg.c \ ds.c \ diff --git a/minix/lib/libsys/closenb.c b/minix/lib/libsys/closenb.c new file mode 100644 index 000000000..5603708cc --- /dev/null +++ b/minix/lib/libsys/closenb.c @@ -0,0 +1,27 @@ +#include "syslib.h" + +#include + +/* + * Non-blocking variant of close(2). This call is to be used by system + * services that need to close arbitrary local file descriptors. The purpose + * of the call is to avoid that such services end up blocking on closing socket + * file descriptors with the SO_LINGER socket option enabled. They cannot put + * the file pointer in non-blocking mode to that end, because the file pointer + * may be shared with other processes. + * + * Even though this call is defined for system services only, there is no harm + * in letting arbitrary user processes use this functionality. Thus, it needs + * no separate VFS call number. + */ +int +closenb(int fd) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_close.fd = fd; + m.m_lc_vfs_close.nblock = 1; + + return _taskcall(VFS_PROC_NR, VFS_CLOSE, &m); +} diff --git a/minix/servers/vfs/exec.c b/minix/servers/vfs/exec.c index 24704f08a..f37fc0b2e 100644 --- a/minix/servers/vfs/exec.c +++ b/minix/servers/vfs/exec.c @@ -391,7 +391,7 @@ pm_execfinal: } if(execi.vmfd >= 0 && !execi.vmfd_used) { - if(OK != close_fd(vmfp, execi.vmfd)) { + if(OK != close_fd(vmfp, execi.vmfd, FALSE /*may_suspend*/)) { printf("VFS: unexpected close fail of vm fd\n"); } } @@ -727,7 +727,7 @@ static void clo_exec(struct fproc *rfp) /* Check the file desriptors one by one for presence of FD_CLOEXEC. */ for (i = 0; i < OPEN_MAX; i++) if ( FD_ISSET(i, &rfp->fp_cloexec_set)) - (void) close_fd(rfp, i); + (void) close_fd(rfp, i, FALSE /*may_suspend*/); } /*===========================================================================* diff --git a/minix/servers/vfs/filedes.c b/minix/servers/vfs/filedes.c index 1bbeeb6ef..4ba4faf8a 100644 --- a/minix/servers/vfs/filedes.c +++ b/minix/servers/vfs/filedes.c @@ -410,12 +410,17 @@ unlock_filps(struct filp *filp1, struct filp *filp2) /*===========================================================================* * close_filp * *===========================================================================*/ -void -close_filp(struct filp *f) +int +close_filp(struct filp * f, int may_suspend) { -/* Close a file. Will also unlock filp when done */ - - int rw; +/* Close a file. Will also unlock filp when done. The 'may_suspend' flag + * indicates whether the current process may be suspended closing a socket. + * That is currently supported only when the user issued a close(2), and (only) + * in that case may this function return SUSPEND instead of OK. In all other + * cases, this function will always return OK. It will never return another + * error code, for reasons explained below. + */ + int r, rw; dev_t dev; struct vnode *vp; @@ -425,6 +430,8 @@ close_filp(struct filp *f) vp = f->filp_vno; + r = OK; + if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) { /* Check to see if the file is special. */ if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode) || @@ -446,19 +453,34 @@ close_filp(struct filp *f) (void) cdev_close(dev); /* Ignore errors */ } else { /* - * TODO: this should be completely redone. Sockets may - * take a while to be closed (SO_LINGER etc), and thus, - * we should be able to issue a suspending close to a + * Sockets may take a while to be closed (SO_LINGER), + * and thus, we may issue a suspending close to a * socket driver. Getting this working for close(2) is - * the easy case, but there's also eg dup2(2), which if - * interrupted by a signal should fail without closing - * the file descriptor. Then there are cases where the - * close should probably never block: close-on-exec, - * exit, and UDS closing in-flight FDs (currently just - * using close(2), but it could set the FD to non- - * blocking) for instance. There is much to do here. + * the easy case, and that is all we support for now. + * However, there is also eg dup2(2), which if + * interrupted by a signal should technically fail + * without closing the file descriptor. Then there are + * cases where the close should never block: process + * exit and close-on-exec for example. Getting all + * such cases right is left to future work; currently + * they all perform thread-blocking socket closes and + * thus cause the socket to perform lingering in the + * background if at all. + */ + assert(!may_suspend || job_call_nr == VFS_CLOSE); + + if (f->filp_flags & O_NONBLOCK) + may_suspend = FALSE; + + r = sdev_close(dev, may_suspend); + + /* + * Returning a non-OK error is a bad idea, because it + * will leave the application wondering whether closing + * the file descriptor actually succeeded. */ - (void) sdev_close(dev); /* Ignore errors */ + if (r != SUSPEND) + r = OK; } f->filp_mode = FILP_CLOSED; @@ -492,6 +514,8 @@ close_filp(struct filp *f) } mutex_unlock(&f->filp_lock); + + return r; } /*===========================================================================* diff --git a/minix/servers/vfs/misc.c b/minix/servers/vfs/misc.c index 4ea85f11c..b9336f3cc 100644 --- a/minix/servers/vfs/misc.c +++ b/minix/servers/vfs/misc.c @@ -452,7 +452,7 @@ int do_vm_call(void) } case VMVFSREQ_FDCLOSE: { - result = close_fd(fp, req_fd); + result = close_fd(fp, req_fd, FALSE /*may_suspend*/); if(result != OK) { printf("VFS: VM fd close for fd %d, %d (%d)\n", req_fd, fp->fp_endpoint, result); @@ -645,7 +645,7 @@ static void free_proc(int flags) /* Loop on file descriptors, closing any that are open. */ for (i = 0; i < OPEN_MAX; i++) { - (void) close_fd(fp, i); + (void) close_fd(fp, i, FALSE /*may_suspend*/); } /* Release root and working directories. */ diff --git a/minix/servers/vfs/open.c b/minix/servers/vfs/open.c index b5e3f8513..fc5b0c227 100644 --- a/minix/servers/vfs/open.c +++ b/minix/servers/vfs/open.c @@ -673,9 +673,13 @@ int do_lseek(void) *===========================================================================*/ int do_close(void) { -/* Perform the close(fd) system call. */ - int thefd = job_m_in.m_lc_vfs_close.fd; - return close_fd(fp, thefd); +/* Perform the close(fd) or closenb(fd) system call. */ + int fd, nblock; + + fd = job_m_in.m_lc_vfs_close.fd; + nblock = job_m_in.m_lc_vfs_close.nblock; + + return close_fd(fp, fd, !nblock /*may_suspend*/); } @@ -683,13 +687,13 @@ int do_close(void) * close_fd * *===========================================================================*/ int -close_fd(struct fproc *rfp, int fd_nr) +close_fd(struct fproc * rfp, int fd_nr, int may_suspend) { /* Perform the close(fd) system call. */ register struct filp *rfilp; register struct vnode *vp; struct file_lock *flp; - int lock_count; + int r, lock_count; /* First locate the vnode that belongs to the file descriptor. */ if ( (rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL)) == NULL) return(err_code); @@ -701,7 +705,7 @@ close_fd(struct fproc *rfp, int fd_nr) */ rfp->fp_filp[fd_nr] = NULL; - close_filp(rfilp); + r = close_filp(rfilp, may_suspend); FD_CLR(fd_nr, &rfp->fp_cloexec_set); @@ -719,5 +723,5 @@ close_fd(struct fproc *rfp, int fd_nr) lock_revive(); /* one or more locks released */ } - return(OK); + return(r); } diff --git a/minix/servers/vfs/proto.h b/minix/servers/vfs/proto.h index fada70710..d9a3b2068 100644 --- a/minix/servers/vfs/proto.h +++ b/minix/servers/vfs/proto.h @@ -89,7 +89,7 @@ void invalidate_filp(struct filp *); void invalidate_filp_by_endpt(endpoint_t proc_e); void invalidate_filp_by_char_major(devmajor_t major); void invalidate_filp_by_sock_drv(unsigned int num); -void close_filp(struct filp *fp); +int close_filp(struct filp *fp, int may_suspend); int do_copyfd(void); /* fscall.c */ @@ -149,7 +149,7 @@ void unmount_all(int force); /* open.c */ int do_close(void); -int close_fd(struct fproc *rfp, int fd_nr); +int close_fd(struct fproc *rfp, int fd_nr, int may_suspend); int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec); int do_creat(void); int do_lseek(void); @@ -281,7 +281,7 @@ int sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr, int sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int *addr_len); int sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int *addr_len); int sdev_shutdown(dev_t dev, int how); -int sdev_close(dev_t dev); +int sdev_close(dev_t dev, int may_suspend); int sdev_select(dev_t dev, int ops); void sdev_stop(struct fproc *rfp); void sdev_cancel(void); diff --git a/minix/servers/vfs/sdev.c b/minix/servers/vfs/sdev.c index 7bc667292..cc3cd4e50 100644 --- a/minix/servers/vfs/sdev.c +++ b/minix/servers/vfs/sdev.c @@ -40,11 +40,6 @@ * thread is spawned for processing successful accept replies, unless the reply * was received from a worker thread already (as may be the case if the accept * request was being canceled). - * - * As a current shortcoming, close requests should be long-lived (in order to - * support SO_LINGER) but are modeled as short-lived in VFS. This is the - * result of implementation limitations that have to be resolved first; see the - * comments in sdev_close() and close_filp() for more information. */ #include "fs.h" @@ -166,7 +161,7 @@ sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair) if (sock_id2 < 0) { printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n", sp->smap_endpt, sock_id2); - (void)sdev_close(dev[0]); + (void)sdev_close(dev[0], FALSE /*may_suspend*/); return EIO; } @@ -606,22 +601,42 @@ sdev_shutdown(dev_t dev, int how) * Close the socket identified by the given socket device number. */ int -sdev_close(dev_t dev) +sdev_close(dev_t dev, int may_suspend) { + struct smap *sp; + sockid_t sock_id; + message m; + int r; /* - * TODO: for now, we generate only nonblocking requests, because VFS as - * a whole does not yet support blocking close operations. See also - * the comment in close_filp(). All callers of sdev_close() currently - * ignore the return value, so socket drivers can already implement - * support for blocking close requests if they want, although at some - * later point we may have to introduce an additional SDEV_ flag to - * indicate whether the close call should be interrupted (keeping the - * socket open and returning EINTR, for dup2(2)) or continue in the - * background (closing the socket later and returning EINPROGRESS, for - * close(2)). + * Originally, all close requests were blocking the calling thread, but + * the new support for SO_LINGER has changed that. In a very strictly + * limited subset of cases - namely, the user process calling close(2), + * we suspend the close request and handle it asynchronously. In all + * other cases, including close-on-exit, close-on-exec, and even dup2, + * the close is issued as a thread-synchronous request instead. */ - return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK); + if (may_suspend) { + if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) + return EIO; + + /* Prepare the request message. */ + memset(&m, 0, sizeof(m)); + m.m_type = SDEV_CLOSE; + m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e; + m.m_vfs_lsockdriver_simple.sock_id = sock_id; + m.m_vfs_lsockdriver_simple.param = 0; + + /* Send the request to the driver. */ + if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) + panic("VFS: asynsend in sdev_bindconn failed: %d", r); + + /* Suspend the process until the reply arrives. */ + return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID, + GRANT_INVALID, -1, 0); + } else + /* Block the calling thread until the socket is closed. */ + return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK); } /* @@ -774,12 +789,21 @@ sdev_finish(struct fproc * rfp, message * m_ptr) case VFS_SENDTO: case VFS_SENDMSG: case VFS_IOCTL: + case VFS_CLOSE: /* * These calls all use the same SDEV_REPLY reply type and only * need to reply an OK-or-error status code back to userland. */ if (m_ptr->m_type == SDEV_REPLY) { status = m_ptr->m_lsockdriver_vfs_reply.status; + + /* + * For close(2) calls, the return value must indicate + * that the file descriptor has been closed. + */ + if (callnr == VFS_CLOSE && + status != OK && status != EINPROGRESS) + status = OK; } else if (m_ptr->m_type < 0) { status = m_ptr->m_type; } else { diff --git a/minix/servers/vfs/socket.c b/minix/servers/vfs/socket.c index dd8af0a12..17ec25131 100644 --- a/minix/servers/vfs/socket.c +++ b/minix/servers/vfs/socket.c @@ -212,7 +212,7 @@ do_socket(void) return r; if ((r = make_sock_fd(dev, flags)) < 0) - (void)sdev_close(dev); + (void)sdev_close(dev, FALSE /*may_suspend*/); return r; } @@ -250,14 +250,14 @@ do_socketpair(void) return r; if ((fd0 = make_sock_fd(dev[0], flags)) < 0) { - (void)sdev_close(dev[0]); - (void)sdev_close(dev[1]); + (void)sdev_close(dev[0], FALSE /*may_suspend*/); + (void)sdev_close(dev[1], FALSE /*may_suspend*/); return fd0; } if ((fd1 = make_sock_fd(dev[1], flags)) < 0) { - close_fd(fp, fd0); - (void)sdev_close(dev[1]); + close_fd(fp, fd0, FALSE /*may_suspend*/); + (void)sdev_close(dev[1], FALSE /*may_suspend*/); return fd1; } @@ -442,7 +442,7 @@ resume_accept(struct fproc * rfp, int status, dev_t dev, unsigned int addr_len, * handle address copy failures in the cleanest possible way. */ if (status != OK) { - (void)sdev_close(dev); + (void)sdev_close(dev, FALSE /*may_suspend*/); replycode(rfp->fp_endpoint, status); @@ -459,7 +459,7 @@ resume_accept(struct fproc * rfp, int status, dev_t dev, unsigned int addr_len, flags &= O_CLOEXEC | O_NONBLOCK | O_NOSIGPIPE; if ((r = make_sock_fd(dev, flags)) < 0) { - (void)sdev_close(dev); + (void)sdev_close(dev, FALSE /*may_suspend*/); replycode(rfp->fp_endpoint, r);