From: David van Moolenbroek Date: Sun, 21 Feb 2016 17:39:34 +0000 (+0000) Subject: Prepare for switch to native BSD socket API X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/zpipe.c?a=commitdiff_plain;h=refs%2Fchanges%2F00%2F3300%2F2;p=minix.git Prepare for switch to native BSD socket API Currently, the BSD socket API is implemented in libc, translating the API calls to character driver operations underneath. This approach has several issues: - it is inefficient, as most character driver operations are specific to the socket type, thus requiring that each operation start by brute-forcing the socket protocol family and type of the given file descriptor using several system calls; - it requires that libc itself be changed every time system support for a new protocol is added; - various parts of the libc implementations violate the POSIX asynchronous signal safety requirements. In order to resolve all these issues at once, the plan is to turn the BSD socket calls into system calls, thus making the BSD socket API the "native" ABI, removing the complexity from libc and instead letting VFS deal with the socket calls. The overall change is going to break all networking functionality. In order to smooth the transition, this patch introduces the fifteen new BSD socket system calls, and makes libc try these first before falling back on the old behavior. For now, the VFS implementations of the new calls fail such that libc will always use the fallback cases. Later on, when we introduce the actual implementation of the native BSD socket calls, all statically linked programs will automatically use the new ABI, thus limiting actual application breakage. In other words: by itself, this patch does nothing, except add a bit of transitional overhead that will disappear in the future. 
The largest part of the patch is concerned with adding full support for the new BSD socket system calls to trace(1) - this early addition has the advantage of making system call tracing output of several socket calls much more readable already. Both the system call interfaces and the trace(1) support have already been tested using code that will be committed later on. Change-Id: I3460812be50c78be662d857f9d3d6840f3ca917f --- diff --git a/minix/include/minix/callnr.h b/minix/include/minix/callnr.h index 46b91a825..759350af6 100644 --- a/minix/include/minix/callnr.h +++ b/minix/include/minix/callnr.h @@ -118,7 +118,22 @@ #define VFS_COPYFD (VFS_BASE + 46) #define VFS_CHECKPERMS (VFS_BASE + 47) #define VFS_GETSYSINFO (VFS_BASE + 48) +#define VFS_SOCKET (VFS_BASE + 49) +#define VFS_SOCKETPAIR (VFS_BASE + 50) +#define VFS_BIND (VFS_BASE + 51) +#define VFS_CONNECT (VFS_BASE + 52) +#define VFS_LISTEN (VFS_BASE + 53) +#define VFS_ACCEPT (VFS_BASE + 54) +#define VFS_SENDTO (VFS_BASE + 55) +#define VFS_SENDMSG (VFS_BASE + 56) +#define VFS_RECVFROM (VFS_BASE + 57) +#define VFS_RECVMSG (VFS_BASE + 58) +#define VFS_SETSOCKOPT (VFS_BASE + 59) +#define VFS_GETSOCKOPT (VFS_BASE + 60) +#define VFS_GETSOCKNAME (VFS_BASE + 61) +#define VFS_GETPEERNAME (VFS_BASE + 62) +#define VFS_SHUTDOWN (VFS_BASE + 63) -#define NR_VFS_CALLS 49 /* highest number from base plus one */ +#define NR_VFS_CALLS 64 /* highest number from base plus one */ #endif /* !_MINIX_CALLNR_H */ diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h index ac4e7e2a2..b5051a785 100644 --- a/minix/include/minix/ipc.h +++ b/minix/include/minix/ipc.h @@ -714,6 +714,14 @@ typedef struct { } mess_lc_vfs_link; _ASSERT_MSG_SIZE(mess_lc_vfs_link); +typedef struct { + int fd; + int backlog; + + u8_t padding[48]; +} mess_lc_vfs_listen; +_ASSERT_MSG_SIZE(mess_lc_vfs_listen); + typedef struct { off_t offset; @@ -803,6 +811,64 @@ typedef struct { } mess_lc_vfs_select; _ASSERT_MSG_SIZE(mess_lc_vfs_select); +typedef struct 
{ + int fd; + vir_bytes buf; /* void * */ + size_t len; + int flags; + vir_bytes addr; /* struct sockaddr * */ + unsigned int addr_len; /* socklen_t */ + + uint8_t padding[32]; +} mess_lc_vfs_sendrecv; +_ASSERT_MSG_SIZE(mess_lc_vfs_sendrecv); + +typedef struct { + int fd; + int how; + + uint8_t padding[48]; +} mess_lc_vfs_shutdown; +_ASSERT_MSG_SIZE(mess_lc_vfs_shutdown); + +typedef struct { + int fd; + vir_bytes addr; /* struct sockaddr * */ + unsigned int addr_len; /* socklen_t */ + + uint8_t padding[44]; +} mess_lc_vfs_sockaddr; +_ASSERT_MSG_SIZE(mess_lc_vfs_sockaddr); + +typedef struct { + int domain; + int type; + int protocol; + + uint8_t padding[44]; +} mess_lc_vfs_socket; +_ASSERT_MSG_SIZE(mess_lc_vfs_socket); + +typedef struct { + int fd; + vir_bytes msgbuf; /* struct msghdr * */ + int flags; + + uint8_t padding[44]; +} mess_lc_vfs_sockmsg; +_ASSERT_MSG_SIZE(mess_lc_vfs_sockmsg); + +typedef struct { + int fd; + int level; + int name; + vir_bytes buf; /* void * */ + unsigned int len; /* socklen_t */ + + uint8_t padding[36]; +} mess_lc_vfs_sockopt; +_ASSERT_MSG_SIZE(mess_lc_vfs_sockopt); + typedef struct { size_t len; vir_bytes name; /* const char * */ @@ -1969,6 +2035,13 @@ typedef struct { } mess_vfs_lc_lseek; _ASSERT_MSG_SIZE(mess_vfs_lc_lseek); +typedef struct { + unsigned int len; /* socklen_t */ + + uint8_t padding[52]; +} mess_vfs_lc_socklen; +_ASSERT_MSG_SIZE(mess_vfs_lc_socklen); + typedef struct { endpoint_t id; devminor_t minor; @@ -2141,6 +2214,7 @@ typedef struct noxfer_message { mess_lc_vfs_getvfsstat m_lc_vfs_getvfsstat; mess_lc_vfs_ioctl m_lc_vfs_ioctl; mess_lc_vfs_link m_lc_vfs_link; + mess_lc_vfs_listen m_lc_vfs_listen; mess_lc_vfs_lseek m_lc_vfs_lseek; mess_lc_vfs_mknod m_lc_vfs_mknod; mess_lc_vfs_mount m_lc_vfs_mount; @@ -2149,6 +2223,12 @@ typedef struct noxfer_message { mess_lc_vfs_readlink m_lc_vfs_readlink; mess_lc_vfs_readwrite m_lc_vfs_readwrite; mess_lc_vfs_select m_lc_vfs_select; + mess_lc_vfs_sendrecv m_lc_vfs_sendrecv; + 
mess_lc_vfs_shutdown m_lc_vfs_shutdown; + mess_lc_vfs_sockaddr m_lc_vfs_sockaddr; + mess_lc_vfs_socket m_lc_vfs_socket; + mess_lc_vfs_sockmsg m_lc_vfs_sockmsg; + mess_lc_vfs_sockopt m_lc_vfs_sockopt; mess_lc_vfs_stat m_lc_vfs_stat; mess_lc_vfs_statvfs1 m_lc_vfs_statvfs1; mess_lc_vfs_truncate m_lc_vfs_truncate; @@ -2277,6 +2357,7 @@ typedef struct noxfer_message { mess_vfs_fs_utime m_vfs_fs_utime; mess_vfs_lc_fdpair m_vfs_lc_fdpair; mess_vfs_lc_lseek m_vfs_lc_lseek; + mess_vfs_lc_socklen m_vfs_lc_socklen; mess_vfs_lchardriver_cancel m_vfs_lchardriver_cancel; mess_vfs_lchardriver_openclose m_vfs_lchardriver_openclose; mess_vfs_lchardriver_readwrite m_vfs_lchardriver_readwrite; diff --git a/minix/lib/libc/sys/accept.c b/minix/lib/libc/sys/accept.c index 86e85e9cf..6207f233a 100644 --- a/minix/lib/libc/sys/accept.c +++ b/minix/lib/libc/sys/accept.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -18,20 +19,50 @@ #include #include -#define DEBUG 0 - static int _tcp_accept(int sock, struct sockaddr *__restrict address, socklen_t *__restrict address_len); static int _uds_accept(int sock, struct sockaddr *__restrict address, socklen_t *__restrict address_len); +/* + * Accept a connection on a listening socket, creating a new socket. + */ +static int +__accept(int fd, struct sockaddr * __restrict address, + socklen_t * __restrict address_len) +{ + message m; + int r; + + if (address != NULL && address_len == NULL) { + errno = EFAULT; + return -1; + } + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockaddr.fd = fd; + m.m_lc_vfs_sockaddr.addr = (vir_bytes)address; + m.m_lc_vfs_sockaddr.addr_len = (address != NULL) ? 
*address_len : 0; + + if ((r = _syscall(VFS_PROC_NR, VFS_ACCEPT, &m)) < 0) + return -1; + + if (address != NULL) + *address_len = m.m_vfs_lc_socklen.len; + return r; +} + int accept(int sock, struct sockaddr *__restrict address, socklen_t *__restrict address_len) { int r; nwio_udpopt_t udpopt; + r = __accept(sock, address, address_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= _tcp_accept(sock, address, address_len); if (r != -1 || errno != ENOTTY) return r; @@ -45,19 +76,14 @@ int accept(int sock, struct sockaddr *__restrict address, * filedescriptors that do not refer to a socket. */ r= ioctl(sock, NWIOGUDPOPT, &udpopt); - if (r == 0) - { + if (r == 0 || (r == -1 && errno != ENOTTY)) { /* UDP socket */ errno= EOPNOTSUPP; return -1; } - if (errno == ENOTTY) - { - errno= ENOTSOCK; - return -1; - } - return r; + errno = ENOTSOCK; + return -1; } static int _tcp_accept(int sock, struct sockaddr *__restrict address, diff --git a/minix/lib/libc/sys/bind.c b/minix/lib/libc/sys/bind.c index f8bd2fe5b..ddefa6ad1 100644 --- a/minix/lib/libc/sys/bind.c +++ b/minix/lib/libc/sys/bind.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -33,6 +34,22 @@ static int _udp_bind(int sock, const struct sockaddr *address, static int _uds_bind(int sock, const struct sockaddr *address, socklen_t address_len, struct sockaddr_un *uds_addr); +/* + * Bind a socket to a local address. 
+ */ +static int +__bind(int fd, const struct sockaddr * address, socklen_t address_len) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockaddr.fd = fd; + m.m_lc_vfs_sockaddr.addr = (vir_bytes)address; + m.m_lc_vfs_sockaddr.addr_len = address_len; + + return _syscall(VFS_PROC_NR, VFS_BIND, &m); +} + int bind(int sock, const struct sockaddr *address, socklen_t address_len) { int r; @@ -40,6 +57,10 @@ int bind(int sock, const struct sockaddr *address, socklen_t address_len) nwio_udpopt_t udpopt; struct sockaddr_un uds_addr; + r = __bind(sock, address, address_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPCONF, &tcpconf); if (r != -1 || errno != ENOTTY) { @@ -74,10 +95,7 @@ int bind(int sock, const struct sockaddr *address, socklen_t address_len) return _uds_bind(sock, address, address_len, &uds_addr); } -#if DEBUG - fprintf(stderr, "bind: not implemented for fd %d\n", sock); -#endif - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/connect.c b/minix/lib/libc/sys/connect.c index dd5b7b7f5..be494845f 100644 --- a/minix/lib/libc/sys/connect.c +++ b/minix/lib/libc/sys/connect.c @@ -1,5 +1,7 @@ #include #include "namespace.h" +#include + #include #include @@ -31,6 +33,22 @@ static int _udp_connect(int sock, const struct sockaddr *address, static int _uds_connect(int sock, const struct sockaddr *address, socklen_t address_len); +/* + * Connect a socket to a remote address. 
+ */ +static int +__connect(int fd, const struct sockaddr * address, socklen_t address_len) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockaddr.fd = fd; + m.m_lc_vfs_sockaddr.addr = (vir_bytes)address; + m.m_lc_vfs_sockaddr.addr_len = address_len; + + return _syscall(VFS_PROC_NR, VFS_CONNECT, &m); +} + int connect(int sock, const struct sockaddr *address, socklen_t address_len) { @@ -38,6 +56,10 @@ int connect(int sock, const struct sockaddr *address, nwio_tcpconf_t tcpconf; nwio_udpopt_t udpopt; + r = __connect(sock, address, address_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPCONF, &tcpconf); if (r != -1 || errno != ENOTTY) { @@ -72,10 +94,7 @@ int connect(int sock, const struct sockaddr *address, return r; } -#if DEBUG - fprintf(stderr, "connect: not implemented for fd %d\n", sock); -#endif - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/getpeername.c b/minix/lib/libc/sys/getpeername.c index 35097c57e..f79303406 100644 --- a/minix/lib/libc/sys/getpeername.c +++ b/minix/lib/libc/sys/getpeername.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -15,8 +16,6 @@ #include #include -#define DEBUG 0 - static int _tcp_getpeername(int sock, struct sockaddr *__restrict address, socklen_t *__restrict address_len, nwio_tcpconf_t *tcpconfp); @@ -26,6 +25,32 @@ static int _udp_getpeername(int sock, struct sockaddr *__restrict address, static int _uds_getpeername(int sock, struct sockaddr *__restrict address, socklen_t *__restrict address_len, struct sockaddr_un *uds_addr); +/* + * Get the remote address of a socket. 
+ */ +static int +__getpeername(int fd, struct sockaddr * __restrict address, + socklen_t * __restrict address_len) +{ + message m; + + if (address_len == NULL) { + errno = EFAULT; + return -1; + } + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockaddr.fd = fd; + m.m_lc_vfs_sockaddr.addr = (vir_bytes)address; + m.m_lc_vfs_sockaddr.addr_len = *address_len; + + if (_syscall(VFS_PROC_NR, VFS_GETPEERNAME, &m) < 0) + return -1; + + *address_len = m.m_vfs_lc_socklen.len; + return 0; +} + int getpeername(int sock, struct sockaddr *__restrict address, socklen_t *__restrict address_len) { @@ -34,6 +59,10 @@ int getpeername(int sock, struct sockaddr *__restrict address, nwio_udpopt_t udpopt; struct sockaddr_un uds_addr; + r = __getpeername(sock, address, address_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPCONF, &tcpconf); if (r != -1 || errno != ENOTTY) { @@ -70,11 +99,7 @@ int getpeername(int sock, struct sockaddr *__restrict address, &uds_addr); } - -#if DEBUG - fprintf(stderr, "getpeername: not implemented for fd %d\n", sock); -#endif - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/getsockname.c b/minix/lib/libc/sys/getsockname.c index 7eb06114c..e021185fe 100644 --- a/minix/lib/libc/sys/getsockname.c +++ b/minix/lib/libc/sys/getsockname.c @@ -1,13 +1,7 @@ -/* - - getsockname() - - from socket emulation library for Minix 2.0.x - -*/ - #include #include "namespace.h" +#include + #include #include #include @@ -22,9 +16,7 @@ #include #include -/* #define DEBUG 0 -*/ static int _tcp_getsockname(int fd, struct sockaddr *__restrict address, socklen_t *__restrict address_len, nwio_tcpconf_t *tcpconfp); @@ -35,6 +27,32 @@ static int _udp_getsockname(int fd, struct sockaddr *__restrict address, static int _uds_getsockname(int fd, struct sockaddr *__restrict address, socklen_t *__restrict address_len, struct sockaddr_un *uds_addr); +/* + * Get the local address of a socket. 
+ */ +static int +__getsockname(int fd, struct sockaddr * __restrict address, + socklen_t * __restrict address_len) +{ + message m; + + if (address_len == NULL) { + errno = EFAULT; + return -1; + } + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockaddr.fd = fd; + m.m_lc_vfs_sockaddr.addr = (vir_bytes)address; + m.m_lc_vfs_sockaddr.addr_len = *address_len; + + if (_syscall(VFS_PROC_NR, VFS_GETSOCKNAME, &m) < 0) + return -1; + + *address_len = m.m_vfs_lc_socklen.len; + return 0; +} + int getsockname(int fd, struct sockaddr *__restrict address, socklen_t *__restrict address_len) { @@ -43,7 +61,11 @@ int getsockname(int fd, struct sockaddr *__restrict address, nwio_udpopt_t udpopt; struct sockaddr_un uds_addr; -#ifdef DEBUG + r = __getsockname(fd, address, address_len); + if (r != -1 || errno != ENOTSOCK) + return r; + +#if DEBUG fprintf(stderr,"mnx_getsockname: ioctl fd %d.\n", fd); #endif @@ -83,11 +105,7 @@ int getsockname(int fd, struct sockaddr *__restrict address, return _uds_getsockname(fd, address, address_len, &uds_addr); } -#if DEBUG - fprintf(stderr, "getsockname: not implemented for fd %d\n", socket); -#endif - - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/getsockopt.c b/minix/lib/libc/sys/getsockopt.c index 907d38d0e..7a1d1cf50 100644 --- a/minix/lib/libc/sys/getsockopt.c +++ b/minix/lib/libc/sys/getsockopt.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -30,6 +31,34 @@ static int _uds_getsockopt(int sock, int level, int option_name, static void getsockopt_copy(void *return_value, size_t return_len, void *__restrict option_value, socklen_t *__restrict option_len); +/* + * Get socket options. 
+ */ +static int +__getsockopt(int fd, int level, int option_name, + void * __restrict option_value, socklen_t * __restrict option_len) +{ + message m; + + if (option_len == NULL) { + errno = EFAULT; + return -1; + } + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockopt.fd = fd; + m.m_lc_vfs_sockopt.level = level; + m.m_lc_vfs_sockopt.name = option_name; + m.m_lc_vfs_sockopt.buf = (vir_bytes)option_value; + m.m_lc_vfs_sockopt.len = *option_len; + + if (_syscall(VFS_PROC_NR, VFS_GETSOCKOPT, &m) < 0) + return -1; + + *option_len = m.m_vfs_lc_socklen.len; + return 0; +} + int getsockopt(int sock, int level, int option_name, void *__restrict option_value, socklen_t *__restrict option_len) { @@ -38,6 +67,10 @@ int getsockopt(int sock, int level, int option_name, nwio_udpopt_t udpopt; struct sockaddr_un uds_addr; + r = __getsockopt(sock, level, option_name, option_value, option_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPOPT, &tcpopt); if (r != -1 || errno != ENOTTY) { @@ -74,11 +107,7 @@ int getsockopt(int sock, int level, int option_name, option_value, option_len); } - -#if DEBUG - fprintf(stderr, "getsockopt: not implemented for fd %d\n", sock); -#endif - errno= ENOTSOCK; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/listen.c b/minix/lib/libc/sys/listen.c index f8e196786..d0ac51439 100644 --- a/minix/lib/libc/sys/listen.c +++ b/minix/lib/libc/sys/listen.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -14,12 +15,29 @@ #include #include -#define DEBUG 0 +/* + * Put a socket in listening mode. 
+ */ +static int +__listen(int fd, int backlog) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_listen.fd = fd; + m.m_lc_vfs_listen.backlog = backlog; + + return _syscall(VFS_PROC_NR, VFS_LISTEN, &m); +} int listen(int sock, int backlog) { int r; + r = __listen(sock, backlog); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOTCPLISTENQ, &backlog); if (r != -1 || errno != ENOTTY) return r; @@ -28,10 +46,6 @@ int listen(int sock, int backlog) if (r != -1 || errno != ENOTTY) return r; -#if DEBUG - fprintf(stderr, "listen: not implemented for fd %d\n", sock); -#endif - errno= ENOSYS; + errno = ENOTSOCK; return -1; } - diff --git a/minix/lib/libc/sys/recvfrom.c b/minix/lib/libc/sys/recvfrom.c index b66e0a720..f04c627e6 100644 --- a/minix/lib/libc/sys/recvfrom.c +++ b/minix/lib/libc/sys/recvfrom.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -36,6 +37,38 @@ static ssize_t _uds_recvfrom_dgram(int sock, void *__restrict buffer, size_t length, int flags, struct sockaddr *__restrict address, socklen_t *__restrict address_len); +/* + * Receive a message from a socket. + */ +static ssize_t +__recvfrom(int fd, void * __restrict buffer, size_t length, int flags, + struct sockaddr * __restrict address, + socklen_t * __restrict address_len) +{ + message m; + ssize_t r; + + if (address != NULL && address_len == NULL) { + errno = EFAULT; + return -1; + } + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sendrecv.fd = fd; + m.m_lc_vfs_sendrecv.buf = (vir_bytes)buffer; + m.m_lc_vfs_sendrecv.len = length; + m.m_lc_vfs_sendrecv.flags = flags; + m.m_lc_vfs_sendrecv.addr = (vir_bytes)address; + m.m_lc_vfs_sendrecv.addr_len = (address != NULL) ? 
*address_len : 0; + + if ((r = _syscall(VFS_PROC_NR, VFS_RECVFROM, &m)) < 0) + return -1; + + if (address != NULL) + *address_len = m.m_vfs_lc_socklen.len; + return r; +} + ssize_t recvfrom(int sock, void *__restrict buffer, size_t length, int flags, struct sockaddr *__restrict address, socklen_t *__restrict address_len) @@ -47,6 +80,10 @@ ssize_t recvfrom(int sock, void *__restrict buffer, size_t length, struct sockaddr_un uds_addr; int uds_sotype = -1; + r = __recvfrom(sock, buffer, length, flags, address, address_len); + if (r != -1 || errno != ENOTSOCK) + return r; + #if DEBUG fprintf(stderr, "recvfrom: for fd %d\n", sock); #endif @@ -121,12 +158,10 @@ ssize_t recvfrom(int sock, void *__restrict buffer, size_t length, } return rd; - } + } -#if DEBUG - fprintf(stderr, "recvfrom: not implemented for fd %d\n", sock); -#endif - abort(); + errno = ENOTSOCK; + return -1; } static ssize_t _tcp_recvfrom(int sock, void *__restrict buffer, size_t length, diff --git a/minix/lib/libc/sys/recvmsg.c b/minix/lib/libc/sys/recvmsg.c index f80ae2d27..18ea01cf2 100644 --- a/minix/lib/libc/sys/recvmsg.c +++ b/minix/lib/libc/sys/recvmsg.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -16,11 +17,82 @@ static ssize_t _uds_recvmsg_conn(int sock, struct msghdr *msg, int flags); static ssize_t _uds_recvmsg_dgram(int sock, struct msghdr *msg, int flags); +/* + * Receive a message from a socket using a message structure. + */ +static ssize_t +__recvmsg(int fd, struct msghdr * msg, int flags) +{ + struct iovec iov; + struct msghdr msg2, *msgp; + char *ptr; + message m; + ssize_t r; + + /* + * Currently, MINIX3 does not support vector I/O operations. Like in + * the readv and writev implementations, we coalesce the data vector + * into a single buffer used for I/O. For future ABI compatibility, we + * then supply this buffer as a single vector element. This involves + * supplying a modified copy of the message header, as well as extra + * pre-checks. 
Once true vector I/O support has been added, the checks + * and vector I/O coalescing can be removed from here, leaving just the + * system call. Nothing will change at the system call ABI level. + */ + if (msg == NULL || (msg->msg_iovlen > 1 && msg->msg_iov == NULL)) { + errno = EFAULT; + return -1; + } + + if (msg->msg_iovlen < 0 || msg->msg_iovlen > IOV_MAX) { + errno = EMSGSIZE; /* different from readv/writev */ + return -1; + } + + if (msg->msg_iovlen > 1) { + if ((r = _vectorio_setup(msg->msg_iov, msg->msg_iovlen, &ptr, + _VECTORIO_READ)) < 0) + return -1; + + iov.iov_base = ptr; + iov.iov_len = r; + + memcpy(&msg2, msg, sizeof(msg2)); + msg2.msg_iov = &iov; + msg2.msg_iovlen = 1; + msgp = &msg2; + } else + msgp = msg; + + /* Issue the actual system call. */ + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockmsg.fd = fd; + m.m_lc_vfs_sockmsg.msgbuf = (vir_bytes)msgp; + m.m_lc_vfs_sockmsg.flags = flags; + + r = _syscall(VFS_PROC_NR, VFS_RECVMSG, &m); + + /* If we coalesced the vector, clean up and copy back the results. 
*/ + if (msgp != msg) { + _vectorio_cleanup(msg->msg_iov, msg->msg_iovlen, ptr, r, + _VECTORIO_READ); + + if (r >= 0) + memcpy(msg, &msg2, sizeof(msg2)); + } + + return r; +} + ssize_t recvmsg(int sock, struct msghdr *msg, int flags) { int r; int uds_sotype; + r = __recvmsg(sock, msg, flags); + if (r != -1 || errno != ENOTSOCK) + return r; + if (msg == NULL) { errno= EFAULT; return -1; @@ -39,11 +111,7 @@ ssize_t recvmsg(int sock, struct msghdr *msg, int flags) } } -#if DEBUG - fprintf(stderr, "recvmsg: not implemented for fd %d\n", sock); -#endif - - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/sendmsg.c b/minix/lib/libc/sys/sendmsg.c index b13d8a251..abfdfec47 100644 --- a/minix/lib/libc/sys/sendmsg.c +++ b/minix/lib/libc/sys/sendmsg.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -17,11 +18,79 @@ static ssize_t _uds_sendmsg_conn(int sock, const struct msghdr *msg, static ssize_t _uds_sendmsg_dgram(int sock, const struct msghdr *msg, int flags); +/* + * Send a message on a socket using a message structure. + */ +static ssize_t +__sendmsg(int fd, const struct msghdr * msg, int flags) +{ + struct iovec iov; + const struct msghdr *msgp; + struct msghdr msg2; + char *ptr; + message m; + ssize_t r; + + /* + * Currently, MINIX3 does not support vector I/O operations. Like in + * the readv and writev implementations, we coalesce the data vector + * into a single buffer used for I/O. For future ABI compatibility, we + * then supply this buffer as a single vector element. This involves + * supplying a modified copy of the message header, as well as extra + * pre-checks. Once true vector I/O support has been added, the checks + * and vector I/O coalescing can be removed from here, leaving just the + * system call. Nothing will change at the system call ABI level. 
+ */ + if (msg == NULL || (msg->msg_iovlen > 1 && msg->msg_iov == NULL)) { + errno = EFAULT; + return -1; + } + + if (msg->msg_iovlen < 0 || msg->msg_iovlen > IOV_MAX) { + errno = EMSGSIZE; /* different from readv/writev */ + return -1; + } + + if (msg->msg_iovlen > 1) { + if ((r = _vectorio_setup(msg->msg_iov, msg->msg_iovlen, &ptr, + _VECTORIO_WRITE)) < 0) + return -1; + + iov.iov_base = ptr; + iov.iov_len = r; + + memcpy(&msg2, msg, sizeof(msg2)); + msg2.msg_iov = &iov; + msg2.msg_iovlen = 1; + msgp = &msg2; + } else + msgp = msg; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockmsg.fd = fd; + m.m_lc_vfs_sockmsg.msgbuf = (vir_bytes)msgp; + m.m_lc_vfs_sockmsg.flags = flags; + + r = _syscall(VFS_PROC_NR, VFS_SENDMSG, &m); + + /* If we coalesced the vector, clean up. */ + if (msgp != msg) { + _vectorio_cleanup(msg->msg_iov, msg->msg_iovlen, ptr, r, + _VECTORIO_WRITE); + } + + return r; +} + ssize_t sendmsg(int sock, const struct msghdr *msg, int flags) { int r; int uds_sotype; + r = __sendmsg(sock, msg, flags); + if (r != -1 || errno != ENOTSOCK) + return r; + if (msg == NULL) { errno= EFAULT; return -1; @@ -41,11 +110,7 @@ ssize_t sendmsg(int sock, const struct msghdr *msg, int flags) } -#if DEBUG - fprintf(stderr, "sendmsg: not implemented for fd %d\n", sock); -#endif - - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/sendto.c b/minix/lib/libc/sys/sendto.c index 18d484df8..ff83d8f7e 100644 --- a/minix/lib/libc/sys/sendto.c +++ b/minix/lib/libc/sys/sendto.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #include #include @@ -32,6 +33,26 @@ static ssize_t _uds_sendto_conn(int sock, const void *message, size_t length, static ssize_t _uds_sendto_dgram(int sock, const void *message, size_t length, int flags, const struct sockaddr *dest_addr, socklen_t dest_len); +/* + * Send a message on a socket. 
+ */ +static ssize_t +__sendto(int fd, const void * buffer, size_t length, int flags, + const struct sockaddr * dest_addr, socklen_t dest_len) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sendrecv.fd = fd; + m.m_lc_vfs_sendrecv.buf = (vir_bytes)buffer; + m.m_lc_vfs_sendrecv.len = length; + m.m_lc_vfs_sendrecv.flags = flags; + m.m_lc_vfs_sendrecv.addr = (vir_bytes)dest_addr; + m.m_lc_vfs_sendrecv.addr_len = dest_len; + + return _syscall(VFS_PROC_NR, VFS_SENDTO, &m); +} + ssize_t sendto(int sock, const void *message, size_t length, int flags, const struct sockaddr *dest_addr, socklen_t dest_len) { @@ -41,6 +62,10 @@ ssize_t sendto(int sock, const void *message, size_t length, int flags, nwio_ipopt_t ipopt; int uds_sotype = -1; + r = __sendto(sock, message, length, flags, dest_addr, dest_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPOPT, &tcpopt); if (r != -1 || errno != ENOTTY) { @@ -114,10 +139,7 @@ ssize_t sendto(int sock, const void *message, size_t length, int flags, return retval; } -#if DEBUG - fprintf(stderr, "sendto: not implemented for fd %d\n", sock); -#endif - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/setsockopt.c b/minix/lib/libc/sys/setsockopt.c index ed36dc77e..8ab1dcac8 100644 --- a/minix/lib/libc/sys/setsockopt.c +++ b/minix/lib/libc/sys/setsockopt.c @@ -1,6 +1,8 @@ #include #include "namespace.h" +#include +#include #include #include #include @@ -26,6 +28,25 @@ static int _udp_setsockopt(int sock, int level, int option_name, static int _uds_setsockopt(int sock, int level, int option_name, const void *option_value, socklen_t option_len); +/* + * Set socket options. 
+ */ +static int +__setsockopt(int fd, int level, int option_name, const void * option_value, + socklen_t option_len) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_sockopt.fd = fd; + m.m_lc_vfs_sockopt.level = level; + m.m_lc_vfs_sockopt.name = option_name; + m.m_lc_vfs_sockopt.buf = (vir_bytes)option_value; + m.m_lc_vfs_sockopt.len = option_len; + + return _syscall(VFS_PROC_NR, VFS_SETSOCKOPT, &m); +} + int setsockopt(int sock, int level, int option_name, const void *option_value, socklen_t option_len) { @@ -34,6 +55,10 @@ int setsockopt(int sock, int level, int option_name, nwio_udpopt_t udpopt; struct sockaddr_un uds_addr; + r = __setsockopt(sock, level, option_name, option_value, option_len); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPOPT, &tcpopt); if (r != -1 || errno != ENOTTY) { @@ -70,11 +95,7 @@ int setsockopt(int sock, int level, int option_name, option_value, option_len); } - -#if DEBUG - fprintf(stderr, "setsockopt: not implemented for fd %d\n", sock); -#endif - errno= ENOTSOCK; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/shutdown.c b/minix/lib/libc/sys/shutdown.c index 001a6a244..f0839f486 100644 --- a/minix/lib/libc/sys/shutdown.c +++ b/minix/lib/libc/sys/shutdown.c @@ -1,6 +1,8 @@ #include #include "namespace.h" +#include +#include #include #include #include @@ -16,12 +18,31 @@ static int _tcp_shutdown(int sock, int how); static int _uds_shutdown(int sock, int how); +/* + * Shut down socket send and receive operations. 
+ */ +static int +__shutdown(int fd, int how) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_shutdown.fd = fd; + m.m_lc_vfs_shutdown.how = how; + + return _syscall(VFS_PROC_NR, VFS_SHUTDOWN, &m); +} + int shutdown(int sock, int how) { int r; struct sockaddr_un uds_addr; nwio_tcpconf_t tcpconf; + r = __shutdown(sock, how); + if (r != -1 || errno != ENOTSOCK) + return r; + r= ioctl(sock, NWIOGTCPCONF, &tcpconf); if (r != -1 || errno != ENOTTY) { @@ -44,10 +65,7 @@ int shutdown(int sock, int how) return _uds_shutdown(sock, how); } -#if DEBUG - fprintf(stderr, "shutdown: not implemented for fd %d\n", sock); -#endif - errno= ENOSYS; + errno = ENOTSOCK; return -1; } diff --git a/minix/lib/libc/sys/socket.c b/minix/lib/libc/sys/socket.c index 6d5350da5..e07f4ba13 100644 --- a/minix/lib/libc/sys/socket.c +++ b/minix/lib/libc/sys/socket.c @@ -1,5 +1,6 @@ #include #include "namespace.h" +#include #ifdef __weak_alias __weak_alias(socket, __socket30) @@ -38,9 +39,29 @@ static int _uds_socket(int type, int protocol); static int _raw_socket(int type, int protocol); static void _socket_flags(int type, int *result); +/* + * Create a socket. 
+ */ +static int +__socket(int domain, int type, int protocol) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_socket.domain = domain; + m.m_lc_vfs_socket.type = type; + m.m_lc_vfs_socket.protocol = protocol; + + return _syscall(VFS_PROC_NR, VFS_SOCKET, &m); +} + int socket(int domain, int type, int protocol) { - int sock_type; + int r, sock_type; + + r = __socket(domain, type, protocol); + if (r != -1 || errno != EAFNOSUPPORT) + return r; sock_type = type & ~SOCK_FLAGS_MASK; @@ -48,37 +69,25 @@ int socket(int domain, int type, int protocol) fprintf(stderr, "socket: domain %d, type %d, protocol %d\n", domain, type, protocol); #endif - if (domain != AF_INET && domain != AF_UNIX) - { -#if DEBUG - fprintf(stderr, "socket: bad domain %d\n", domain); -#endif - errno= EAFNOSUPPORT; - return -1; - } - if (domain == AF_UNIX && (sock_type == SOCK_STREAM || - sock_type == SOCK_DGRAM || - sock_type == SOCK_SEQPACKET)) + if (domain == AF_UNIX) return _uds_socket(type, protocol); - if (domain == AF_INET && sock_type == SOCK_STREAM) - return _tcp_socket(type, protocol); - - if (domain == AF_INET && sock_type == SOCK_DGRAM) - return _udp_socket(type, protocol); - - if (domain == AF_INET && sock_type == SOCK_RAW && protocol == IPPROTO_ICMP) - return _raw_socket(type, protocol); - - if (domain == AF_INET && sock_type == SOCK_RAW && protocol == IPPROTO_UDP) - return _raw_socket(type, protocol); + if (domain == AF_INET) { + switch (sock_type) { + case SOCK_STREAM: + return _tcp_socket(type, protocol); + case SOCK_DGRAM: + return _udp_socket(type, protocol); + case SOCK_RAW: + return _raw_socket(type, protocol); + default: + errno = EPROTOTYPE; + return -1; + } + } -#if DEBUG - fprintf(stderr, "socket: nothing for domain %d, type %d, protocol %d\n", - domain, type, protocol); -#endif - errno= EPROTOTYPE; + errno = EAFNOSUPPORT; return -1; } @@ -194,6 +203,15 @@ static int _raw_socket(int type, int protocol) static int _uds_socket(int type, int protocol) { int fd, r, flags = 
O_RDWR, sock_type; + + sock_type = type & ~SOCK_FLAGS_MASK; + if (sock_type != SOCK_STREAM && + sock_type != SOCK_DGRAM && + sock_type != SOCK_SEQPACKET) { + errno = EPROTOTYPE; + return -1; + } + if (protocol != 0) { #if DEBUG @@ -212,7 +230,6 @@ static int _uds_socket(int type, int protocol) /* set the type for the socket via ioctl (SOCK_DGRAM, * SOCK_STREAM, SOCK_SEQPACKET, etc) */ - sock_type = type & ~SOCK_FLAGS_MASK; r= ioctl(fd, NWIOSUDSTYPE, &sock_type); if (r == -1) { int ioctl_errno; diff --git a/minix/lib/libc/sys/socketpair.c b/minix/lib/libc/sys/socketpair.c index 118e6e40b..5f4a84460 100644 --- a/minix/lib/libc/sys/socketpair.c +++ b/minix/lib/libc/sys/socketpair.c @@ -1,6 +1,8 @@ #include #include "namespace.h" +#include +#include #include #include #include @@ -17,32 +19,44 @@ static int _uds_socketpair(int type, int protocol, int sv[2]); /* - * Create a pair of connected sockets + * Create a pair of connected sockets. */ -int socketpair(int domain, int type, int protocol, int sv[2]) { +static int +__socketpair(int domain, int type, int protocol, int sv[2]) +{ + message m; -#if DEBUG - fprintf(stderr, "socketpair: domain %d, type %d, protocol %d\n", - domain, type, protocol); -#endif + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_socket.domain = domain; + m.m_lc_vfs_socket.type = type; + m.m_lc_vfs_socket.protocol = protocol; - if (domain != AF_UNIX) - { - errno = EAFNOSUPPORT; + if (_syscall(VFS_PROC_NR, VFS_SOCKETPAIR, &m) < 0) return -1; - } - if (domain == AF_UNIX && - (type == SOCK_STREAM || type == SOCK_SEQPACKET)) - return _uds_socketpair(type, protocol, sv); + sv[0] = m.m_vfs_lc_fdpair.fd0; + sv[1] = m.m_vfs_lc_fdpair.fd1; + return 0; +} + +int +socketpair(int domain, int type, int protocol, int sv[2]) +{ + int r; + + r = __socketpair(domain, type, protocol, sv); + if (r != -1 || errno != EAFNOSUPPORT) + return r; #if DEBUG - fprintf(stderr, - "socketpair: nothing for domain %d, type %d, protocol %d\n", + fprintf(stderr, "socketpair: domain %d, 
type %d, protocol %d\n", domain, type, protocol); #endif - errno= EPROTOTYPE; + if (domain == AF_UNIX) + return _uds_socketpair(type, protocol, sv); + + errno = EAFNOSUPPORT; return -1; } @@ -52,6 +66,11 @@ static int _uds_socketpair(int type, int protocol, int sv[2]) int r, i; struct stat sbuf; + if (type != SOCK_STREAM && type != SOCK_SEQPACKET) { + errno = EPROTOTYPE; + return -1; + } + if (protocol != 0) { #if DEBUG diff --git a/minix/servers/vfs/Makefile b/minix/servers/vfs/Makefile index 76a22a4ba..3fcd4f5af 100644 --- a/minix/servers/vfs/Makefile +++ b/minix/servers/vfs/Makefile @@ -7,7 +7,8 @@ SRCS= main.c open.c read.c write.c pipe.c dmap.c \ filedes.c stadir.c protect.c time.c \ lock.c misc.c utility.c select.c table.c \ vnode.c vmnt.c request.c \ - tll.c comm.c worker.c coredump.c + tll.c comm.c worker.c coredump.c \ + socket.c .if ${MKCOVERAGE} != "no" SRCS+= gcov.c diff --git a/minix/servers/vfs/proto.h b/minix/servers/vfs/proto.h index 27c6c083a..ca5351bd1 100644 --- a/minix/servers/vfs/proto.h +++ b/minix/servers/vfs/proto.h @@ -247,6 +247,27 @@ int req_utime(endpoint_t fs_e, ino_t inode_nr, struct timespec * actv, struct timespec * modtv); int req_newdriver(endpoint_t fs_e, dev_t dev, char *label); +/* socket.c */ +int do_socket(void); +int do_socketpair(void); +int do_bind(void); +int do_connect(void); +int do_listen(void); +int do_accept(void); +void resume_accept(struct fproc *rfp, int status, dev_t dev, + unsigned int addr_len, int listen_fd); +int do_sendto(void); +int do_recvfrom(void); +void resume_recvfrom(struct fproc *rfp, int status, unsigned int addr_len); +int do_sockmsg(void); +void resume_recvmsg(struct fproc *rfp, int status, unsigned int ctl_len, + unsigned int addr_len, int flags, vir_bytes msg_buf); +int do_setsockopt(void); +int do_getsockopt(void); +int do_getsockname(void); +int do_getpeername(void); +int do_shutdown(void); + /* stadir.c */ int do_chdir(void); int do_fchdir(void); diff --git a/minix/servers/vfs/socket.c 
b/minix/servers/vfs/socket.c new file mode 100644 index 000000000..8201b0f03 --- /dev/null +++ b/minix/servers/vfs/socket.c @@ -0,0 +1,179 @@ +/* + * IMPORTANT NOTICE: THIS FILE CONTAINS STUBS ONLY RIGHT NOW, TO ENABLE A + * SEAMLESS TRANSITION TO THE NEW API FOR PROGRAMS STATICALLY LINKED TO LIBC! + * + * This file implements the upper socket layer of VFS: the BSD socket system + * calls, and any associated file descriptor, file pointer, vnode, and file + * system processing. In most cases, this layer will call into the lower + * socket layer in order to send the request to a socket driver. Generic file + * calls (e.g., read, write, ioctl, and select) are not implemented here, and + * will directly call into the lower socket layer as well. + * + * The following table shows the system call numbers implemented in this file, + * along with their request and reply message types. Each request layout + * message type is prefixed with "m_lc_vfs_". Each reply layout message type + * is prefixed with "m_vfs_lc_". For requests without a specific reply layout, + * only the "m_type" message field is used in the reply message. + * + * Type Request layout Reply layout + * ---- -------------- ------------ + * VFS_SOCKET socket + * VFS_SOCKETPAIR socket fdpair + * VFS_BIND sockaddr + * VFS_CONNECT sockaddr + * VFS_LISTEN listen + * VFS_ACCEPT sockaddr socklen + * VFS_SENDTO sendrecv + * VFS_RECVFROM sendrecv socklen + * VFS_SENDMSG sockmsg + * VFS_RECVMSG sockmsg + * VFS_SETSOCKOPT sockopt + * VFS_GETSOCKOPT sockopt socklen + * VFS_GETSOCKNAME sockaddr socklen + * VFS_GETPEERNAME sockaddr socklen + * VFS_SHUTDOWN shutdown + */ + +#include "fs.h" + +#include + +/* + * Create a socket. + */ +int +do_socket(void) +{ + + return EAFNOSUPPORT; +} + +/* + * Create a pair of connected sockets. + */ +int +do_socketpair(void) +{ + + return EAFNOSUPPORT; +} + +/* + * Bind a socket to a local address. 
+ */ +int +do_bind(void) +{ + + return ENOTSOCK; +} + +/* + * Connect a socket to a remote address. + */ +int +do_connect(void) +{ + + return ENOTSOCK; +} + +/* + * Put a socket in listening mode. + */ +int +do_listen(void) +{ + + return ENOTSOCK; +} + +/* + * Accept a connection on a listening socket, creating a new socket. + */ +int +do_accept(void) +{ + + return ENOTSOCK; +} + +/* + * Send a message on a socket. + */ +int +do_sendto(void) +{ + + return ENOTSOCK; +} + +/* + * Receive a message from a socket. + */ +int +do_recvfrom(void) +{ + + return ENOTSOCK; +} + +/* + * Send or receive a message on a socket using a message structure. + */ +int +do_sockmsg(void) +{ + + return ENOTSOCK; +} + +/* + * Set socket options. + */ +int +do_setsockopt(void) +{ + + return ENOTSOCK; +} + +/* + * Get socket options. + */ +int +do_getsockopt(void) +{ + + return ENOTSOCK; +} + +/* + * Get the local address of a socket. + */ +int +do_getsockname(void) +{ + + return ENOTSOCK; +} + +/* + * Get the remote address of a socket. + */ +int +do_getpeername(void) +{ + + return ENOTSOCK; +} + +/* + * Shut down socket send and receive operations. 
+ */ +int +do_shutdown(void) +{ + + return ENOTSOCK; +} diff --git a/minix/servers/vfs/table.c b/minix/servers/vfs/table.c index ad649c92d..f3b24ec96 100644 --- a/minix/servers/vfs/table.c +++ b/minix/servers/vfs/table.c @@ -64,4 +64,19 @@ int (* const call_vec[NR_VFS_CALLS])(void) = { CALL(VFS_COPYFD) = do_copyfd, /* copyfd(2) */ CALL(VFS_CHECKPERMS) = do_checkperms, /* checkperms(2) */ CALL(VFS_GETSYSINFO) = do_getsysinfo, /* getsysinfo(2) */ + CALL(VFS_SOCKET) = do_socket, /* socket(2) */ + CALL(VFS_SOCKETPAIR) = do_socketpair, /* socketpair(2) */ + CALL(VFS_BIND) = do_bind, /* bind(2) */ + CALL(VFS_CONNECT) = do_connect, /* connect(2) */ + CALL(VFS_LISTEN) = do_listen, /* listen(2) */ + CALL(VFS_ACCEPT) = do_accept, /* accept(2) */ + CALL(VFS_SENDTO) = do_sendto, /* sendto(2) */ + CALL(VFS_SENDMSG) = do_sockmsg, /* sendmsg(2) */ + CALL(VFS_RECVFROM) = do_recvfrom, /* recvfrom(2) */ + CALL(VFS_RECVMSG) = do_sockmsg, /* recvmsg(2) */ + CALL(VFS_SETSOCKOPT) = do_setsockopt, /* setsockopt(2) */ + CALL(VFS_GETSOCKOPT) = do_getsockopt, /* getsockopt(2) */ + CALL(VFS_GETSOCKNAME) = do_getsockname, /* getsockname(2) */ + CALL(VFS_GETPEERNAME) = do_getpeername, /* getpeername(2) */ + CALL(VFS_SHUTDOWN) = do_shutdown, /* shutdown(2) */ }; diff --git a/minix/tests/common-socket.c b/minix/tests/common-socket.c index 7de4f39ff..1517513b4 100644 --- a/minix/tests/common-socket.c +++ b/minix/tests/common-socket.c @@ -307,8 +307,9 @@ void test_shutdown(const struct socket_test_info *info) errno = 0; rc = shutdown(0, how[i]); - if (!(rc == -1 && errno == ENOSYS) && !info->bug_shutdown) { - test_fail("shutdown() should have failed with ENOSYS"); + if (!(rc == -1 && errno == ENOTSOCK) && !info->bug_shutdown) { + test_fail("shutdown() should have failed with " + "ENOTSOCK"); } debug("test shutdown() with a socket that is not connected"); diff --git a/minix/tests/test56.c b/minix/tests/test56.c index 5819767cc..efe24b0e6 100644 --- a/minix/tests/test56.c +++ b/minix/tests/test56.c 
@@ -317,7 +317,9 @@ static void test_bind_unix(void) addr.sun_path[2] = 'o'; addr.sun_path[3] = '\0'; SOCKET(sd, PF_UNIX, SOCK_STREAM, 0); - rc = bind(sd, (struct sockaddr *) &addr, strlen(addr.sun_path) + 1); + rc = bind(sd, (struct sockaddr *) &addr, + offsetof(struct sockaddr_un, sun_path) + strlen(addr.sun_path) + + 1); if (rc == -1) { test_fail("bind() should have worked"); } diff --git a/minix/usr.bin/trace/call.c b/minix/usr.bin/trace/call.c index 4f06920e9..1479cfd5c 100644 --- a/minix/usr.bin/trace/call.c +++ b/minix/usr.bin/trace/call.c @@ -98,7 +98,7 @@ put_message(struct trace_proc * proc, const char * name, int flags, if (flags & PF_ALT) put_endpoint(proc, "m_source", m.m_source); - put_value(proc, "m_type", "%x", m.m_type); + put_value(proc, "m_type", "0x%x", m.m_type); put_close_struct(proc, FALSE /*all*/); } diff --git a/minix/usr.bin/trace/inc.h b/minix/usr.bin/trace/inc.h index 2c84069c0..2d71a5da5 100644 --- a/minix/usr.bin/trace/inc.h +++ b/minix/usr.bin/trace/inc.h @@ -17,6 +17,8 @@ #include #include +#include + #include "proc.h" #include "type.h" #include "proto.h" diff --git a/minix/usr.bin/trace/ioctl/net.c b/minix/usr.bin/trace/ioctl/net.c index 90be38735..8d7591c80 100644 --- a/minix/usr.bin/trace/ioctl/net.c +++ b/minix/usr.bin/trace/ioctl/net.c @@ -108,13 +108,9 @@ put_ipaddr(struct trace_proc * proc, const char * name, ipaddr_t ipaddr) { struct in_addr in; - if (!valuesonly) { - in.s_addr = ipaddr; + in.s_addr = ipaddr; - /* Is this an acceptable encapsulation? 
*/ - put_value(proc, name, "[%s]", inet_ntoa(in)); - } else - put_value(proc, name, "0x%08x", ntohl(ipaddr)); + put_in_addr(proc, name, in); } static void @@ -193,97 +189,6 @@ static const struct flags udpopt_flags[] = { FLAG(NWUO_DI_IPOPT), }; -static void -put_family(struct trace_proc * proc, const char * name, int family) -{ - const char *text = NULL; - - if (!valuesonly) { - /* TODO: add all the other protocols */ - switch (family) { - TEXT(AF_UNSPEC); - TEXT(AF_LOCAL); - TEXT(AF_INET); - TEXT(AF_INET6); - } - } - - if (text != NULL) - put_field(proc, name, text); - else - put_value(proc, name, "%d", family); -} - -static const struct flags sock_type[] = { - FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_STREAM), - FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_DGRAM), - FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RAW), - FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RDM), - FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_SEQPACKET), - FLAG(SOCK_CLOEXEC), - FLAG(SOCK_NONBLOCK), - FLAG(SOCK_NOSIGPIPE), -}; - -static void -put_shutdown_how(struct trace_proc * proc, const char * name, int how) -{ - const char *text = NULL; - - if (!valuesonly) { - switch (how) { - TEXT(SHUT_RD); - TEXT(SHUT_WR); - TEXT(SHUT_RDWR); - } - } - - if (text != NULL) - put_field(proc, name, text); - else - put_value(proc, name, "%d", how); -} - -static void -put_struct_uucred(struct trace_proc * proc, const char * name, int flags, - vir_bytes addr) -{ - struct uucred cred; - - if (!put_open_struct(proc, name, flags, addr, &cred, sizeof(cred))) - return; - - put_value(proc, "cr_uid", "%u", cred.cr_uid); - if (verbose > 0) { - put_value(proc, "cr_gid", "%u", cred.cr_gid); - if (verbose > 1) - put_value(proc, "cr_ngroups", "%d", cred.cr_ngroups); - put_groups(proc, "cr_groups", PF_LOCADDR, - (vir_bytes)&cred.cr_groups, cred.cr_ngroups); - } - - put_close_struct(proc, verbose > 0); -} - -static void -put_cmsg_type(struct trace_proc * proc, const char * name, int type) -{ - const char *text = NULL; - - if (!valuesonly) { - switch (type) { - TEXT(SCM_RIGHTS); 
- TEXT(SCM_CREDS); - TEXT(SCM_TIMESTAMP); - } - } - - if (text != NULL) - put_field(proc, name, text); - else - put_value(proc, name, "%d", type); -} - static void put_msg_control(struct trace_proc * proc, struct msg_control * ptr) { @@ -497,7 +402,7 @@ net_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, int dir) if ((sun = (struct sockaddr_un *)ptr) == NULL) return dir; - put_family(proc, "sun_family", sun->sun_family); + put_socket_family(proc, "sun_family", sun->sun_family); /* This could be extended to a generic sockaddr printer.. */ if (sun->sun_family == AF_LOCAL) { @@ -512,8 +417,7 @@ net_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, int dir) if (ptr == NULL) return dir; - put_flags(proc, NULL, sock_type, COUNT(sock_type), "0x%x", - *(int *)ptr); + put_socket_type(proc, NULL, *(int *)ptr); return IF_ALL; case NWIOSUDSSHUT: diff --git a/minix/usr.bin/trace/proto.h b/minix/usr.bin/trace/proto.h index ddd02baee..90333d391 100644 --- a/minix/usr.bin/trace/proto.h +++ b/minix/usr.bin/trace/proto.h @@ -100,10 +100,6 @@ extern int allnames; extern unsigned int verbose; extern unsigned int valuesonly; -/* vfs.c */ -void put_fd(struct trace_proc *proc, const char *name, int fd); -void put_dev(struct trace_proc *proc, const char *name, dev_t dev); - /* service */ const struct calls pm_calls; const struct calls vfs_calls; @@ -112,6 +108,17 @@ const struct calls mib_calls; const struct calls vm_calls; const struct calls ipc_calls; +/* service/vfs.c */ +void put_fd(struct trace_proc *proc, const char *name, int fd); +void put_dev(struct trace_proc *proc, const char *name, dev_t dev); +void put_in_addr(struct trace_proc *proc, const char *name, struct in_addr in); +void put_socket_type(struct trace_proc *proc, const char *name, int type); +void put_socket_family(struct trace_proc *proc, const char *name, int family); +void put_struct_uucred(struct trace_proc *proc, const char *name, int flags, + vir_bytes addr); +void 
put_cmsg_type(struct trace_proc *proc, const char *name, int type); +void put_shutdown_how(struct trace_proc *proc, const char *name, int how); + /* ioctl/block.c */ const char *block_ioctl_name(unsigned long req); int block_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr, diff --git a/minix/usr.bin/trace/service/vfs.c b/minix/usr.bin/trace/service/vfs.c index ed4791dfc..938371ed2 100644 --- a/minix/usr.bin/trace/service/vfs.c +++ b/minix/usr.bin/trace/service/vfs.c @@ -6,6 +6,13 @@ #include #include #include +#include +#include +#include +#if 0 /* not yet, header is missing */ +#include +#endif +#include /* * This function should always be used when printing a file descriptor. It @@ -20,7 +27,7 @@ put_fd(struct trace_proc * proc, const char * name, int fd) } static int -vfs_read_out(struct trace_proc * proc, const message *m_out) +vfs_read_out(struct trace_proc * proc, const message * m_out) { put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd); @@ -29,8 +36,8 @@ vfs_read_out(struct trace_proc * proc, const message *m_out) } static void -vfs_read_in(struct trace_proc * proc, const message *m_out, - const message *m_in, int failed) +vfs_read_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) { put_buf(proc, "buf", failed, m_out->m_lc_vfs_readwrite.buf, @@ -41,7 +48,7 @@ vfs_read_in(struct trace_proc * proc, const message *m_out, } static int -vfs_write_out(struct trace_proc * proc, const message *m_out) +vfs_write_out(struct trace_proc * proc, const message * m_out) { put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd); @@ -1213,7 +1220,7 @@ put_statvfs_array(struct trace_proc * proc, const char * name, int flags, struct statvfs buf; int i, max; - if ((flags & PF_FAILED) || valuesonly || count < 0) { + if ((flags & PF_FAILED) || valuesonly > 1 || count < 0) { put_ptr(proc, name, addr); return; @@ -1356,6 +1363,1008 @@ vfs_gcov_flush_out(struct trace_proc * proc, const message * m_out) return CT_DONE; } +void 
+put_socket_family(struct trace_proc * proc, const char * name, int family) +{ + const char *text = NULL; + + if (!valuesonly) { + /* + * For socket(2) and socketpair(2) this should really be using + * the prefix "PF_" since those functions take a protocol + * family rather than an address family. This rule is applied + * fairly consistently within the system. Here I caved because + * I don't want to duplicate this entire function just for the + * one letter. There are exceptions however; some names only + * exist as "PF_". + */ + switch (family) { + TEXT(AF_UNSPEC); + TEXT(AF_LOCAL); + TEXT(AF_INET); + TEXT(AF_IMPLINK); + TEXT(AF_PUP); + TEXT(AF_CHAOS); + TEXT(AF_NS); + TEXT(AF_ISO); + TEXT(AF_ECMA); + TEXT(AF_DATAKIT); + TEXT(AF_CCITT); + TEXT(AF_SNA); + TEXT(AF_DECnet); + TEXT(AF_DLI); + TEXT(AF_LAT); + TEXT(AF_HYLINK); + TEXT(AF_APPLETALK); + TEXT(AF_OROUTE); + TEXT(AF_LINK); + TEXT(PF_XTP); + TEXT(AF_COIP); + TEXT(AF_CNT); + TEXT(PF_RTIP); + TEXT(AF_IPX); + TEXT(AF_INET6); + TEXT(PF_PIP); + TEXT(AF_ISDN); + TEXT(AF_NATM); + TEXT(AF_ARP); + TEXT(PF_KEY); + TEXT(AF_BLUETOOTH); + TEXT(AF_IEEE80211); + TEXT(AF_MPLS); + TEXT(AF_ROUTE); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", family); +} + +static const struct flags socket_types[] = { + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_STREAM), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_DGRAM), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RAW), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RDM), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_SEQPACKET), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_CONN_DGRAM), + FLAG(SOCK_CLOEXEC), + FLAG(SOCK_NONBLOCK), + FLAG(SOCK_NOSIGPIPE), +}; + +void +put_socket_type(struct trace_proc * proc, const char * name, int type) +{ + + put_flags(proc, name, socket_types, COUNT(socket_types), "%d", type); +} + +static void +put_socket_protocol(struct trace_proc * proc, const char * name, int family, + int type, int protocol) +{ + const char *text = NULL; + + if (!valuesonly && (type == SOCK_RAW 
|| protocol != 0)) { + switch (family) { + case PF_INET: + case PF_INET6: + /* TODO: is this all that is used in socket(2)? */ + switch (protocol) { + TEXT(IPPROTO_IP); + TEXT(IPPROTO_ICMP); + TEXT(IPPROTO_IGMP); + TEXT(IPPROTO_TCP); + TEXT(IPPROTO_UDP); + TEXT(IPPROTO_ICMPV6); + TEXT(IPPROTO_RAW); + } + break; +#if 0 /* not yet */ + case PF_BLUETOOTH: + switch (protocol) { + TEXT(BTPROTO_HCI); + TEXT(BTPROTO_L2CAP); + TEXT(BTPROTO_RFCOMM); + TEXT(BTPROTO_SCO); + } + break; +#endif + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", protocol); +} + +static int +vfs_socket_out(struct trace_proc * proc, const message * m_out) +{ + + put_socket_family(proc, "domain", m_out->m_lc_vfs_socket.domain); + put_socket_type(proc, "type", m_out->m_lc_vfs_socket.type); + put_socket_protocol(proc, "protocol", m_out->m_lc_vfs_socket.domain, + m_out->m_lc_vfs_socket.type & ~SOCK_FLAGS_MASK, + m_out->m_lc_vfs_socket.protocol); + + return CT_DONE; +} + +static int +vfs_socketpair_out(struct trace_proc * proc, const message * m_out) +{ + + put_socket_family(proc, "domain", m_out->m_lc_vfs_socket.domain); + put_socket_type(proc, "type", m_out->m_lc_vfs_socket.type); + put_socket_protocol(proc, "protocol", m_out->m_lc_vfs_socket.domain, + m_out->m_lc_vfs_socket.type & ~SOCK_FLAGS_MASK, + m_out->m_lc_vfs_socket.protocol); + + return CT_NOTDONE; +} + +static void +vfs_socketpair_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + if (!failed) { + put_open(proc, "fd", PF_NONAME, "[", ", "); + put_fd(proc, "fd0", m_in->m_vfs_lc_fdpair.fd0); + put_fd(proc, "fd1", m_in->m_vfs_lc_fdpair.fd1); + put_close(proc, "]"); + } else + put_field(proc, "fd", "&.."); + put_equals(proc); + put_result(proc); +} + +void +put_in_addr(struct trace_proc * proc, const char * name, struct in_addr in) +{ + + if (!valuesonly) { + /* Is this an acceptable encapsulation? 
*/ + put_value(proc, name, "[%s]", inet_ntoa(in)); + } else + put_value(proc, name, "0x%08x", ntohl(in.s_addr)); +} + +static void +put_in6_addr(struct trace_proc * proc, const char * name, struct in6_addr * in) +{ + char buf[INET6_ADDRSTRLEN]; + const char *ptr; + unsigned int i, n; + + if (!valuesonly && + (ptr = inet_ntop(AF_INET6, in, buf, sizeof(buf))) != NULL) { + put_value(proc, name, "[%s]", ptr); + } else { + for (i = n = 0; i < 16; i++) + n += snprintf(buf + n, sizeof(buf) - n, "%02x", + ((unsigned char *)in)[i]); + put_value(proc, name, "0x%s", buf); + } +} + +static void +put_struct_sockaddr(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, socklen_t addr_len) +{ + char buf[UCHAR_MAX + 1]; + uint8_t len; + sa_family_t family; + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + int all, off, left; + + /* + * For UNIX domain sockets, make sure there's always room to add a + * trailing NULL byte, because UDS paths are not necessarily null + * terminated. 
+ */ + if (addr_len < offsetof(struct sockaddr, sa_data) || + addr_len >= sizeof(buf)) { + put_ptr(proc, name, addr); + + return; + } + + if (!put_open_struct(proc, name, flags, addr, buf, addr_len)) + return; + + memcpy(&sa, buf, sizeof(sa)); + len = sa.sa_len; + family = sa.sa_family; + all = (verbose > 1); + + switch (family) { + case AF_LOCAL: + if (verbose > 1) + put_value(proc, "sun_len", "%u", len); + if (verbose > 0) + put_socket_family(proc, "sun_family", family); + off = (int)offsetof(struct sockaddr_un, sun_path); + left = addr_len - off; + if (left > 0) { + buf[addr_len] = 0; /* force null termination */ + put_buf(proc, "sun_path", PF_LOCADDR | PF_PATH, + (vir_bytes)&buf[off], + left + 1 /* include null byte */); + } + break; + case AF_INET: + if (verbose > 1) + put_value(proc, "sin_len", "%u", len); + if (verbose > 0) + put_socket_family(proc, "sin_family", family); + if (addr_len == sizeof(sin)) { + memcpy(&sin, buf, sizeof(sin)); + put_value(proc, "sin_port", "%u", ntohs(sin.sin_port)); + put_in_addr(proc, "sin_addr", sin.sin_addr); + } else + all = FALSE; + break; + case AF_INET6: + if (verbose > 1) + put_value(proc, "sin6_len", "%u", len); + if (verbose > 0) + put_socket_family(proc, "sin6_family", family); + if (addr_len == sizeof(sin6)) { + memcpy(&sin6, buf, sizeof(sin6)); + put_value(proc, "sin6_port", "%u", + ntohs(sin6.sin6_port)); + if (verbose > 1) + put_value(proc, "sin6_flowinfo", "%"PRIu32, + sin6.sin6_flowinfo); + put_in6_addr(proc, "sin6_addr", &sin6.sin6_addr); + if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr) || + IN6_IS_ADDR_SITELOCAL(&sin6.sin6_addr) || + verbose > 0) + put_value(proc, "sin6_scope_id", "%"PRIu32, + sin6.sin6_scope_id); + } else + all = FALSE; + break; + /* TODO: support for other address families */ + default: + if (verbose > 1) + put_value(proc, "sa_len", "%u", len); + put_socket_family(proc, "sa_family", family); + all = (verbose > 1 && family == AF_UNSPEC); + } + + put_close_struct(proc, all); +} + +/* This function 
is shared between bind and connect. */ +static int +vfs_bind_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_sockaddr.fd); + put_struct_sockaddr(proc, "addr", 0, m_out->m_lc_vfs_sockaddr.addr, + m_out->m_lc_vfs_sockaddr.addr_len); + put_value(proc, "addr_len", "%u", m_out->m_lc_vfs_sockaddr.addr_len); + + return CT_DONE; +} + +static int +vfs_listen_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_listen.fd); + put_value(proc, "backlog", "%d", m_out->m_lc_vfs_listen.backlog); + + return CT_DONE; +} + +static int +vfs_accept_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_sockaddr.fd); + + return CT_NOTDONE; +} + +static void +vfs_accept_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + put_struct_sockaddr(proc, "addr", failed, + m_out->m_lc_vfs_sockaddr.addr, m_in->m_vfs_lc_socklen.len); + /* + * We print the resulting address length rather than the given buffer + * size here, as we do in recvfrom, getsockname, getpeername, and (less + * explicitly) recvmsg. We could also print both, by adding the + * resulting length after the call result. 
+ */ + if (m_out->m_lc_vfs_sockaddr.addr == 0) + put_field(proc, "addr_len", "NULL"); + else if (!failed) + put_value(proc, "addr_len", "{%u}", + m_in->m_vfs_lc_socklen.len); + else + put_field(proc, "addr_len", "&.."); + + put_equals(proc); + put_result(proc); +} + +static const struct flags msg_flags[] = { + FLAG(MSG_OOB), + FLAG(MSG_PEEK), + FLAG(MSG_DONTROUTE), + FLAG(MSG_EOR), + FLAG(MSG_TRUNC), + FLAG(MSG_CTRUNC), + FLAG(MSG_WAITALL), + FLAG(MSG_DONTWAIT), + FLAG(MSG_BCAST), + FLAG(MSG_MCAST), +#ifdef MSG_NOSIGNAL + FLAG(MSG_NOSIGNAL), +#endif + FLAG(MSG_CMSG_CLOEXEC), + FLAG(MSG_NBIO), + FLAG(MSG_WAITFORONE), +}; + +static int +vfs_sendto_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_sendrecv.fd); + put_buf(proc, "buf", 0, m_out->m_lc_vfs_sendrecv.buf, + m_out->m_lc_vfs_sendrecv.len); /* sendto uses the sendrecv layout */ + put_value(proc, "len", "%zu", m_out->m_lc_vfs_sendrecv.len); + put_flags(proc, "flags", msg_flags, COUNT(msg_flags), "0x%x", + m_out->m_lc_vfs_sendrecv.flags); + put_struct_sockaddr(proc, "addr", 0, m_out->m_lc_vfs_sendrecv.addr, + m_out->m_lc_vfs_sendrecv.addr_len); + put_value(proc, "addr_len", "%u", m_out->m_lc_vfs_sendrecv.addr_len); + + return CT_DONE; +} + +static void +put_struct_iovec(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, int len, ssize_t bmax) +{ + struct iovec iov; + size_t bytes; + int i, imax; + + /* + * For simplicity and clarity reasons, we currently print the I/O + * vector as an array of data elements rather than an array of + * structures. We also copy in each element separately, because as of + * writing there is no system support for more than one element anyway. + * All of this may be changed later. + */ + if ((flags & PF_FAILED) || valuesonly > 1 || addr == 0 || len < 0) { + put_ptr(proc, name, addr); + + return; + } + + if (len == 0 || bmax == 0) { + put_field(proc, name, "[]"); + + return; + } + + /* As per logic below, 'imax' must be set to a nonzero value here.
+ */ + if (verbose == 0) + imax = 4; + else if (verbose == 1) + imax = 16; + else + imax = INT_MAX; + + for (i = 0; i < len && bmax > 0; i++) { + if (mem_get_data(proc->pid, addr, &iov, sizeof(iov)) < 0) { + if (i == 0) { + put_ptr(proc, name, addr); + + return; + } + + len = imax = 0; /* make put_tail() print an error */ + break; + } + + if (i == 0) + put_open(proc, name, 0, "[", ", "); + + bytes = MIN(iov.iov_len, (size_t)bmax); + + if (i < imax) /* print only the first 'imax' elements */ + put_buf(proc, NULL, 0, (vir_bytes)iov.iov_base, bytes); + + addr += sizeof(struct iovec); + bmax -= bytes; + } + + if (imax == 0 || imax < len) + put_tail(proc, len, imax); + put_close(proc, "]"); +} + +void +put_struct_uucred(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct uucred cred; + + if (!put_open_struct(proc, name, flags, addr, &cred, sizeof(cred))) + return; + + put_value(proc, "cr_uid", "%u", cred.cr_uid); + if (verbose > 0) { + put_value(proc, "cr_gid", "%u", cred.cr_gid); + if (verbose > 1) + put_value(proc, "cr_ngroups", "%d", cred.cr_ngroups); + put_groups(proc, "cr_groups", PF_LOCADDR, + (vir_bytes)&cred.cr_groups, cred.cr_ngroups); + } + + put_close_struct(proc, verbose > 0); +} + +static void +put_socket_level(struct trace_proc * proc, const char * name, int level) +{ + + /* + * Unfortunately, the level is a domain-specific protocol number. That + * means that without knowing how the socket was created, we cannot + * tell what it means. The only thing we can print is SOL_SOCKET, + * which is the same across all domains.
+ */ + if (!valuesonly && level == SOL_SOCKET) + put_field(proc, name, "SOL_SOCKET"); + else + put_value(proc, name, "%d", level); +} + +void +put_cmsg_type(struct trace_proc * proc, const char * name, int type) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (type) { + TEXT(SCM_RIGHTS); + TEXT(SCM_CREDS); + TEXT(SCM_TIMESTAMP); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", type); +} + +static void +put_cmsg_rights(struct trace_proc * proc, const char * name, char * buf, + size_t size, char * cptr, size_t chunk, vir_bytes addr, size_t len) +{ + unsigned int i, nfds; + int *ptr; + + put_open(proc, name, PF_NONAME, "[", ", "); + + /* + * Since file descriptors are important, we print them all, regardless + * of the current verbosity level. Start with the file descriptors + * that are already copied into the local buffer. + */ + ptr = (int *)cptr; + chunk = MIN(chunk, len); + + nfds = chunk / sizeof(int); + for (i = 0; i < nfds; i++) + put_fd(proc, NULL, ptr[i]); + + /* Then do the remaining file descriptors, in chunks. 
*/ + size -= size % sizeof(int); + + for (len -= chunk; len >= sizeof(int); len -= chunk) { + chunk = MIN(len, size); + + if (mem_get_data(proc->pid, addr, buf, chunk) < 0) { + put_field(proc, NULL, ".."); + + break; + } + + ptr = (int *)buf; + nfds = chunk / sizeof(int); + for (i = 0; i < nfds; i++) + put_fd(proc, NULL, ptr[i]); + + addr += chunk; + } + + put_close(proc, "]"); +} + +static void +put_cmsg(struct trace_proc * proc, const char * name, vir_bytes addr, + size_t len) +{ + struct cmsghdr cmsg; + char buf[CMSG_SPACE(sizeof(struct uucred))]; + size_t off, chunk, datalen; + + if (valuesonly > 1 || addr == 0 || len < CMSG_LEN(0)) { + put_ptr(proc, name, addr); + + return; + } + + for (off = 0; off < len; off += CMSG_SPACE(datalen)) { + chunk = MIN(len - off, sizeof(buf)); + + if (chunk < CMSG_LEN(0)) + break; + + if (mem_get_data(proc->pid, addr + off, buf, chunk) < 0) { + if (off == 0) { + put_ptr(proc, name, addr); + + return; + } + break; + } + + if (off == 0) + put_open(proc, name, 0, "[", ", "); + + memcpy(&cmsg, buf, sizeof(cmsg)); + + put_open(proc, NULL, 0, "{", ", "); + if (verbose > 0) + put_value(proc, "cmsg_len", "%u", cmsg.cmsg_len); + put_socket_level(proc, "cmsg_level", cmsg.cmsg_level); + if (cmsg.cmsg_level == SOL_SOCKET) + put_cmsg_type(proc, "cmsg_type", cmsg.cmsg_type); + else + put_value(proc, "cmsg_type", "%d", cmsg.cmsg_type); + + if (cmsg.cmsg_len < CMSG_LEN(0) || off + cmsg.cmsg_len > len) { + put_tail(proc, 0, 0); + put_close(proc, "}"); + break; + } + + datalen = cmsg.cmsg_len - CMSG_LEN(0); + + if (cmsg.cmsg_level == SOL_SOCKET && + cmsg.cmsg_type == SCM_RIGHTS) { + put_cmsg_rights(proc, "cmsg_data", buf, sizeof(buf), + &buf[CMSG_LEN(0)], chunk - CMSG_LEN(0), + addr + off + chunk, datalen); + } else if (cmsg.cmsg_level == SOL_SOCKET && + cmsg.cmsg_type == SCM_CREDS && + datalen >= sizeof(struct uucred) && + chunk >= CMSG_LEN(datalen)) { + put_struct_uucred(proc, "cmsg_data", PF_LOCADDR, + (vir_bytes)&buf[CMSG_LEN(0)]); + } else if 
(datalen > 0) + put_field(proc, "cmsg_data", ".."); + + if (verbose == 0) + put_field(proc, NULL, ".."); + put_close(proc, "}"); + } + + if (off < len) + put_field(proc, NULL, ".."); + put_close(proc, "]"); +} + +static void +put_struct_msghdr(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, ssize_t max) +{ + struct msghdr msg; + int all; + + if (!put_open_struct(proc, name, flags, addr, &msg, sizeof(msg))) + return; + + all = TRUE; + + if (msg.msg_name != NULL || verbose > 1) { + put_struct_sockaddr(proc, "msg_name", 0, + (vir_bytes)msg.msg_name, msg.msg_namelen); + if (verbose > 0) + put_value(proc, "msg_namelen", "%u", msg.msg_namelen); + else + all = FALSE; + } else + all = FALSE; + + put_struct_iovec(proc, "msg_iov", 0, (vir_bytes)msg.msg_iov, + msg.msg_iovlen, max); + if (verbose > 0) + put_value(proc, "msg_iovlen", "%d", msg.msg_iovlen); + else + all = FALSE; + + if (msg.msg_control != NULL || verbose > 1) { + put_cmsg(proc, "msg_control", (vir_bytes)msg.msg_control, + msg.msg_controllen); + + if (verbose > 0) + put_value(proc, "msg_controllen", "%u", + msg.msg_controllen); + else + all = FALSE; + } else + all = FALSE; + + /* When receiving, print the flags field as well. 
*/ + if (flags & PF_ALT) + put_flags(proc, "msg_flags", msg_flags, COUNT(msg_flags), + "0x%x", msg.msg_flags); + + put_close_struct(proc, all); +} + +static int +vfs_sendmsg_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_sockmsg.fd); + put_struct_msghdr(proc, "msg", 0, m_out->m_lc_vfs_sockmsg.msgbuf, + SSIZE_MAX); + put_flags(proc, "flags", msg_flags, COUNT(msg_flags), "0x%x", + m_out->m_lc_vfs_sockmsg.flags); + + return CT_DONE; +} + +static int +vfs_recvfrom_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_sendrecv.fd); + + return CT_NOTDONE; +} + +static void +vfs_recvfrom_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + put_buf(proc, "buf", failed, m_out->m_lc_vfs_sendrecv.buf, + m_in->m_type); + put_value(proc, "len", "%zu", m_out->m_lc_vfs_sendrecv.len); + put_flags(proc, "flags", msg_flags, COUNT(msg_flags), "0x%x", + m_out->m_lc_vfs_sendrecv.flags); + put_struct_sockaddr(proc, "addr", failed, + m_out->m_lc_vfs_sendrecv.addr, m_in->m_vfs_lc_socklen.len); + if (m_out->m_lc_vfs_sendrecv.addr == 0) + put_field(proc, "addr_len", "NULL"); + else if (!failed) + put_value(proc, "addr_len", "{%u}", + m_in->m_vfs_lc_socklen.len); + else + put_field(proc, "addr_len", "&.."); + + put_equals(proc); + put_result(proc); +} + +static int +vfs_recvmsg_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_sockmsg.fd); + + return CT_NOTDONE; +} + +static void +vfs_recvmsg_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + /* + * We choose to print only the resulting structure in this case. Doing + * so is easier and less messy than printing both the original and the + * result for the fields that are updated by the system (msg_namelen + * and msg_controllen); also, this approach is stateless. 
Admittedly + * it is not entirely consistent with many other parts of the trace + * output, though. + */ + put_struct_msghdr(proc, "msg", PF_ALT | failed, + m_out->m_lc_vfs_sockmsg.msgbuf, m_in->m_type); + put_flags(proc, "flags", msg_flags, COUNT(msg_flags), "0x%x", + m_out->m_lc_vfs_sockmsg.flags); + + put_equals(proc); + put_result(proc); +} + /* Print a socket option name. For SOL_SOCKET options the symbolic name is printed unless valuesonly is set; in every other case the option is printed as a hexadecimal number. */ +static void +put_sockopt_name(struct trace_proc * proc, const char * name, int level, + int optname) +{ + const char *text = NULL; + + /* + * The only level for which we can know names is SOL_SOCKET. See also + * put_socket_level(). Of course we could guess, but then we need a + * proper guessing system, which should probably also take into account + * the [gs]etsockopt option length. TODO. + */ + if (!valuesonly && level == SOL_SOCKET) { + switch (optname) { /* TEXT() presumably expands to a case label that stores the constant name in text - confirm against its definition. */ + TEXT(SO_DEBUG); + TEXT(SO_ACCEPTCONN); + TEXT(SO_REUSEADDR); + TEXT(SO_KEEPALIVE); + TEXT(SO_DONTROUTE); + TEXT(SO_BROADCAST); + TEXT(SO_USELOOPBACK); + TEXT(SO_LINGER); + TEXT(SO_OOBINLINE); + TEXT(SO_REUSEPORT); + TEXT(SO_NOSIGPIPE); + TEXT(SO_TIMESTAMP); + TEXT(SO_PASSCRED); + TEXT(SO_PEERCRED); + TEXT(SO_SNDBUF); + TEXT(SO_RCVBUF); + TEXT(SO_SNDLOWAT); + TEXT(SO_RCVLOWAT); + TEXT(SO_ERROR); + TEXT(SO_TYPE); + TEXT(SO_OVERFLOWED); + TEXT(SO_NOHEADER); + TEXT(SO_SNDTIMEO); + TEXT(SO_RCVTIMEO); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "0x%x", optname); +} + /* Print the value of a socket option stored at addr with length len. Only the pointer is printed when PF_FAILED is set in flags, valuesonly > 1, len is zero, the level is not SOL_SOCKET, the option is not recognized, len is smaller than the expected size, or the data cannot be fetched. */ +static void +put_sockopt_data(struct trace_proc * proc, const char * name, int flags, + int level, int optname, vir_bytes addr, socklen_t len) +{ + const char *text; + int i; + struct linger l; + struct uucred cr; + struct timeval tv; + void *ptr; + size_t size; + + /* See above regarding ambiguity for levels other than SOL_SOCKET. */ + if ((flags & PF_FAILED) || valuesonly > 1 || len == 0 || + level != SOL_SOCKET) { + put_ptr(proc, name, addr); + + return; + } + + /* Determine how much data to get, and where to put it. 
*/ + switch (optname) { + case SO_DEBUG: + case SO_ACCEPTCONN: + case SO_REUSEADDR: + case SO_KEEPALIVE: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_USELOOPBACK: + case SO_OOBINLINE: + case SO_REUSEPORT: + case SO_NOSIGPIPE: + case SO_TIMESTAMP: + case SO_PASSCRED: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDLOWAT: + case SO_RCVLOWAT: + case SO_ERROR: + case SO_TYPE: + case SO_OVERFLOWED: + case SO_NOHEADER: /* all of the options above are fetched into the int i */ + ptr = &i; + size = sizeof(i); + break; + case SO_LINGER: + ptr = &l; + size = sizeof(l); + break; + case SO_PEERCRED: + ptr = &cr; + size = sizeof(cr); + break; + case SO_SNDTIMEO: + case SO_RCVTIMEO: + ptr = &tv; + size = sizeof(tv); + break; + default: /* unrecognized option: no known size, so print the pointer only */ + put_ptr(proc, name, addr); + return; + } + + /* Get the data. Do not bother with truncated values. */ + if (len < size || mem_get_data(proc->pid, addr, ptr, size) < 0) { + put_ptr(proc, name, addr); + + return; + } + + /* Print the data according to the option name. */ + switch (optname) { + case SO_LINGER: + /* This isn't going to appear anywhere else; do it inline. */ + put_open(proc, name, 0, "{", ", "); + put_value(proc, "l_onoff", "%d", l.l_onoff); + put_value(proc, "l_linger", "%d", l.l_linger); + put_close(proc, "}"); + break; + case SO_PEERCRED: /* cr is the local copy fetched above; PF_LOCADDR presumably marks the address as local - confirm */ + put_struct_uucred(proc, name, PF_LOCADDR, (vir_bytes)&cr); + break; + case SO_ERROR: /* print the error name when one is known and valuesonly is not set, otherwise the number */ + put_open(proc, name, 0, "{", ", "); + if (!valuesonly && (text = get_error_name(i)) != NULL) + put_field(proc, NULL, text); + else + put_value(proc, NULL, "%d", i); + put_close(proc, "}"); + break; + case SO_TYPE: + put_open(proc, name, 0, "{", ", "); + put_socket_type(proc, NULL, i); + put_close(proc, "}"); + break; + case SO_SNDTIMEO: + case SO_RCVTIMEO: + put_struct_timeval(proc, name, PF_LOCADDR, (vir_bytes)&tv); + break; + default: + /* All other options are integer values. 
*/ + put_value(proc, name, "{%d}", i); + } +} + /* setsockopt request handler: print the fd, level, option name, option value and length, all taken from the request message. Returns CT_DONE; the reply is handled by default_in in vfs_map. */ +static int +vfs_setsockopt_out(struct trace_proc * proc, const message * m_out) +{ + int level, name; + + level = m_out->m_lc_vfs_sockopt.level; + name = m_out->m_lc_vfs_sockopt.name; + + put_fd(proc, "fd", m_out->m_lc_vfs_sockopt.fd); + put_socket_level(proc, "level", level); + put_sockopt_name(proc, "name", level, name); + put_sockopt_data(proc, "buf", 0, level, name, + m_out->m_lc_vfs_sockopt.buf, m_out->m_lc_vfs_sockopt.len); + put_value(proc, "len", "%u", m_out->m_lc_vfs_sockopt.len); + + return CT_DONE; +} + /* getsockopt request handler: print the fd, level and option name and return CT_NOTDONE; the value and length are printed by vfs_getsockopt_in(). */ +static int +vfs_getsockopt_out(struct trace_proc * proc, const message * m_out) +{ + int level; + + level = m_out->m_lc_vfs_sockopt.level; + + put_fd(proc, "fd", m_out->m_lc_vfs_sockopt.fd); + put_socket_level(proc, "level", level); + put_sockopt_name(proc, "name", level, m_out->m_lc_vfs_sockopt.name); + + return CT_NOTDONE; +} + /* getsockopt reply handler: print the option value using the length from the reply, then the resulting length, then the call result. */ +static void +vfs_getsockopt_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + put_sockopt_data(proc, "buf", failed, m_out->m_lc_vfs_sockopt.level, + m_out->m_lc_vfs_sockopt.name, m_out->m_lc_vfs_sockopt.buf, + m_in->m_vfs_lc_socklen.len); + /* + * For the length, we follow the same scheme as for addr_len pointers + * in accept() et al., in that we print the result only. We need not + * take into account that the given buffer is NULL as it must not be. + */ /* The result is the length in the reply message, printed in braces like the addr_len values in this file; the length in the request is only the size supplied by the caller. */ + if (!failed) + put_value(proc, "len", "{%u}", m_in->m_vfs_lc_socklen.len); + else + put_field(proc, "len", "&.."); + + put_equals(proc); + put_result(proc); +} + +/* This function is shared between getsockname and getpeername. 
*/ +static int +vfs_getsockname_out(struct trace_proc * proc, const message * m_out) +{ + /* Only the fd is known to be printable at request time; CT_NOTDONE leaves the rest to vfs_getsockname_in(). */ + put_fd(proc, "fd", m_out->m_lc_vfs_sockaddr.fd); + + return CT_NOTDONE; +} + /* Reply handler used for both getsockname and getpeername in vfs_map: print the returned address with the length from the reply, then addr_len (NULL when no address buffer was given, the length from the reply on success, a placeholder on failure), then the call result. */ +static void +vfs_getsockname_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + put_struct_sockaddr(proc, "addr", failed, + m_out->m_lc_vfs_sockaddr.addr, m_in->m_vfs_lc_socklen.len); + if (m_out->m_lc_vfs_sockaddr.addr == 0) + put_field(proc, "addr_len", "NULL"); + else if (!failed) + put_value(proc, "addr_len", "{%u}", + m_in->m_vfs_lc_socklen.len); + else + put_field(proc, "addr_len", "&.."); + + put_equals(proc); + put_result(proc); +} + /* Print the how argument of shutdown: the symbolic SHUT_ name when it matches one and valuesonly is not set, otherwise the decimal value. Not static, unlike the other helpers here - presumably used from other files as well; confirm. */ +void +put_shutdown_how(struct trace_proc * proc, const char * name, int how) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (how) { + TEXT(SHUT_RD); + TEXT(SHUT_WR); + TEXT(SHUT_RDWR); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", how); +} + /* shutdown request handler: print the fd and the how argument. Returns CT_DONE; the reply is handled by default_in in vfs_map. */ +static int +vfs_shutdown_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_shutdown.fd); + put_shutdown_how(proc, "how", m_out->m_lc_vfs_shutdown.how); + + return CT_DONE; +} + #define VFS_CALL(c) [((VFS_ ## c) - VFS_BASE)] static const struct call_handler vfs_map[] = { @@ -1411,6 +2420,28 @@ static const struct call_handler vfs_map[] = { vfs_svrctl_in), VFS_CALL(GCOV_FLUSH) = HANDLER("gcov_flush", vfs_gcov_flush_out, default_in), + VFS_CALL(SOCKET) = HANDLER("socket", vfs_socket_out, default_in), + VFS_CALL(SOCKETPAIR) = HANDLER("socketpair", vfs_socketpair_out, + vfs_socketpair_in), + VFS_CALL(BIND) = HANDLER("bind", vfs_bind_out, default_in), + VFS_CALL(CONNECT) = HANDLER("connect", vfs_bind_out, default_in), + VFS_CALL(LISTEN) = HANDLER("listen", vfs_listen_out, default_in), + VFS_CALL(ACCEPT) = HANDLER("accept", vfs_accept_out, vfs_accept_in), + VFS_CALL(SENDTO) = HANDLER("sendto", vfs_sendto_out, default_in), + VFS_CALL(SENDMSG) = HANDLER("sendmsg", 
vfs_sendmsg_out, default_in), + VFS_CALL(RECVFROM) = HANDLER("recvfrom", vfs_recvfrom_out, + vfs_recvfrom_in), + VFS_CALL(RECVMSG) = HANDLER("recvmsg", vfs_recvmsg_out, + vfs_recvmsg_in), + VFS_CALL(SETSOCKOPT) = HANDLER("setsockopt", vfs_setsockopt_out, + default_in), + VFS_CALL(GETSOCKOPT) = HANDLER("getsockopt", vfs_getsockopt_out, + vfs_getsockopt_in), + VFS_CALL(GETSOCKNAME) = HANDLER("getsockname", vfs_getsockname_out, + vfs_getsockname_in), + VFS_CALL(GETPEERNAME) = HANDLER("getpeername", vfs_getsockname_out, + vfs_getsockname_in), + VFS_CALL(SHUTDOWN) = HANDLER("shutdown", vfs_shutdown_out, default_in), }; const struct calls vfs_calls = {