]> Zhao Yanbai Git Server - minix.git/commitdiff
LWIP - the lwip server
authorTomas Hruby <tom@minix3.org>
Thu, 7 Apr 2011 07:44:11 +0000 (07:44 +0000)
committerTomas Hruby <tom@minix3.org>
Thu, 7 Apr 2011 07:44:11 +0000 (07:44 +0000)
The server implements inet-like interface to vfs and drivers. The core
functionality is contained in the liblwip.

13 files changed:
servers/lwip/Makefile [new file with mode: 0644]
servers/lwip/driver.c [new file with mode: 0644]
servers/lwip/driver.h [new file with mode: 0644]
servers/lwip/eth.c [new file with mode: 0644]
servers/lwip/inet_config.c [new file with mode: 0644]
servers/lwip/inet_config.h [new file with mode: 0644]
servers/lwip/lwip.c [new file with mode: 0644]
servers/lwip/proto.h [new file with mode: 0644]
servers/lwip/raw_ip.c [new file with mode: 0644]
servers/lwip/socket.c [new file with mode: 0644]
servers/lwip/socket.h [new file with mode: 0644]
servers/lwip/tcp.c [new file with mode: 0644]
servers/lwip/udp.c [new file with mode: 0644]

diff --git a/servers/lwip/Makefile b/servers/lwip/Makefile
new file mode 100644 (file)
index 0000000..34afd7f
--- /dev/null
@@ -0,0 +1,26 @@
+# Makefile for inet.
+PROG=  lwip
+SRCS=  lwip.c          \
+       socket.c        \
+       driver.c        \
+       udp.c           \
+       tcp.c           \
+       raw_ip.c        \
+       inet_config.c   \
+       eth.c
+
+.PATH: ${.CURDIR}/generic
+
+DPADD+=        ${LIBDRIVER} ${LIBSYS} ${LIBUTIL}
+LDADD+=        -ldriver -lsys -lutil -ltimers -llwip
+
+MAN=
+
+BINDIR?= /usr/sbin
+
+CPPFLAGS+=  -I${.CURDIR} -D_MINIX -D_SYSTEM
+CPPFLAGS+= -I${.CURDIR}/../../lib/liblwip/include
+
+CFLAGS += -Wall -Wextra -std=c99
+
+.include <bsd.prog.mk>
diff --git a/servers/lwip/driver.c b/servers/lwip/driver.c
new file mode 100644 (file)
index 0000000..6299da7
--- /dev/null
@@ -0,0 +1,815 @@
+/*
+ * This file implements handling of meesagges send by drivers
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <minix/ipc.h>
+#include <minix/com.h>
+#include <minix/sysutil.h>
+#include <minix/safecopies.h>
+
+#include <net/ioctl.h>
+#include <net/gen/in.h>
+#include <net/gen/ip_io.h>
+#include <net/gen/route.h>
+#include <net/gen/ether.h>
+#include <net/gen/eth_io.h>
+
+#include <lwip/pbuf.h>
+#include <lwip/netif.h>
+#include <netif/etharp.h>
+
+#include "proto.h"
+#include "socket.h"
+#include "driver.h"
+
+#if 0
+#define debug_drv_print(str, ...) printf("LWIP %s:%d : " str "\n", \
+               __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define debug_drv_print(...) debug_print(__VA_ARGS__)
+#endif
+
+#define RAW_BUF_SIZE   (32 << 10)
+
+static struct nic devices[MAX_DEVS];
+
+static ip_addr_t ip_addr_none = { IPADDR_NONE };
+extern endpoint_t lwip_ep;
+
+void nic_assign_driver(const char * dev_type,
+                       unsigned dev_num,
+                       const char * driver_name,
+                       unsigned instance,
+                       int is_default)
+{
+       struct nic * nic;
+
+       if (strcmp(dev_type, "eth") != 0) {
+               printf("LWIP : Cannot handle other than ethernet devices, "
+                               "ignoring '%s%d'\n", dev_type, dev_num);
+               return;
+       }
+
+       nic = &devices[dev_num];
+       snprintf(nic->name, NIC_NAME_LEN, "%s%d", dev_type, dev_num);
+       nic->name[NIC_NAME_LEN - 1] = '\0';
+       snprintf(nic->drv_name, DRV_NAME_LEN, "%s_%d", driver_name, instance);
+       nic->drv_name[DRV_NAME_LEN - 1] = '\0';
+       nic->is_default = is_default;
+       nic->netif.name[0] = 'e';
+       nic->netif.name[1] = 't';
+       nic->netif.num = dev_num;
+
+       debug_print("/dev/%s driven by %s default = %d",
+                       nic->name, nic->drv_name, is_default);
+}
+
+static struct nic * lookup_nic_by_drv_ep(endpoint_t ep)
+{
+       int i;
+
+       for (i = 0; i < MAX_DEVS; i++) {
+               if (devices[i].drv_ep == ep)
+                       return &devices[i];
+       }
+
+       return NULL;
+}
+
+static struct nic * lookup_nic_by_drv_name(const char * name)
+{
+       int i;
+
+       for (i = 0; i < MAX_DEVS; i++) {
+               if (strcmp(devices[i].drv_name, name) == 0)
+                       return &devices[i];
+       }
+
+       return NULL;
+}
+
+static struct nic * lookup_nic_default(void)
+{
+       int i;
+
+       for (i = 0; i < MAX_DEVS; i++) {
+               if (devices[i].is_default)
+                       return &devices[i];
+       }
+
+       return NULL;
+}
+
+void nic_init_all(void)
+{
+       int i;
+       unsigned g;
+
+       for (i = 0; i < MAX_DEVS; i++) {
+               devices[i].drv_ep = NONE;
+               devices[i].is_default = 0;
+
+               if (cpf_getgrants(&devices[i].rx_iogrant, 1) != 1)
+                       panic("Cannot initialize grants");
+               if (cpf_getgrants(&devices[i].rx_iovec[0].iov_grant, 1) != 1)
+                       panic("Cannot initialize grants");
+               if (cpf_getgrants(&devices[i].tx_iogrant, 1) != 1)
+                       panic("Cannot initialize grants");
+               for (g = 0; g < TX_IOVEC_NUM; g++) {
+                       cp_grant_id_t * gid = &devices[i].tx_iovec[g].iov_grant;
+                       if (cpf_getgrants(gid, 1) != 1)
+                               panic("Cannot initialize grants");
+               }
+               devices[i].raw_socket = NULL;
+       }
+}
+
+static void driver_setup_read(struct nic * nic)
+{
+       message m;
+
+       debug_print("device /dev/%s", nic->name);
+       //assert(nic->rx_pbuf == NULL);
+       if (!(nic->rx_pbuf == NULL)) {
+               panic("device /dev/%s rx_pbuf %p", nic->name, nic->rx_pbuf);
+       }
+
+       if (!(nic->rx_pbuf = pbuf_alloc(PBUF_RAW, ETH_MAX_PACK_SIZE + ETH_CRC_SIZE, PBUF_RAM)))
+               panic("Cannot allocate rx pbuf");
+
+       if (cpf_setgrant_direct(nic->rx_iovec[0].iov_grant,
+                               nic->drv_ep, (vir_bytes) nic->rx_pbuf->payload,
+                               nic->rx_pbuf->len, CPF_WRITE) != OK)
+               panic("Failed to set grant");
+       nic->rx_iovec[0].iov_size = nic->rx_pbuf->len;
+
+       m.m_type = DL_READV_S;
+       m.DL_ENDPT = lwip_ep;
+       m.DL_COUNT = 1;
+       m.DL_GRANT = nic->rx_iogrant;
+
+       if (asynsend(nic->drv_ep, &m) != OK)
+               panic("asynsend to the driver failed!");
+}
+
+static void nic_up(struct nic * nic, message * m)
+{
+       memcpy(nic->netif.hwaddr, m->DL_HWADDR, NETIF_MAX_HWADDR_LEN);
+
+       debug_print("device %s is up MAC : %02x:%02x:%02x:%02x:%02x:%02x",
+                       nic->name,
+                       nic->netif.hwaddr[0],
+                       nic->netif.hwaddr[1],
+                       nic->netif.hwaddr[2],
+                       nic->netif.hwaddr[3],
+                       nic->netif.hwaddr[4],
+                       nic->netif.hwaddr[5]);
+
+       driver_setup_read(nic);
+
+       netif_set_link_up(&nic->netif);
+       netif_set_up(&nic->netif);
+}
+
+int driver_tx(struct nic * nic)
+{
+       struct packet_q * pkt;
+       unsigned len;
+       message m;
+
+       int err;
+
+       debug_print("device /dev/%s", nic->name);
+       assert(nic->tx_buffer);
+
+       pkt = driver_tx_head(nic);
+       if (pkt == NULL) {
+               debug_print("no packets enqueued");
+               return 0;
+       }
+
+       assert(pkt->buf_len <= nic->max_pkt_sz);
+       
+       if ((len = pkt->buf_len) < nic->min_pkt_sz)
+               len = nic->min_pkt_sz;
+       err = cpf_setgrant_direct(nic->tx_iovec[0].iov_grant,
+                       nic->drv_ep, (vir_bytes) pkt->buf,
+                       len, CPF_READ);
+       debug_print("packet len %d", len);
+       if (err != OK)
+               panic("Failed to set grant");
+       nic->tx_iovec[0].iov_size = len;
+       
+       if (cpf_setgrant_direct(nic->tx_iogrant, nic->drv_ep,
+                       (vir_bytes) &nic->tx_iovec,
+                       sizeof(iovec_s_t), CPF_READ) != OK)
+               panic("Failed to set grant");
+
+       m.m_type = DL_WRITEV_S;
+       m.DL_ENDPT = lwip_ep;
+       m.DL_COUNT = 1;
+       m.DL_GRANT = nic->tx_iogrant;
+
+       if (asynsend(nic->drv_ep, &m) != OK)
+               panic("asynsend to the driver failed!");
+       nic->state = DRV_SENDING;
+       
+       debug_print("packet sent to driver");
+
+       return 1;
+}
+
+static void nic_pkt_sent(struct nic * nic)
+{
+       debug_print("device /dev/%s", nic->name);
+       assert(nic->state != DRV_IDLE);
+
+       /* packet has been sent, we are not intereted anymore */
+       driver_tx_dequeue(nic);
+       /*
+        * Try to transmit the next packet. Failure means that no packet is
+        * enqueued and thus the device is entering idle state
+        */
+       if (!driver_tx(nic))
+               nic->state = DRV_IDLE;
+}
+
+__unused static void print_pkt(unsigned char * pkt, int len)
+{
+       int i = 0;
+
+       printf("--- PKT ---\n");
+
+       while (i < len) {
+               int x;
+
+               for (x = 0; x < 8 && i < len; x++, i++)
+                       printf("%02x ", pkt[i]);
+
+               kputc(' ');
+
+               for (x = 0; x < 8 && i < len; x++, i++)
+                       printf("%02x ", pkt[i]);
+
+               kputc('\n');
+       }
+
+       printf("--- PKT END ---\n");
+}
+
+static int raw_receive(message * m,
+                       struct pbuf *pbuf)
+{
+       struct pbuf * p;
+       unsigned rem_len = m->COUNT;
+       unsigned written = 0;
+       int err;
+
+       debug_print("user buffer size : %d\n", rem_len);
+
+       for (p = pbuf; p && rem_len; p = p->next) {
+               size_t cp_len;
+
+               cp_len = (rem_len < p->len) ? rem_len : p->len;
+               err = copy_to_user(m->IO_ENDPT, p->payload, cp_len,
+                               (cp_grant_id_t) m->IO_GRANT,
+                               written);
+
+               if (err != OK)
+                       return err;
+
+               written += cp_len;
+               rem_len -= cp_len;
+       }
+
+       debug_print("copied %d bytes\n", written);
+       return written;
+}
+
+int raw_socket_input(struct pbuf * pbuf, struct nic * nic)
+{
+       struct socket * sock;
+       struct pbuf * pbuf_new;
+
+       if ((sock = nic->raw_socket) == NULL)
+               return 0;
+
+       debug_print("socket num : %ld", get_sock_num(sock));
+
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               int ret;
+               /* we are resuming a suspended operation */
+               ret = raw_receive(&sock->mess, pbuf);
+
+               if (ret > 0) {
+                       sock_revive(sock, ret);
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+                       return 0;
+               } else {
+                       sock_revive(sock, ret);
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+               }
+       }
+
+       /* Do not enqueue more data than allowed */
+       if (sock->recv_data_size > RAW_BUF_SIZE) {
+               return 0;
+       }
+
+       /*
+        * nobody is waiting for the data or an error occured above, we enqueue
+        * the packet. We store a copy of this packet
+        */
+       pbuf_new = pbuf_alloc(PBUF_RAW, pbuf->tot_len, PBUF_RAM);
+       if (pbuf_new == NULL) {
+               debug_print("LWIP : cannot allocated new pbuf\n");
+               return 0;
+       }
+
+       if (pbuf_copy(pbuf_new, pbuf) != ERR_OK) {
+               debug_print("LWIP : cannot copy pbuf\n");
+               return 0;
+       }
+
+       /*
+        * If we didn't managed to enqueue the packet we report it as not
+        * consumed
+        */
+       if (sock_enqueue_data(sock, pbuf_new, pbuf_new->tot_len) != OK) {
+               pbuf_free(pbuf_new);
+       }
+
+       return 0;
+}
+
+static void nic_pkt_received(struct nic * nic, unsigned size)
+{
+       assert(nic->netif.input);
+
+#if 0
+       print_pkt((unsigned char *) nic->rx_pbuf->payload, 64 /*nic->rx_pbuf->len */);
+#endif
+       
+       assert(nic->rx_pbuf->tot_len == nic->rx_pbuf->len);
+       nic->rx_pbuf->tot_len = nic->rx_pbuf->len = size - ETH_CRC_SIZE;
+
+       nic->netif.input(nic->rx_pbuf, &nic->netif);
+       nic->rx_pbuf = NULL;
+       driver_setup_read(nic);
+}
+
+void driver_request(message * m)
+{
+       struct nic * nic;
+
+       if ((nic = lookup_nic_by_drv_ep(m->m_source)) == NULL) {
+               printf("LWIP : request from unknown driver %d\n", m->m_source);
+               return;
+       }
+
+       switch (m->m_type) {
+       case DL_CONF_REPLY:
+               if (m->DL_STAT == OK)
+                       nic_up(nic, m);
+               break;
+       case DL_TASK_REPLY:
+               /*
+               if (!(m->DL_FLAGS & DL_PACK_SEND) && !(m->DL_FLAGS & DL_PACK_RECV)) {
+                       printf("void reply from driver\n");
+                       break;
+               }
+               */
+               if (m->DL_FLAGS & DL_PACK_SEND)
+                       nic_pkt_sent(nic);
+               if (m->DL_FLAGS & DL_PACK_RECV)
+                       nic_pkt_received(nic, m->DL_COUNT);
+               break;
+       case DL_STAT_REPLY:
+               break;
+       default:
+               printf("LWIP : unexpected request %d from driver %d\n",
+                                               m->m_type, m->m_source);
+       }
+}
+
+void driver_up(const char * label, endpoint_t ep)
+{
+       struct nic * nic;
+
+       nic = lookup_nic_by_drv_name(label);
+       
+       if (nic) {
+               debug_print("LWIP : driver '%s' / %d is up for /dev/%s\n",
+                               label, ep, nic->name);
+               nic->drv_ep = ep;
+       } else
+               printf("LWIP : WARNING unexpected driver '%s' up event\n",
+                                                               label);
+
+       nic->state = DRV_IDLE;
+
+       if (!netif_add(&nic->netif, &ip_addr_none, &ip_addr_none, &ip_addr_none,
+                               nic, ethernetif_init, ethernet_input)) {
+               printf("LWIP : failed to add device /dev/%s\n", nic->name);
+               nic->drv_ep = NONE;
+       }
+       if (nic->is_default)
+               netif_set_default(&nic->netif);
+
+       /* FIXME we support ethernet only, 2048 is safe */
+       nic->tx_buffer = debug_malloc(2048);
+       if (nic->tx_buffer == NULL)
+               panic("Cannot allocate tx_buffer");
+
+       /* prepare the RX grant once and forever */
+       if (cpf_setgrant_direct(nic->rx_iogrant,
+                               nic->drv_ep,
+                               (vir_bytes) &nic->rx_iovec,
+                               1 * sizeof(iovec_s_t), CPF_READ) != OK)
+               panic("Failed to set grant");
+}
+
+static void raw_recv_free(__unused void * data)
+{
+       pbuf_free((struct pbuf *) data);
+}
+
+static void nic_op_close(struct socket * sock, __unused message * m)
+{
+       struct nic * nic = (struct nic *)sock->data;
+
+       debug_drv_print("socket %d", get_sock_num(sock));
+       
+       sock_dequeue_data_all(sock, raw_recv_free);
+       sock->ops = NULL;
+
+       if (nic->raw_socket == sock) {
+               nic->raw_socket = NULL;
+               debug_drv_print("no active raw sock at %s", nic->name);
+       }
+
+       sock_reply(sock, OK);
+}
+
+static void nic_ioctl_set_conf(__unused struct socket * sock,
+                               struct nic * nic,
+                               message * m)
+{
+       nwio_ipconf_t ipconf;
+       int err;
+
+       err = copy_from_user(m->IO_ENDPT, &ipconf, sizeof(ipconf),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+       if (err != OK)
+               send_reply(m, err);
+
+       if (ipconf.nwic_flags & NWIC_IPADDR_SET)
+               netif_set_ipaddr(&nic->netif,
+                               (ip_addr_t *)&ipconf.nwic_ipaddr);
+       if (ipconf.nwic_flags & NWIC_NETMASK_SET)
+               netif_set_netmask(&nic->netif,
+                               (ip_addr_t *)&ipconf.nwic_netmask);
+       nic->flags = ipconf.nwic_flags;
+       if (nic->flags & NWEO_EN_BROAD)
+               nic->netif.flags |= NETIF_FLAG_BROADCAST;
+       
+       send_reply(m, OK);
+}
+
+static void nic_ioctl_get_conf(__unused struct socket * sock,
+                               struct nic * nic,
+                               message * m)
+{
+       nwio_ipconf_t ipconf;
+       int err;
+
+       ipconf.nwic_flags = nic->flags;
+       ipconf.nwic_ipaddr = nic->netif.ip_addr.addr;
+       ipconf.nwic_netmask = nic->netif.netmask.addr;
+       ipconf.nwic_mtu = nic->netif.mtu;
+       
+       err = copy_to_user(m->IO_ENDPT, &ipconf, sizeof(ipconf),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+       if (err != OK)
+               send_reply(m, err);
+
+       send_reply(m, OK);
+}
+
+static void nic_ioctl_set_gateway(__unused struct socket * sock,
+                               struct nic * nic,
+                               message * m)
+{
+       nwio_route_t route;
+       int err;
+
+       err = copy_from_user(m->IO_ENDPT, &route, sizeof(route),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+       if (err != OK)
+               send_reply(m, err);
+
+       netif_set_gw(&nic->netif, (ip_addr_t *)&route.nwr_gateway);
+       
+       send_reply(m, OK);
+}
+
+static void nic_ioctl_get_ethstat(__unused struct socket * sock,
+                               struct nic * nic,
+                               message * m)
+{
+       int err;
+       nwio_ethstat_t ethstat;
+
+       debug_drv_print("device /dev/%s", nic->name);
+       /*
+        * The device is not up yet, there is nothing to report or it is not
+        * an ethernet device
+        */
+       if (!nic->netif.flags & NETIF_FLAG_UP ||
+                       !(nic->netif.flags & (NETIF_FLAG_ETHERNET |
+                               NETIF_FLAG_ETHARP))) {
+               printf("LWIP no such device FUCK\n");
+               send_reply(m, ENODEV);
+               return;
+       }
+
+       memset(&ethstat, 0, sizeof(ethstat));
+       memcpy(&ethstat.nwes_addr, nic->netif.hwaddr, 6);
+       
+       err = copy_to_user(m->IO_ENDPT, &ethstat, sizeof(ethstat),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+       if (err != OK)
+               send_reply(m, err);
+
+       send_reply(m, OK);
+}
+
+static void nic_ioctl_set_ethopt(struct socket * sock,
+                               struct nic * nic,
+                               message * m)
+{
+       int err;
+       nwio_ethopt_t ethopt;
+
+       assert(nic);
+
+       if (!sock) {
+               send_reply(m, EINVAL);
+               return;
+       }
+
+       debug_drv_print("device /dev/%s", nic->name);
+       /*
+        * The device is not up yet, there is nothing to report or it is not
+        * an ethernet device
+        */
+       if (!nic->netif.flags & NETIF_FLAG_UP ||
+                       !(nic->netif.flags & (NETIF_FLAG_ETHERNET |
+                               NETIF_FLAG_ETHARP))) {
+               send_reply(m, ENODEV);
+               return;
+       }
+
+       err = copy_from_user(m->IO_ENDPT, &ethopt, sizeof(ethopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+       if (err != OK)
+               send_reply(m, err);
+
+       /* we want to get data from this sock */
+       if (ethopt.nweo_flags & NWEO_COPY) {
+               if (nic->raw_socket) {
+                       send_reply(m, EBUSY);
+                       return;
+               }
+
+               nic->raw_socket = sock;
+               debug_drv_print("active raw sock %d at %s",
+                               get_sock_num(sock), nic->name);
+       }
+
+       send_reply(m, OK);
+}
+
+static void nic_do_ioctl(struct socket * sock, struct nic * nic, message * m)
+{
+       debug_print("device /dev/%s req %c %d %d",
+                       nic->name,
+                       (m->REQUEST >> 8) & 0xff,
+                       m->REQUEST & 0xff,
+                       (m->REQUEST >> 16) & _IOCPARM_MASK);
+       
+       debug_drv_print("socket %d", sock ? get_sock_num(sock) : -1);
+
+       switch (m->REQUEST) {
+       case NWIOSIPCONF:
+               nic_ioctl_set_conf(sock, nic, m);
+               break;
+       case NWIOGIPCONF:
+               nic_ioctl_get_conf(sock, nic, m);
+               break;
+       case NWIOSIPOROUTE:
+               nic_ioctl_set_gateway(sock, nic, m);
+               break;
+       case NWIOGETHSTAT:
+               nic_ioctl_get_ethstat(sock, nic, m);
+               break;
+       case NWIOSETHOPT:
+               nic_ioctl_set_ethopt(sock, nic, m);
+               break;
+       default:
+               send_reply(m, EBADIOCTL);
+               return;
+       }
+}
+
+void nic_default_ioctl(message *m)
+{
+       struct nic * nic = lookup_nic_default();
+
+       if (nic == NULL) {
+               debug_print("No default nic, reporting error");
+               send_reply(m, EBADIOCTL);
+               return;
+       }
+
+       nic_do_ioctl(NULL, nic, m);
+}
+
+static void nic_op_ioctl(struct socket * sock, message * m)
+{
+       nic_do_ioctl(sock, (struct nic *)sock->data, m);
+}
+
+static void nic_op_read(struct socket * sock, message * m)
+{
+       debug_drv_print("sock num %d", get_sock_num(sock));
+
+       if (sock->recv_head) {
+               /* data available receive immeditely */
+
+               struct pbuf * pbuf;
+               int ret;
+
+               pbuf = sock->recv_head->data;
+
+               ret = raw_receive(m, pbuf);
+
+               if (ret > 0) {
+                       sock_dequeue_data(sock);
+                       sock->recv_data_size -= pbuf->tot_len;
+                       pbuf_free(pbuf);
+               }
+               sock_reply(sock, ret);
+       } else {
+               /* store the message so we know how to reply */
+               sock->mess = *m;
+               /* operation is being processes */
+               sock->flags |= SOCK_FLG_OP_PENDING;
+
+               debug_print("no data to read, suspending");
+               sock_reply(sock, SUSPEND);
+       }
+}
+
+static void nic_op_write(struct socket * sock, message * m)
+{
+       int ret;
+       struct pbuf * pbuf;
+       struct nic * nic = (struct nic *)sock->data;
+
+       assert(nic);
+       debug_print("device %s data size %d", nic->name,
+                       get_sock_num(sock), m->COUNT);
+
+       pbuf = pbuf_alloc(PBUF_RAW, m->COUNT, PBUF_RAM);
+       if (!pbuf) {
+               ret = ENOMEM;
+               goto write_err;
+       }
+
+       if ((ret = copy_from_user(m->IO_ENDPT, pbuf->payload, m->COUNT,
+                               (cp_grant_id_t) m->IO_GRANT, 0)) != OK) {
+               pbuf_free(pbuf);
+               goto write_err;
+       }
+
+       if ((ret = nic->netif.linkoutput(&nic->netif, pbuf) != ERR_OK)) {
+               debug_print("raw linkoutput failed %d", ret);
+               ret = EIO;
+       } else
+               ret = m->COUNT;
+       
+
+       pbuf_free(pbuf);
+       
+write_err:
+       sock_reply(sock, ret);
+}
+
+static struct sock_ops nic_ops = {
+       .write          = nic_op_write,
+       .read           = nic_op_read,
+       .close          = nic_op_close,
+       .ioctl          = nic_op_ioctl,
+       .select         = generic_op_select,
+       .select_reply   = generic_op_select_reply
+};
+
+void nic_open(message *m)
+{
+       struct socket * sock;
+
+       debug_print("device %d", m->DEVICE);
+
+       if (m->DEVICE > MAX_DEVS || devices[m->DEVICE].drv_ep == NONE) {
+               send_reply(m, ENODEV);
+               return;
+       }
+
+       sock = get_unused_sock();
+
+       if (sock == NULL) {
+               send_reply(m, ENODEV);
+               return;
+       }
+       if (sock->ops != NULL) {
+               send_reply(m, EBUSY);
+               return;
+       }
+
+       sock->ops = &nic_ops;
+       sock->select_ep = NONE;
+       sock->recv_data_size = 0;
+       sock->data = &devices[m->DEVICE];
+
+       send_reply(m, get_sock_num(sock));
+}
+
+static int driver_pkt_enqueue(struct packet_q ** head,
+                               struct packet_q ** tail,
+                               struct pbuf * pbuf)
+{
+       struct packet_q * pkt;
+       char * b;
+
+       pkt = (struct packet_q *) malloc(sizeof(struct packet_q) + pbuf->tot_len);
+       if (!pkt)
+               return ENOMEM;
+
+       pkt->next = NULL;
+       pkt->buf_len = pbuf->tot_len;
+       
+       for (b = pkt->buf; pbuf; pbuf = pbuf->next) {
+               memcpy(b, pbuf->payload, pbuf->len);
+               b += pbuf->len;
+       }
+
+       if (*head == NULL)
+               *head = *tail = pkt;
+       else {
+               (*tail)->next = pkt;
+               *tail = pkt;
+       }
+
+       return OK;
+}
+
+int driver_tx_enqueue(struct nic * nic, struct pbuf * pbuf)
+{
+       debug_print("device /dev/%s", nic->name);
+       return driver_pkt_enqueue(&nic->tx_head, &nic->tx_tail, pbuf);
+}
+
+static void driver_pkt_dequeue(struct packet_q ** head,
+                                       struct packet_q ** tail)
+{
+       struct packet_q * pkt;
+
+       /* we always dequeue only if there is something to dequeue */
+       assert(*head);
+
+       pkt = *head;
+
+       if ((*head = pkt->next) == NULL)
+               *tail = NULL;
+
+       debug_free(pkt);
+}
+
+void driver_tx_dequeue(struct nic * nic)
+{
+       debug_print("device /dev/%s", nic->name);
+       driver_pkt_dequeue(&nic->tx_head, &nic->tx_tail);
+}
+
+struct packet_q * driver_tx_head(struct nic * nic)
+{
+       debug_print("device /dev/%s", nic->name);
+
+       if (!nic->tx_head)
+               return NULL;
+       return nic->tx_head;
+}
diff --git a/servers/lwip/driver.h b/servers/lwip/driver.h
new file mode 100644 (file)
index 0000000..01d4d3b
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef __LWIP_DRIVER_H_
+#define __LWIP_DRIVER_H_
+
+#include <minix/endpoint.h>
+#include <minix/ds.h>
+
+#include <lwip/pbuf.h>
+
+#define NIC_NAME_LEN   6
+#define DRV_NAME_LEN   DS_MAX_KEYLEN
+
+#define TX_IOVEC_NUM   16 /* something the drivers assume */
+
+struct packet_q {
+       struct packet_q *       next;
+       unsigned                buf_len;
+       char                    buf[];
+};
+
+#define DRV_IDLE       0
+#define DRV_SENDING    1
+#define DRV_RECEIVING  2
+
+struct nic {
+       unsigned                flags;
+       char                    name[NIC_NAME_LEN];
+       char                    drv_name[DRV_NAME_LEN];
+       endpoint_t              drv_ep;
+       int                     is_default;
+       int                     state;
+       cp_grant_id_t           rx_iogrant;
+       iovec_s_t               rx_iovec[1];
+       struct pbuf *           rx_pbuf;
+       cp_grant_id_t           tx_iogrant;
+       iovec_s_t               tx_iovec[TX_IOVEC_NUM];
+       struct packet_q *       tx_head;
+       struct packet_q *       tx_tail;
+       void *                  tx_buffer;
+       struct netif            netif;
+       unsigned                max_pkt_sz;
+       unsigned                min_pkt_sz;
+       struct socket           * raw_socket;
+};
+
+int driver_tx_enqueue(struct nic * nic, struct pbuf * pbuf);
+void driver_tx_dequeue(struct nic * nic);
+struct packet_q * driver_tx_head(struct nic * nic);
+
+/*
+ * Transmit the next packet in the TX queue of this device. Returns 1 if
+ * success, 0 otherwise.
+ */
+int driver_tx(struct nic * nic);
+int raw_socket_input(struct pbuf * pbuf, struct nic * nic);
+
+#endif /* __LWIP_DRIVER_H_ */
diff --git a/servers/lwip/eth.c b/servers/lwip/eth.c
new file mode 100644 (file)
index 0000000..9e15bbf
--- /dev/null
@@ -0,0 +1,136 @@
+/**
+ * @file
+ * Ethernet Interface Skeleton
+ *
+ */
+
+/*
+ * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
+ * All rights reserved. 
+ * 
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 
+ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
+ * OF SUCH DAMAGE.
+ *
+ * This file is part of the lwIP TCP/IP stack.
+ * 
+ * Author: Adam Dunkels <adam@sics.se>
+ *
+ */
+
+/*
+ * This file is a skeleton for developing Ethernet network interface
+ * drivers for lwIP. Add code to the low_level functions and do a
+ * search-and-replace for the word "ethernetif" to replace it with
+ * something that better describes your network interface.
+ */
+
+#include <minix/sysutil.h>
+
+#include "lwip/opt.h"
+
+#include "lwip/def.h"
+#include "lwip/mem.h"
+#include "lwip/pbuf.h"
+#include "lwip/sys.h"
+#include <lwip/stats.h>
+#include <lwip/snmp.h>
+#include <netif/etharp.h>
+
+#include <net/gen/ether.h>
+#include <net/gen/eth_io.h>
+
+#include "proto.h"
+#include "driver.h"
+
+extern endpoint_t lwip_ep;
+
+static err_t low_level_output(__unused struct netif *netif, struct pbuf *pbuf)
+{
+       struct nic * nic;
+
+       nic = (struct nic *) netif->state;
+       assert(&nic->netif == netif);
+
+       debug_print("device /dev/%s", nic->name);
+
+       if (driver_tx_enqueue(nic, pbuf) != OK)
+               return ERR_MEM;
+
+       /* if the driver is idle, start transmitting the packet */
+       if (nic->state == DRV_IDLE) {
+               if (!driver_tx(nic))
+                       return ERR_MEM;
+       }
+
+       return ERR_OK;
+}
+
+static void low_level_init(struct netif *netif)
+{
+       message m;
+       struct nic * nic = (struct nic *) netif->state;
+
+       assert(nic);
+
+       /* set MAC hardware address length */
+       netif->hwaddr_len = ETHARP_HWADDR_LEN;
+
+       /* maximum transfer unit */
+       netif->mtu = 1500;
+       nic->max_pkt_sz = ETH_MAX_PACK_SIZE;
+       nic->min_pkt_sz = ETH_MIN_PACK_SIZE;
+
+       /* device capabilities */
+       netif->flags = NETIF_FLAG_ETHARP;
+
+        m.DL_MODE = DL_NOMODE;
+        if (nic->flags & NWEO_EN_BROAD)
+                m.DL_MODE |= DL_BROAD_REQ;
+        if (nic->flags & NWEO_EN_MULTI)
+                m.DL_MODE |= DL_MULTI_REQ;
+        if (nic->flags & NWEO_EN_PROMISC)
+                m.DL_MODE |= DL_PROMISC_REQ;
+
+        m.m_type = DL_CONF;
+
+       if (asynsend(((struct nic *)netif->state)->drv_ep , &m) != OK)
+               printf("LWIP : ERROR cannot send DL_CONF to driver\n");
+}
+
+
+err_t ethernetif_init(struct netif *netif)
+{
+       /*
+        * Initialize the snmp variables and counters inside the struct netif.
+        * The last argument should be replaced with your link speed, in units
+        * of bits per second.
+       NETIF_INIT_SNMP(netif, snmp_ifType_ethernet_csmacd, LINK_SPEED_OF_YOUR_NETIF_IN_BPS);
+        */
+
+       netif->output = etharp_output;
+       netif->linkoutput = low_level_output;
+
+       /* initialize the hardware */
+       low_level_init(netif);
+
+       return ERR_OK;
+}
diff --git a/servers/lwip/inet_config.c b/servers/lwip/inet_config.c
new file mode 100644 (file)
index 0000000..475be3c
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+inet/inet_config.c
+
+Created:       Nov 11, 1992 by Philip Homburg
+
+Modified:      Apr 07, 2001 by Kees J. Bot
+               Read the configuration file and fill in the xx_conf[] arrays.
+
+Copyright 1995 Philip Homburg
+*/
+
+#define _MINIX_SOURCE 1
+#define _POSIX_SOURCE 1
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <minix/type.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include "inet_config.h"
+
+#include "proto.h"
+#include "socket.h"
+
+
+struct eth_conf eth_conf[IP_PORT_MAX];
+struct psip_conf psip_conf[IP_PORT_MAX];
+struct ip_conf ip_conf[IP_PORT_MAX];
+struct tcp_conf tcp_conf[IP_PORT_MAX];
+struct udp_conf udp_conf[IP_PORT_MAX];
+dev_t ip_dev;
+
+int eth_conf_nr;
+int psip_conf_nr;
+int ip_conf_nr;
+int tcp_conf_nr;
+int udp_conf_nr;
+
+int ip_forward_directed_bcast= 0;      /* Default is off */
+
+static int ifdefault= -1;              /* Default network interface. */
+
+static void fatal(char *label)
+{
+       printf("init: %s: %s\n", label, strerror(errno));
+       exit(1);
+}
+
+static void check_mknod(char *device, mode_t mode, int minor)
+/* Check if a device exists with the proper device number. */
+{
+       dev_t dev;
+
+       dev= (ip_dev & 0xFF00) | minor;
+
+       unlink(device);
+       if (mknod(device, S_IFCHR | mode, dev) < 0) fatal(device);
+       printf("mknod %s c %d %d\n", device, (ip_dev >> 8), minor);
+}
+
+static int cfg_fd;
+static char word[16];
+static unsigned char line[256], *lineptr;
+static unsigned linenr;
+
+static __dead void error(void)
+{
+       printf("inet: error on line %u\n", linenr);
+       exit(1);
+}
+
+static int nextline(void)
+{
+       /* Read a line from the configuration file, to be used by subsequent
+        * token() calls. Skip empty lines, and lines where the first character
+        * after leading "whitespace" is '#'. The last line of the file need
+        * not be terminated by a newline. Return 1 if a line was read in
+        * successfully, and 0 on EOF or error.
+        */
+       unsigned char *lp, c;
+       int r, skip;
+
+       lineptr = lp = line;
+       linenr++;
+       skip = -1;
+
+       while ((r = read(cfg_fd, &c, 1)) == 1) {
+               if (c == '\n') {
+                       if (skip == 0)
+                               break;
+
+                       linenr++;
+                       skip = -1;
+                       continue;
+               }
+
+               if (skip == -1 && c > ' ')
+                       skip = (c == '#');
+
+               if (skip == 0 && lp < (unsigned char *) line + sizeof(line)-1)
+                       *lp++ = c;
+       }
+
+       *lp = 0;
+       return (r == 1 || lp != line);
+}
+
+static void token(int need)
+{
+       /* Read a word from the configuration line.  Return a null string on
+        * EOL.  Return a punctuation as a one character word.  If 'need' is
+        * true then an actual word is expected at this point, so err out if
+        * not.
+        */
+       unsigned char *wp;
+       static unsigned char c= '\n';
+
+       wp= (unsigned char *) word;
+       *wp = 0;
+
+       while (c <= ' ') {
+               if (*lineptr == 0) {
+                       if (need) error();
+                       return;
+               }
+               c = *lineptr++;
+       }
+
+       do {
+               if (wp < (unsigned char *) word + sizeof(word)-1) *wp++ = c;
+               c = (*lineptr != 0) ? *lineptr++ : ' ';
+               if (word[0] == ';' || word[0] == '{' || word[0] == '}') {
+                       if (need) error();
+                       break;
+               }
+       } while (c > ' ' && c != ';' && c != '{' && c != '}');
+       *wp = 0;
+}
+
+void inet_read_conf(void)
+{
+       int ifno, enable;
+       struct stat st;
+
+       { static int first= 1; 
+               if (!first)
+                       panic(( "LWIP : read_conf: called a second time" ));
+               first= 0;
+#if 0
+               *(u8_t *)0 = 0xcc;      /* INT 3 */
+#endif
+       }
+
+
+       /* Open the configuration file. */
+       if ((cfg_fd= open(PATH_INET_CONF, O_RDONLY)) == -1)
+               fatal(PATH_INET_CONF);
+
+       while (nextline()) {
+               token(1);
+               char drv_name[128];
+               unsigned instance;
+
+               if (strncmp(word, "eth", 3) == 0) {
+
+                       ifno = strtol(word+3, NULL, 10);
+                       token(1);
+#if 1
+                       strncpy(drv_name, word, 128);
+#else
+                       sprintf(drv_name, "%s_debug", word);
+#endif
+                       token(1);
+                       instance = strtol(word, NULL, 10);
+               } else {
+                       printf("inet: Unknown device '%s'\n", word);
+                       error();
+               }
+
+               enable= 7;      /* 1 = IP, 2 = TCP, 4 = UDP */
+
+               token(0);
+               if (word[0] == '{') {
+                       token(0);
+                       while (word[0] != '}') {
+                               if (strcmp(word, "default") == 0) {
+                                       if (ifdefault != -1) {
+                                               printf(
+                               "inet: ip%d and ip%d can't both be default\n",
+                                                       ifdefault, ifno);
+                                               error();
+                                       }
+                                       ifdefault= ifno;
+                                       token(0);
+                               } else
+                               if (strcmp(word, "no") == 0) {
+                                       token(1);
+                                       if (strcmp(word, "ip") == 0) {
+                                               enable= 0;
+                                       } else
+                                       if (strcmp(word, "tcp") == 0) {
+                                               enable &= ~2;
+                                       } else
+                                       if (strcmp(word, "udp") == 0) {
+                                               enable &= ~4;
+                                       } else {
+                                               printf(
+                                               "inet: Can't do 'no %s'\n",
+                                                       word);
+                                               exit(1);
+                                       }
+                                       token(0);
+                               } else {
+                                       printf("inet: Unknown option '%s'\n",
+                                               word);
+                                       exit(1);
+                               }
+                               if (word[0] == ';') token(0);
+                               else
+                               if (word[0] != '}') error();
+                       }
+                       token(0);
+               }
+               if (word[0] != ';' && word[0] != 0) error();
+
+               nic_assign_driver("eth", ifno, drv_name, instance, ifdefault == ifno);
+       }
+
+       if (ifdefault == -1) {
+               printf("inet: No networks or no default network defined\n");
+               exit(1);
+       }
+
+       /* Set umask 0 so we can creat mode 666 devices. */
+       (void) umask(0);
+
+       /* See what the device number of /dev/ip is.  That's what we
+        * used last time for the network devices, so we keep doing so.
+        */
+       if (stat("/dev/ip", &st) < 0) fatal("/dev/ip");
+       ip_dev= st.st_rdev;
+
+       /* create protocol devices */
+       check_mknod("/dev/ip", 0600, SOCK_TYPE_IP);
+       check_mknod("/dev/tcp", 0666, SOCK_TYPE_TCP);
+       check_mknod("/dev/udp", 0666, SOCK_TYPE_UDP);
+
+       /*
+        * create hw devices, to configure ip we need also ip devices for each
+        */
+       check_mknod("/dev/ip0", 0600, SOCK_TYPES + 0);
+       check_mknod("/dev/eth0", 0600, SOCK_TYPES + 0);
+
+       check_mknod("/dev/ip1", 0600, SOCK_TYPES + 1);
+       check_mknod("/dev/eth1", 0600, SOCK_TYPES + 1);
+
+       check_mknod("/dev/ip2", 0600, SOCK_TYPES + 2);
+       check_mknod("/dev/eth2", 0600, SOCK_TYPES + 2);
+
+       check_mknod("/dev/ip3", 0600, SOCK_TYPES + 3);
+       check_mknod("/dev/eth3", 0600, SOCK_TYPES + 3);
+
+       check_mknod("/dev/ip4", 0600, SOCK_TYPES + 4);
+       check_mknod("/dev/eth4", 0600, SOCK_TYPES + 4);
+
+}
diff --git a/servers/lwip/inet_config.h b/servers/lwip/inet_config.h
new file mode 100644 (file)
index 0000000..743ab9a
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+inet/inet_config.h
+
+Created:       Nov 11, 1992 by Philip Homburg
+
+Defines values for configurable parameters. The structure definitions for
+configuration information are also here.
+
+Copyright 1995 Philip Homburg
+*/
+
+#ifndef INET__INET_CONFIG_H
+#define INET__INET_CONFIG_H
+
+/* Inet configuration file. */
+#define PATH_INET_CONF "/etc/inet.conf"
+
+#define IP_PORT_MAX    32      /* Up to this many network devices */
+extern int eth_conf_nr;                /* Number of ethernets */
+extern int psip_conf_nr;       /* Number of Pseudo IP networks */
+extern int ip_conf_nr;         /* Number of configured IP layers */
+extern int tcp_conf_nr;                /* Number of configured TCP layers */
+extern int udp_conf_nr;                /* Number of configured UDP layers */
+
+extern dev_t ip_dev;           /* Device number of /dev/ip */
+
+struct eth_conf
+{
+       char *ec_label;         /* Process label name if nonnull */
+       u8_t ec_port;           /* Ethernet port for VLAN if label == NULL */
+       u8_t ec_ifno;           /* Interface number of /dev/eth* */
+       u16_t ec_vlan;          /* VLAN number of this net if label == NULL */
+};
+#define eth_is_vlan(ecp)       ((ecp)->ec_label == NULL)
+
+struct psip_conf
+{
+       u8_t pc_ifno;           /* Interface number of /dev/psip* */
+};
+
+struct ip_conf
+{
+       u8_t ic_devtype;        /* Underlying device type: Ethernet / PSIP */
+       u8_t ic_port;           /* Port of underlying device */
+       u8_t ic_ifno;           /* Interface number of /dev/ip*, tcp*, udp* */
+};
+
+struct tcp_conf
+{
+       u8_t tc_port;           /* IP port number */
+};
+
+struct udp_conf
+{
+       u8_t uc_port;           /* IP port number */
+};
+
+/* Types of networks. */
+#define NETTYPE_ETH    1
+#define NETTYPE_PSIP   2
+
+/* To compute the minor device number for a device on an interface. */
+#define if2minor(ifno, dev)    (1 + (ifno) * 8 + (dev))
+
+#define IPSTAT_DEV     "/dev/ipstat"
+#define IPSTAT_MODE    0666    /* Is this right? What about just setuid apps */
+#define IPSTAT_MINOR   0       /* Minor number of /dev/ipstat */
+
+/* Offsets of the minor device numbers within a group per interface. */
+#define ETH_DEV_OFF    0
+#define PSIP_DEV_OFF   0
+#define IP_DEV_OFF     1
+#define TCP_DEV_OFF    2
+#define UDP_DEV_OFF    3
+
+extern struct eth_conf eth_conf[IP_PORT_MAX];
+extern struct psip_conf psip_conf[IP_PORT_MAX];
+extern struct ip_conf ip_conf[IP_PORT_MAX];
+extern struct tcp_conf tcp_conf[IP_PORT_MAX];
+extern struct udp_conf udp_conf[IP_PORT_MAX];
+void read_conf(void);
+extern char *sbrk(int);
+void *alloc(size_t size);
+
+/* Options */
+extern int ip_forward_directed_bcast;
+
+#endif /* INET__INET_CONFIG_H */
+
+/*
+ * $PchId: inet_config.h,v 1.10 2003/08/21 09:24:33 philip Exp $
+ */
diff --git a/servers/lwip/lwip.c b/servers/lwip/lwip.c
new file mode 100644 (file)
index 0000000..dbe381b
--- /dev/null
@@ -0,0 +1,264 @@
+#include <unistd.h>
+#include <timers.h>
+#include <sys/svrctl.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <errno.h>
+#include <minix/sef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <minix/syslib.h>
+#include <minix/sysutil.h>
+#include <minix/timers.h>
+
+#include "proto.h"
+#include "socket.h"
+
+#include <lwip/mem.h>
+#include <lwip/pbuf.h>
+#include <lwip/stats.h>
+#include <lwip/netif.h>
+#include <netif/etharp.h>
+#include <lwip/tcp_impl.h>
+
+endpoint_t lwip_ep;
+
+static timer_t tcp_ftmr, tcp_stmr, arp_tmr;
+static int arp_ticks, tcp_fticks, tcp_sticks;
+
+static struct netif * netif_lo;
+
+static void driver_announce()
+{
+       /* Announce we are up after a fresh start or restart. */
+       int err;
+       char key[DS_MAX_KEYLEN];
+       char label[DS_MAX_KEYLEN];
+       char *driver_prefix = "drv.vfs.";
+
+       /* Callers are allowed to use sendrec to communicate with drivers.
+        * For this reason, there may blocked callers when a driver restarts.
+        * Ask the kernel to unblock them (if any).
+        */
+       if ((err = sys_statectl(SYS_STATE_CLEAR_IPC_REFS)) != OK)
+               panic("LWIP : sys_statectl failed: %d\n", err);
+
+       /* Publish a driver up event. */
+       if ((err = ds_retrieve_label_name(label, getprocnr())) != OK)
+               panic("LWIP : unable to get own label: %d\n", err);
+       snprintf(key, DS_MAX_KEYLEN, "%s%s", driver_prefix, label);
+       if ((err = ds_publish_u32(key, DS_DRIVER_UP, DSF_OVERWRITE)))
+               panic("LWIP : unable to publish driver up event: %d\n", err);
+}
+
+void sys_init(void)
+{
+}
+
+static void arp_watchdog(__unused timer_t *tp)
+{
+       etharp_tmr();
+       set_timer(&arp_tmr, arp_ticks, arp_watchdog, 0);
+}
+
+static void tcp_fwatchdog(__unused timer_t *tp)
+{
+       tcp_fasttmr();
+       set_timer(&tcp_ftmr, tcp_fticks, tcp_fwatchdog, 0);
+}
+
+static void tcp_swatchdog(__unused timer_t *tp)
+{
+       tcp_slowtmr();
+       set_timer(&tcp_ftmr, tcp_sticks, tcp_swatchdog, 0);
+}
+
+static int sef_cb_init_fresh(__unused int type, __unused sef_init_info_t *info)
+{
+       int err;
+       unsigned hz;
+
+       char my_name[16];
+       int my_priv;
+
+       err = sys_whoami(&lwip_ep, my_name, sizeof(my_name), &my_priv);
+       if (err != OK)
+               panic("Cannot get own endpoint");
+
+       nic_init_all();
+       inet_read_conf();
+
+       /* init lwip library */
+       stats_init();
+       sys_init();
+       mem_init();
+       memp_init();
+       pbuf_init();
+
+       hz = sys_hz();
+
+       arp_ticks = ARP_TMR_INTERVAL / (1000 / hz);
+       tcp_fticks = TCP_FAST_INTERVAL / (1000 / hz);
+       tcp_sticks = TCP_SLOW_INTERVAL / (1000 / hz);
+
+       etharp_init();
+       
+       set_timer(&arp_tmr, arp_ticks, arp_watchdog, 0);
+       set_timer(&tcp_ftmr, tcp_fticks, tcp_fwatchdog, 0);
+       set_timer(&tcp_stmr, tcp_sticks, tcp_swatchdog, 0);
+       
+       netif_init();
+       netif_lo = netif_find("lo0");
+
+       /* Read configuration. */
+#if 0
+       nw_conf();
+
+       /* Get a random number */
+       timerand= 1;
+       fd = open(RANDOM_DEV_NAME, O_RDONLY | O_NONBLOCK);
+       if (fd != -1)
+       {
+               err= read(fd, randbits, sizeof(randbits));
+               if (err == sizeof(randbits))
+                       timerand= 0;
+               else
+               {
+                       printf("inet: unable to read random data from %s: %s\n",
+                               RANDOM_DEV_NAME, err == -1 ? strerror(errno) :
+                               err == 0 ? "EOF" : "not enough data");
+               }
+               close(fd);
+       }
+       else
+       {
+               printf("inet: unable to open random device %s: %s\n",
+                               RANDOM_DEV_NAME, strerror(errno));
+       }
+       if (timerand)
+       {
+               printf("inet: using current time for random-number seed\n");
+               err= gettimeofday(&tv, NULL);
+               if (err == -1)
+               {
+                       printf("sysutime failed: %s\n", strerror(errno));
+                       exit(1);
+               }
+               memcpy(randbits, &tv, sizeof(tv));
+       }
+       init_rand256(randbits);
+#endif
+
+       /* Subscribe to driver events for network drivers. */
+       if ((err = ds_subscribe("drv\\.net\\..*",
+                                       DSF_INITIAL | DSF_OVERWRITE)) != OK)
+               panic(("inet: can't subscribe to driver events"));
+
+       /* Announce we are up. INET announces its presence to VFS just like
+        * any other driver.
+        */
+       driver_announce();
+
+       return(OK);
+}
+
+static void sef_local_startup()
+{
+       /* Register init callbacks. */
+       sef_setcb_init_fresh(sef_cb_init_fresh);
+       sef_setcb_init_restart(sef_cb_init_fresh);
+
+       /* No live update support for now. */
+
+       /* Let SEF perform startup. */
+       sef_startup();
+}
+
+static void ds_event(void)
+{
+       char key[DS_MAX_KEYLEN];
+       char *driver_prefix = "drv.net.";
+       char *label;
+       u32_t value;
+       int type;
+       endpoint_t owner_endpoint;
+       int r;
+
+       /* We may get one notification for multiple updates from DS. Get events
+        * and owners from DS, until DS tells us that there are no more.
+        */
+       while ((r = ds_check(key, &type, &owner_endpoint)) == OK) {
+               r = ds_retrieve_u32(key, &value);
+               if(r != OK) {
+                       printf("LWIP : ds_event: ds_retrieve_u32 failed\n");
+                       return;
+               }
+
+               /* Only check for network driver up events. */
+               if(strncmp(key, driver_prefix, sizeof(driver_prefix))
+                               || value != DS_DRIVER_UP)
+                       return;
+
+               /* The driver label comes after the prefix. */
+               label = key + strlen(driver_prefix);
+
+               /* A driver is (re)started. */
+               driver_up(label, owner_endpoint);
+       }
+
+       if(r != ENOENT)
+               printf("LWIP : ds_event: ds_check failed: %d\n", r);
+}
+
+static void netif_poll_lo(void)
+{
+       if (netif_lo == NULL)
+               return;
+
+       while (netif_lo->loop_first)
+               netif_poll(netif_lo);
+}
+
+int main(__unused int argc, __unused char ** argv)
+{
+       sef_local_startup();
+
+       for(;;) {
+               int err, ipc_status;
+               message m;
+
+               netif_poll_lo();
+
+               mq_process();
+
+               if ((err = sef_receive_status(ANY, &m, &ipc_status)) != OK) {
+                       printf("LWIP : sef_receive_status errr %d\n", err);
+                       continue;
+               }
+
+               if (m.m_source == VFS_PROC_NR)
+                       socket_request(&m);
+               else if (is_ipc_notify(ipc_status)) {
+                       switch (m.m_source) {
+                       case CLOCK:
+                               expire_timers(m.NOTIFY_TIMESTAMP);
+                               break;
+                       case DS_PROC_NR:
+                               ds_event();
+                               break;
+                       case PM_PROC_NR:
+                               panic("LWIP : unhandled event from PM");
+                               break;
+                       default:
+                               printf("LWIP : unexpected notify from %d\n",
+                                                               m.m_source);
+                               continue;
+                       }
+               } else
+                       /* all other request can be from drivers only */
+                       driver_request(&m);
+       }
+
+       return 0;
+}
diff --git a/servers/lwip/proto.h b/servers/lwip/proto.h
new file mode 100644 (file)
index 0000000..e0fe88d
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef __LWIP_PROTO_H__
+#define __LWIP_PROTO_H__
+
+#include <errno.h>
+#include <minix/ipc.h>
+#include <minix/endpoint.h>
+#include <minix/syslib.h>
+#include <minix/safecopies.h>
+#include <minix/const.h>
+
+#include <lwip/err.h>
+#include <lwip/netif.h>
+
+#if 0
+#define debug_print(str, ...) printf("LWIP %s:%d : " str "\n", \
+               __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define debug_print(...)
+#endif
+
+/* driver .c */
+void nic_assign_driver(const char * dev_type,
+                       unsigned dev_num,
+                       const char * driver_name,
+                       unsigned instance,
+                       int is_default);
+void nic_init_all(void);
+void driver_request(message * m);
+void driver_up(const char * label, endpoint_t ep);
+/* opens a raw NIC socket */
+void nic_open(message *m);
+void nic_default_ioctl(message *m);
+
+/* inet_config.c */
+void inet_read_conf(void);
+
+/* eth.c */
+err_t ethernetif_init(struct netif *netif);
+
+static inline int copy_from_user(endpoint_t proc,
+                               void * dst_ptr,
+                               size_t size,
+                               cp_grant_id_t gid,
+                               vir_bytes offset)
+{
+       return sys_safecopyfrom(proc, gid, offset, (vir_bytes)dst_ptr, size, D);
+}
+
+static inline int copy_to_user(endpoint_t proc,
+                               void * src_ptr,
+                               size_t size,
+                               cp_grant_id_t gid,
+                               vir_bytes offset)
+{
+       return sys_safecopyto(proc, gid, offset, (vir_bytes)src_ptr, size, D);
+}
+
+#endif /* __LWIP_PROTO_H__ */
diff --git a/servers/lwip/raw_ip.c b/servers/lwip/raw_ip.c
new file mode 100644 (file)
index 0000000..16c7c7f
--- /dev/null
@@ -0,0 +1,368 @@
+#include <stdlib.h>
+
+#include <net/ioctl.h>
+#include <net/gen/in.h>
+#include <net/gen/ip_io.h>
+
+#include <lwip/raw.h>
+#include <lwip/ip_addr.h>
+
+#include "socket.h"
+#include "proto.h"
+
+#define RAW_IP_BUF_SIZE        (32 << 10)
+
+#define sock_alloc_buf(s)      debug_malloc(s)
+#define sock_free_buf(x)       debug_free(x)
+
+struct raw_ip_recv_data {
+       ip_addr_t       ip;
+       struct pbuf *   pbuf;
+};
+
+#define raw_ip_recv_alloc()    debug_malloc(sizeof(struct raw_ip_recv_data))
+
+static void raw_ip_recv_free(void * data)
+{
+       if (((struct raw_ip_recv_data *)data)->pbuf)
+               pbuf_free(((struct raw_ip_recv_data *)data)->pbuf);
+       debug_free(data);
+}
+
+
+static int raw_ip_op_open(struct socket * sock, __unused message * m)
+{
+       debug_print("socket num %ld", get_sock_num(sock));
+
+       if (!(sock->buf = sock_alloc_buf(RAW_IP_BUF_SIZE))) {
+               return ENOMEM;
+       }
+       sock->buf_size = RAW_IP_BUF_SIZE;
+
+       return OK;
+}
+
+static void raw_ip_close(struct socket * sock)
+{
+       /* deque and free all enqueued data before closing */
+       sock_dequeue_data_all(sock, raw_ip_recv_free);
+
+       if (sock->pcb)
+               raw_remove(sock->pcb);
+       if (sock->buf)
+               sock_free_buf(sock->buf);
+
+       /* mark it as unused */
+       sock->ops = NULL;
+}
+
+static void raw_ip_op_close(struct socket * sock, __unused message * m)
+{
+       debug_print("socket num %ld", get_sock_num(sock));
+
+       raw_ip_close(sock);
+
+       sock_reply(sock, OK);
+}
+
+static int raw_ip_do_receive(message * m,
+                       struct pbuf *pbuf)
+{
+       struct pbuf * p;
+       unsigned rem_len = m->COUNT;
+       unsigned written = 0, hdr_sz = 0;
+       int err;
+
+       debug_print("user buffer size : %d\n", rem_len);
+
+       for (p = pbuf; p && rem_len; p = p->next) {
+               size_t cp_len;
+
+               cp_len = (rem_len < p->len) ? rem_len : p->len;
+               err = copy_to_user(m->IO_ENDPT, p->payload, cp_len,
+                               (cp_grant_id_t) m->IO_GRANT,
+                               hdr_sz + written);
+
+               if (err != OK)
+                       return err;
+
+               written += cp_len;
+               rem_len -= cp_len;
+       }
+
+       debug_print("copied %d bytes\n", written + hdr_sz);
+       return written + hdr_sz;
+}
+
+static u8_t raw_ip_op_receive(void *arg,
+                       __unused struct raw_pcb *pcb,
+                       struct pbuf *pbuf,
+                       ip_addr_t *addr)
+{
+       struct socket * sock = (struct socket *) arg;
+       struct raw_ip_recv_data * data;
+       int ret;
+
+       debug_print("socket num : %ld addr : %x\n",
+                       get_sock_num(sock), (unsigned int) addr->addr);
+
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               /* we are resuming a suspended operation */
+               ret = raw_ip_do_receive(&sock->mess, pbuf);
+
+               if (ret > 0) {
+                       sock_revive(sock, ret);
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+                       if (sock->usr_flags & NWIO_EXCL) {
+                               pbuf_free(pbuf);
+                               return 1;
+                       } else
+                               return 0;
+               } else {
+                       sock_revive(sock, ret);
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+               }
+       }
+
+       /* Do not enqueue more data than allowed */
+       if (sock->recv_data_size > RAW_IP_BUF_SIZE)
+               return 0;
+
+       /*
+        * nobody is waiting for the data or an error occured above, we enqueue
+        * the packet
+        */
+       if (!(data = raw_ip_recv_alloc())) {
+               return 0;
+       }
+
+       data->ip = *addr;
+       if (sock->usr_flags & NWIO_EXCL) {
+               data->pbuf = pbuf;
+               ret = 1;
+       } else {
+               /* we store a copy of this packet */
+               data->pbuf = pbuf_alloc(PBUF_RAW, pbuf->tot_len, PBUF_RAM);
+               if (data->pbuf == NULL) {
+                       debug_print("LWIP : cannot allocated new pbuf\n");
+                       raw_ip_recv_free(data);
+                       return 0;
+               }
+
+               if (pbuf_copy(data->pbuf, pbuf) != ERR_OK) {
+                       debug_print("LWIP : cannot copy pbuf\n");
+                       raw_ip_recv_free(data);
+                       return 0;
+               }
+
+               ret = 0;
+       }
+
+       /*
+        * If we didn't managed to enqueue the packet we report it as not
+        * consumed
+        */
+       if (sock_enqueue_data(sock, data, data->pbuf->tot_len) != OK) {
+               raw_ip_recv_free(data);
+               ret = 0;
+       }
+
+       return ret;
+}
+
+static void raw_ip_op_read(struct socket * sock, message * m)
+{
+       debug_print("socket num %ld", get_sock_num(sock));
+
+       if (sock->pcb == NULL) {
+               sock_reply(sock, EIO);
+               return;
+       }
+
+       if (sock->recv_head) {
+               /* data available receive immeditely */
+
+               struct raw_ip_recv_data * data;
+               int ret;
+
+               data = (struct raw_ip_recv_data *) sock->recv_head->data;
+
+               ret = raw_ip_do_receive(m,      data->pbuf);
+
+               if (ret > 0) {
+                       sock_dequeue_data(sock);
+                       sock->recv_data_size -= data->pbuf->tot_len;
+                       raw_ip_recv_free(data);
+               }
+               sock_reply(sock, ret);
+       } else {
+               /* store the message so we know how to reply */
+               sock->mess = *m;
+               /* operation is being processes */
+               sock->flags |= SOCK_FLG_OP_PENDING;
+
+               debug_print("no data to read, suspending");
+               sock_reply(sock, SUSPEND);
+       }
+}
+
+static void raw_ip_op_write(struct socket * sock, message * m)
+{
+       int ret;
+       struct pbuf * pbuf;
+       struct ip_hdr * ip_hdr;
+
+       debug_print("socket num %ld data size %d",
+                       get_sock_num(sock), m->COUNT);
+
+       if (sock->pcb == NULL) {
+               ret = EIO;
+               goto write_err;
+       }
+
+       if ((size_t) m->COUNT > sock->buf_size) {
+               ret = ENOMEM;
+               goto write_err;
+       }
+
+       pbuf = pbuf_alloc(PBUF_LINK, m->COUNT, PBUF_RAM);
+       if (!pbuf) {
+               ret = ENOMEM;
+               goto write_err;
+       }
+
+       if ((ret = copy_from_user(m->IO_ENDPT, pbuf->payload, m->COUNT,
+                               (cp_grant_id_t) m->IO_GRANT, 0)) != OK) {
+               pbuf_free(pbuf);
+               goto write_err;
+       }
+
+       ip_hdr = (struct ip_hdr *) pbuf->payload;
+       if (pbuf_header(pbuf, -IP_HLEN)) {
+               pbuf_free(pbuf);
+               ret = EIO;
+               goto write_err;
+       }
+
+       if ((ret = raw_sendto((struct raw_pcb *)sock->pcb, pbuf,
+                               (ip_addr_t *) &ip_hdr->dest)) != OK) {
+               debug_print("raw_sendto failed %d", ret);
+               ret = EIO;
+       } else
+               ret = m->COUNT;
+       
+
+       pbuf_free(pbuf);
+       
+write_err:
+       sock_reply(sock, ret);
+}
+
+static void raw_ip_set_opt(struct socket * sock, message * m)
+{
+       int err;
+       nwio_ipopt_t ipopt;
+       struct raw_pcb * pcb;
+
+       err = copy_from_user(m->IO_ENDPT, &ipopt, sizeof(ipopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       debug_print("ipopt.nwio_flags = 0x%lx", ipopt.nwio_flags);
+       debug_print("ipopt.nwio_proto = 0x%x", ipopt.nwio_proto);
+       debug_print("ipopt.nwio_rem = 0x%x",
+                               (unsigned int) ipopt.nwio_rem);
+
+       if (sock->pcb == NULL) {
+               if (!(pcb = raw_new(ipopt.nwio_proto))) {
+                       raw_ip_close(sock);
+                       sock_reply(sock, ENOMEM);
+                       return;
+               }
+
+               sock->pcb = pcb;
+       } else
+               pcb = (struct raw_pcb *) sock->pcb;
+
+       if (pcb->protocol != ipopt.nwio_proto) {
+               debug_print("conflicting ip socket protocols\n");
+               sock_reply(sock, EBADIOCTL);
+       }
+
+       sock->usr_flags = ipopt.nwio_flags;
+
+#if 0
+       if (raw_bind(pcb, (ip_addr_t *)&ipopt.nwio_rem) == ERR_USE) {
+               raw_ip_close(sock);
+               sock_reply(sock, EADDRINUSE);
+               return;
+       }
+#endif
+
+       /* register a receive hook */
+       raw_recv((struct raw_pcb *) sock->pcb, raw_ip_op_receive, sock);
+
+       sock_reply(sock, OK);
+}
+
+static void raw_ip_get_opt(struct socket * sock, message * m)
+{
+       int err;
+       nwio_ipopt_t ipopt;
+       struct raw_pcb * pcb = (struct raw_pcb *) sock->pcb;
+
+       assert(pcb);
+
+       ipopt.nwio_rem = pcb->remote_ip.addr;
+       ipopt.nwio_flags = sock->usr_flags;
+
+       if ((unsigned) m->COUNT < sizeof(ipopt)) {
+               sock_reply(sock, EINVAL);
+               return;
+       }
+
+       err = copy_to_user(m->IO_ENDPT, &ipopt, sizeof(ipopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       sock_reply(sock, OK);
+}
+
+static void raw_ip_op_ioctl(struct socket * sock, message * m)
+{
+       debug_print("socket num %ld req %c %d %d",
+                       get_sock_num(sock),
+                       (m->REQUEST >> 8) & 0xff,
+                       m->REQUEST & 0xff,
+                       (m->REQUEST >> 16) & _IOCPARM_MASK);
+       
+       switch (m->REQUEST) {
+       case NWIOSIPOPT:
+               raw_ip_set_opt(sock, m);
+               break;
+       case NWIOGIPOPT:
+               raw_ip_get_opt(sock, m);
+               break;
+       default:
+               /*
+                * /dev/ip can be also accessed as a default device to be
+                * configured
+                */
+               nic_default_ioctl(m);
+               return;
+       }
+}
+
+struct sock_ops sock_raw_ip_ops = {
+       .open           = raw_ip_op_open,
+       .close          = raw_ip_op_close,
+       .read           = raw_ip_op_read,
+       .write          = raw_ip_op_write,
+       .ioctl          = raw_ip_op_ioctl,
+       .select         = generic_op_select,
+       .select_reply   = generic_op_select_reply
+};
diff --git a/servers/lwip/socket.c b/servers/lwip/socket.c
new file mode 100644 (file)
index 0000000..de67399
--- /dev/null
@@ -0,0 +1,644 @@
+/*
+ * This file implements handling of socket-related requests from VFS
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include <minix/ipc.h>
+#include <minix/com.h>
+#include <minix/callnr.h>
+#include <minix/sysutil.h>
+
+#include <lwip/tcp.h>
+
+#include <net/ioctl.h>
+
+#include "inet_config.h"
+#include "proto.h"
+#include "socket.h"
+
+#if 0
+#define debug_sock_print(str, ...) printf("LWIP %s:%d : " str "\n", \
+               __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define debug_sock_print(...) debug_print(__VA_ARGS__)
+#endif
+
+
+struct socket socket[MAX_SOCKETS];
+static int notified;
+
+#define recv_q_alloc() debug_malloc(sizeof(struct recv_q))
+#define recv_q_free    debug_free
+
+struct mq {
+       message         m;
+       struct mq *     prev;
+       struct mq *     next;
+};
+
+#define mq_alloc()     debug_malloc(sizeof(struct mq))
+#define mq_free                debug_free
+
+static struct mq * mq_head, *mq_tail;
+
+static int mq_enqueue(message * m)
+{
+       struct mq * mq;
+
+       debug_sock_print("sock %d op %d", m->DEVICE, m->m_type);
+       mq = mq_alloc();
+
+       if (mq == NULL)
+               return -1;
+
+       mq->next = NULL;
+       mq->m = *m;
+
+       if (mq_head) {
+               mq->prev = mq_tail;
+               mq_tail->next = mq;
+               mq_tail = mq;
+       }
+       else {
+               mq->prev = NULL;
+               mq_head = mq_tail = mq;
+       }
+
+       return 0;
+}
+
+__unused static struct mq * mq_dequeue_head(void)
+{
+       struct mq * ret;
+
+       if (!mq_head)
+               return NULL;
+
+       ret = mq_head;
+
+       if (mq_head != mq_tail) {
+               mq_head = mq_head->next;
+               mq_head->prev = NULL;
+       } else
+               mq_head = mq_tail = NULL;
+       
+       debug_sock_print("socket %d\n", ret->m.DEVICE);
+
+       return ret;
+}
+
+static void mq_dequeue(struct mq * mq)
+{
+       if (mq_head == mq_tail)
+               mq_head = mq_tail = NULL;
+       else {
+               if (mq->prev == NULL) {
+                       mq_head = mq->next;
+                       mq_head->prev = NULL;
+               } else
+                       mq->prev->next = mq->next;
+               if (mq->next == NULL) {
+                       mq_tail = mq->prev;
+                       mq_tail->next = NULL;
+               } else
+                       mq->next->prev = mq->prev;
+       }
+}
+
+static int mq_cancel(message * m)
+{
+       struct mq * mq;
+
+       for (mq = mq_tail; mq; mq = mq->prev) {
+               if (m->DEVICE == mq->m.DEVICE &&
+                               m->IO_ENDPT == mq->m.IO_ENDPT &&
+                               m->IO_GRANT == mq->m.IO_GRANT) {
+                       debug_sock_print("socket %d\n", mq->m.DEVICE);
+                       break;
+               }
+       }
+
+       mq_dequeue(mq);
+       mq_free(mq);
+
+       return 1;
+}
+
+int sock_enqueue_data(struct socket * sock, void * data, unsigned size)
+{
+       struct recv_q * r;
+
+       if (!(r = recv_q_alloc()))
+               return ENOMEM;
+
+       r->data = data;
+       r->next = NULL;
+
+       if (sock->recv_head) {
+               sock->recv_tail->next = r;
+               sock->recv_tail = r;
+       } else {
+               sock->recv_head = sock->recv_tail = r;
+       }
+
+       assert(size > 0);
+       sock->recv_data_size += size;
+
+       return OK;
+}
+
+void * sock_dequeue_data(struct socket * sock)
+{
+       void * data;
+       struct recv_q * r;
+
+       if ((r = sock->recv_head)) {
+               data = r->data;
+               if (!(sock->recv_head = r->next))
+                       sock->recv_tail = NULL;
+               recv_q_free(r);
+
+               return data;
+       }
+
+       return NULL;
+}
+
+void sock_dequeue_data_all(struct socket * sock,
+                               recv_data_free_fn data_free)
+{
+       void * data;
+
+       while ((data = sock_dequeue_data(sock)))
+               data_free(data);
+       sock->recv_data_size = 0;
+}
+
+static void set_reply_msg(message * m, int status)
+{
+       int proc, ref;
+
+       proc= m->IO_ENDPT;
+       ref= (int)m->IO_GRANT;
+
+       m->REP_ENDPT= proc;
+       m->REP_STATUS= status;
+       m->REP_IO_GRANT= ref;
+}
+
+void send_reply(message * m, int status)
+{
+       int result;
+
+       debug_sock_print("status %d", status);
+       set_reply_msg(m, status);
+       
+       m->m_type = TASK_REPLY;
+       result = send(m->m_source, m);
+       if (result != OK)
+               panic("LWIP : unable to send (err %d)", result);
+}
+
+void sock_revive(struct socket * sock, int status)
+{
+       int result;
+
+       assert(!(sock->flags & SOCK_FLG_OP_REVIVING));
+       assert(sock->flags & (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_SUSPENDED));
+
+       if (notified) {
+               debug_sock_print("already notified");
+               return;
+       }
+       else {
+               assert(sock->mess.m_type != DEV_REVIVE);
+               notified = 1;
+       }
+       
+       debug_sock_print("socket num %ld, status %d",
+                       get_sock_num(sock), status);
+
+       sock->mess.m_type = DEV_REVIVE;
+       set_reply_msg(&sock->mess, status);
+       
+       result = notify(sock->mess.m_source);
+       if (result != OK)
+               panic("LWIP : unable to notify (err %d)", result);
+
+       sock->flags |= SOCK_FLG_OP_REVIVING;
+}
+
+void sock_select_notify(struct socket * sock)
+{
+       int result;
+
+       debug_sock_print("socket num %ld", get_sock_num(sock));
+       assert(sock->select_ep != NONE);
+
+       sock->flags |= SOCK_FLG_SEL_CHECK;
+       if (notified) {
+               debug_sock_print("already notified");
+               return;
+       }
+       else
+               notified = 1;
+       
+       result = notify(sock->select_ep);
+       if (result != OK)
+               panic("LWIP : unable to notify (err %d)", result);
+}
+
+void sock_reply(struct socket * sock, int status)
+{
+       debug_sock_print("socket num %ld status %d type %d",
+                       get_sock_num(sock), status, sock->mess.m_type);
+       /*
+        * If the status is SUSPEND send the
+        * message only if this operation wasn't
+        * suspended already, e.g. by enqueing the
+        * message when the socket was busy
+        * because of another pending message
+        *
+        * If there is a pending operation or we a reprocessing a suspended
+        * operation, revive.
+        *
+        * Otherwise send a message straightaway
+        */
+       if (status == SUSPEND) {
+               if (sock->flags & SOCK_FLG_OP_SUSPENDED) {
+                       debug_sock_print("suspended before");
+                       sock->flags &= ~SOCK_FLG_OP_SUSPENDED;
+                       return;
+               }
+               message m = sock->mess;
+               debug_sock_print("SUSPEND");
+               send_reply(&m, status);
+       } else if (sock->flags & (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_SUSPENDED)) {
+               sock_revive(sock, status);
+               /*
+                * From now on, we process suspended calls as any other. The
+                * status is set and will be collected
+                */
+               sock->flags &= ~SOCK_FLG_OP_SUSPENDED;
+       } else
+               send_reply(&sock->mess, status);
+}
+
+struct socket * get_unused_sock(void)
+{
+       int i;
+
+       for (i = SOCK_TYPES + MAX_DEVS; i < MAX_SOCKETS; i++) {
+               if (socket[i].ops == NULL) {
+                       /* clear it all */
+                       memset(&socket[i], 0, sizeof(struct socket));
+                       return &socket[i];
+               }
+       }
+
+       return NULL;
+}
+
+struct socket * get_nic_sock(unsigned dev)
+{
+       if (dev < MAX_DEVS)
+               return &socket[dev + SOCK_TYPES];
+       else
+               return NULL;
+}
+
+static void socket_open(message * m)
+{
+       struct sock_ops * ops;
+       struct socket * sock;
+       int ret = OK;
+
+       switch (m->DEVICE) {
+       case SOCK_TYPE_TCP:
+               ops = &sock_tcp_ops;
+               break;
+       case SOCK_TYPE_UDP:
+               ops = &sock_udp_ops;
+               break;
+       case SOCK_TYPE_IP:
+               ops = &sock_raw_ip_ops;
+               break;
+       default:
+               if (m->DEVICE - SOCK_TYPES  < MAX_DEVS) {
+                       m->DEVICE -= SOCK_TYPES;
+                       nic_open(m);
+                       return;
+               }
+               printf("LWIP unknown socket type %d\n", m->DEVICE);
+               send_reply(m, EINVAL);
+               return;
+       }
+       
+       sock = get_unused_sock();
+       if (!sock) {
+               printf("LWIP : no free socket\n");
+               send_reply(m, EAGAIN);
+               return;
+       }
+
+       sock->ops = ops;
+       sock->select_ep = NONE;
+       sock->recv_data_size = 0;
+
+       if (sock->ops && sock->ops->open)
+               ret = sock->ops->open(sock, m);
+       
+       if (ret == OK) {
+               debug_sock_print("new socket %ld", get_sock_num(sock));
+               send_reply(m, get_sock_num(sock));
+       } else {
+               debug_sock_print("failed %d", ret);
+               send_reply(m, ret);
+       }
+}
+
+static void do_status(message * m)
+{
+       int i;
+
+       debug_sock_print("called");
+
+       notified = 0;
+
+       for (i = 0; i < MAX_SOCKETS; i++) {
+               struct socket * sock = &socket[i];
+               if (!sock->ops) {
+                       continue;
+               }
+               if (sock->flags & (SOCK_FLG_OP_REVIVING)) {
+                       /*
+                        * We send the reply and we are done with this request
+                        */
+                       debug_sock_print("status %d ep %d sent sock %ld type %d", 
+                                       sock->mess.REP_STATUS,
+                                       sock->mess.REP_ENDPT,
+                                       get_sock_num(sock),
+                                       sock->mess.m_type);
+                       send(m->m_source, &sock->mess);
+                       /*
+                        * Remove only the reviving flag, i.e. the status has
+                        * been consumed. SOCK_FLG_OP_PENDING may stay set. For
+                        * instance in case of a TCP write, the application is
+                        * already notified while the process of sending is
+                        * still going on
+                        */
+                       sock->flags &= ~SOCK_FLG_OP_REVIVING;
+                       return;
+               }
+
+               /*
+                * We check select AFTER possible reviving an operation,
+                * otherwise the select will fail as the socket is still
+                * blocking
+                */
+               if (sock_select_check_set(sock)) {
+                       if (sock->ops && sock->ops->select_reply) {
+                               message msg;
+                               msg.m_type = DEV_IO_READY;
+                               msg.DEV_MINOR = get_sock_num(sock);
+                               msg.DEV_SEL_OPS = 0;
+                               sock->ops->select_reply(sock, &msg);
+                               if (msg.DEV_SEL_OPS) {
+                                       int result;
+
+                                       debug_sock_print("socket num %d select "
+                                               "result 0x%x sent",
+                                               msg.DEV_MINOR,
+                                               msg.DEV_SEL_OPS);
+                                       result = send(sock->select_ep, &msg);
+                                       if (result != OK)
+                                               panic("LWIP : unable to send "
+                                                       "(err %d)", result);
+                                       sock_clear_select(sock);
+                                       sock->select_ep = NONE;
+                                       return;
+                               }
+                       }
+               }
+       }
+
+       debug_sock_print("no status");
+       m->m_type = DEV_NO_STATUS;
+       send(m->m_source, m);
+}
+
+static void socket_request_socket(struct socket * sock, message * m)
+{
+       switch (m->m_type) {
+       case DEV_READ_S:
+               if (sock && sock->ops && sock->ops->read)
+                       sock->ops->read(sock, m);
+               else
+                       send_reply(m, EINVAL);
+               return;
+       case DEV_WRITE_S:
+               if (sock && sock->ops && sock->ops->write)
+                       sock->ops->write(sock, m);
+               else
+                       send_reply(m, EINVAL);
+               return;
+       case DEV_IOCTL_S:
+               if (sock && sock->ops && sock->ops->ioctl)
+                       sock->ops->ioctl(sock, m);
+               else
+                       send_reply(m, EINVAL);
+               return;
+       default:
+               panic("LWIP : cannot happen!");
+       }
+}
+
+void socket_request(message * m)
+{
+       struct socket * sock;
+
+       switch (m->m_type) {
+       case DEV_OPEN:
+               socket_open(m);
+               return;
+       case DEV_CLOSE:
+               sock = get_sock(m->DEVICE);
+               if (sock && sock->ops && sock->ops->close) {
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+                       sock->mess = *m;
+                       sock->ops->close(sock, m);
+               } else
+                       send_reply(m, EINVAL);
+               return;
+       case DEV_READ_S:
+       case DEV_WRITE_S:
+       case DEV_IOCTL_S:
+               sock = get_sock(m->DEVICE);
+               if (!sock) {
+                       send_reply(m, EINVAL);
+                       return;
+               }
+               /*
+                * If an operation is pending (blocking operation) or writing is
+                * still going and we want to read, suspend the new operation
+                */
+               if ((sock->flags & (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) |
+                               (m->m_type == DEV_READ_S &&
+                                sock->flags & SOCK_FLG_OP_WRITING)) {
+                       char * o = "\0";
+                       if (sock->flags & SOCK_FLG_OP_READING)
+                               o = "READ";
+                       else if (sock->flags & SOCK_FLG_OP_WRITING)
+                               o = "WRITE";
+                       else
+                               o = "non R/W op";
+                       debug_sock_print("socket %ld is busy by %s\n",
+                                       get_sock_num(sock), o);
+                       if (mq_enqueue(m) == 0) {
+                               send_reply(m, SUSPEND);
+                       } else {
+                               debug_sock_print("Enqueuing suspended "
+                                                       "call failed");
+                               send_reply(m, ENOMEM);
+                       }
+                       return;
+               }
+               sock->mess = *m;
+               socket_request_socket(sock, m);
+               return;
+       case CANCEL:
+               sock = get_sock(m->DEVICE);
+               debug_sock_print("socket num %ld", get_sock_num(sock));
+               /* Cancel the last operation in the queue */
+               if (mq_cancel(m)) {
+                       send_reply(m, EINTR);
+                       return;
+               /* ... or a blocked read */
+               } else if (sock->flags & SOCK_FLG_OP_PENDING &&
+                               sock->flags & SOCK_FLG_OP_READING) {
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+                       send_reply(m, EINTR);
+                       return;
+               /*
+                * .. or return the status of the operation which was finished
+                * before canceled
+                */
+               } else if (sock->flags & SOCK_FLG_OP_REVIVING) {
+                       sock->flags &= ~SOCK_FLG_OP_REVIVING;
+                       send_reply(m, sock->mess.REP_STATUS);
+               } else
+                       panic("LWIP : no operation to cancel");
+
+               return;
+       case DEV_SELECT:
+               /* 
+                * Select is always executed immediately and is never suspended.
+                * Although, it sets actions which must be monitored
+                */
+               sock = get_sock(m->DEVICE);
+               assert(sock->select_ep == NONE || sock->select_ep == m->m_source);
+               
+               if (sock && sock->ops && sock->ops->select) {
+                       sock->ops->select(sock, m);
+                       if (sock_select_set(sock))
+                               sock->select_ep = m->m_source;
+               } else
+                       send_reply(m, EINVAL);
+               return;
+       case DEV_STATUS:
+               do_status(m);
+               return;
+       default:
+               printf("LWIP : unknown message from VFS, type %d\n",
+                                                       m->m_type);
+       }
+       send_reply(m, EGENERIC);
+}
+
+void mq_process(void)
+{
+       struct mq * mq;
+       struct socket * sock;
+
+       mq = mq_head;
+
+       while(mq) {
+               struct mq * next = mq->next;
+       
+               sock = get_sock(mq->m.DEVICE);
+               if (!(sock->flags &
+                               (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) &&
+                               !(mq->m.m_type == DEV_READ_S &&
+                                sock->flags & SOCK_FLG_OP_WRITING)) {
+                       sock->flags = SOCK_FLG_OP_SUSPENDED;
+                       debug_sock_print("resuming op on sock %ld\n",
+                                       get_sock_num(sock));
+                       sock->mess = mq->m;
+                       socket_request_socket(sock, &sock->mess);
+                       mq_dequeue(mq);
+                       mq_free(mq);
+                       return;
+               }
+
+               mq = next;
+       }
+}
+
+void generic_op_select(struct socket * sock, message * m)
+{
+       int retsel = 0, sel;
+
+       debug_print("socket num %ld 0x%x", get_sock_num(sock), m->IO_ENDPT);
+
+       sel = m->IO_ENDPT;
+
+       /* in this case any operation would block, no error */
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               if (sel & SEL_NOTIFY) {
+                       if (sel & SEL_RD)
+                               sock->flags |= SOCK_FLG_SEL_READ;
+                       if (sel & SEL_WR)
+                               sock->flags |= SOCK_FLG_SEL_WRITE;
+                       /* FIXME we do not monitor error */
+               }
+               send_reply(m, 0);
+               return;
+       }
+
+       if (sel & SEL_RD) {
+               if (sock->recv_head)
+                       retsel |= SEL_RD;
+               else if (sel & SEL_NOTIFY)
+                       sock->flags |= SOCK_FLG_SEL_READ;
+       }
+       /* FIXME generic packet socket never blocks on write */
+       if (sel & SEL_WR)
+               retsel |= SEL_WR;
+       /* FIXME SEL_ERR is ignored, we do not generate exceptions */
+
+       send_reply(m, retsel);
+}
+
+void generic_op_select_reply(struct socket * sock, __unused message * m)
+{
+       assert(sock->select_ep != NONE);
+       debug_print("socket num %ld", get_sock_num(sock));
+
+       /* unused for generic packet socket, see generic_op_select() */
+       assert((sock->flags & (SOCK_FLG_SEL_WRITE | SOCK_FLG_SEL_ERROR)) == 0);
+
+       if (sock->flags & (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) {
+               debug_print("WARNING socket still blocking!");
+               return;
+       }
+
+       if (sock->flags & SOCK_FLG_SEL_READ && sock->recv_head)
+               m->DEV_SEL_OPS |= SEL_RD;
+       
+       if (m->DEV_SEL_OPS) 
+               sock->flags &= ~(SOCK_FLG_SEL_WRITE | SOCK_FLG_SEL_READ |
+                                                       SOCK_FLG_SEL_ERROR);
+}
diff --git a/servers/lwip/socket.h b/servers/lwip/socket.h
new file mode 100644 (file)
index 0000000..bddcd65
--- /dev/null
@@ -0,0 +1,135 @@
+#ifndef __LWIP_SERVER_SOCKET_H__
+#define __LWIP_SERVER_SOCKET_H__
+
+#include <minix/ipc.h>
+#include <minix/endpoint.h>
+
+#include "inet_config.h"
+#include "proto.h"
+
+#define SOCK_TYPE_IP   0
+#define SOCK_TYPE_TCP  1
+#define SOCK_TYPE_UDP  2
+#define SOCK_TYPES     3
+
+struct socket;
+
+typedef void (* sock_op_t)(struct socket *, message *);
+typedef int (* sock_op_open_t)(struct socket *, message *);
+
+struct sock_ops {
+       sock_op_open_t  open;
+       sock_op_t       close;
+       sock_op_t       read;
+       sock_op_t       write;
+       sock_op_t       ioctl;
+       sock_op_t       select;
+       sock_op_t       select_reply;
+};
+
+struct recv_q {
+       struct recv_q * next;
+       void *          data;
+};
+
+#define SOCK_FLG_OP_PENDING    0x1
+#define SOCK_FLG_OP_REVIVING   0x2
+#define SOCK_FLG_OP_SUSPENDED  0x4     /* set when processing a suspended op */
+#define SOCK_FLG_OP_LISTENING  0x100   /* tcp socket is in a listening mode */
+#define        SOCK_FLG_OP_CONNECTING  0x200   /* set when waiting for a connect */
+#define SOCK_FLG_OP_READING    0x400   /* reading operation in progress */
+#define SOCK_FLG_OP_WRITING    0x800   /* writing operation in progress */
+#define SOCK_FLG_CLOSED                0x1000  /* tcp socket has been closed do not
+                                          expect any more data */
+/* select() flags - they say what action do we monitor */
+#define SOCK_FLG_SEL_WRITE     0x100000
+#define SOCK_FLG_SEL_READ      0x200000
+#define SOCK_FLG_SEL_ERROR     0x400000
+#define SOCK_FLG_SEL_CHECK     0x800000 /* select satisfied, go and check it */
+
+#define sock_select_set(sock)  ((sock)->flags & (SOCK_FLG_SEL_WRITE |  \
+                               SOCK_FLG_SEL_READ | SOCK_FLG_SEL_ERROR))
+#define sock_select_read_set(sock)     ((sock)->flags & SOCK_FLG_SEL_READ)
+#define sock_select_write_set(sock)    ((sock)->flags & SOCK_FLG_SEL_WRITE)
+#define sock_select_rw_set(sock)       ((sock)->flags & (SOCK_FLG_SEL_READ | \
+                                                       SOCK_FLG_SEL_WRITE))
+#define sock_select_error_set(sock)    ((sock)->flags & SOCK_FLG_SEL_ERROR)
+#define sock_select_check_set(sock)    ((sock)->flags & SOCK_FLG_SEL_CHECK)
+#define sock_clear_select(sock)        do {                                    \
+       (sock)->flags &= ~(SOCK_FLG_SEL_READ | SOCK_FLG_SEL_WRITE |     \
+                       SOCK_FLG_SEL_ERROR | SOCK_FLG_SEL_CHECK);       \
+} while (0)
+
+struct socket {
+       int                     type;
+       u32_t                   flags;
+       unsigned long           usr_flags;
+       void *                  pcb;
+       struct sock_ops *       ops;
+       void *                  buf;
+       size_t                  buf_size;
+       message                 mess; /* store the message which initiated the
+                                        last operation on this socket in case
+                                        we have to suspend the operation */
+       endpoint_t              select_ep;
+       struct recv_q *         recv_head;
+       struct recv_q *         recv_tail;
+       unsigned                recv_data_size; /* sum of data enqueued */
+       void *                  data;
+};
+
+extern struct sock_ops sock_udp_ops;
+extern struct sock_ops sock_tcp_ops;
+extern struct sock_ops sock_raw_ip_ops;
+
+#define get_sock_num(x) ((long int) ((x) - socket))
+#define is_valid_sock_num(x) (x < MAX_SOCKETS)
+#define get_sock(x) &socket[x]
+
+#define MAX_SOCKETS 255 /* FIXME as log as the sockets are identified by the
+                          minor device number 255 is ok */
+#define MAX_DEVS 5
+#define RESERVED (SOCK_TYPES + MAX_DEVS) /* rounded to 8 */
+
+extern struct socket socket[MAX_SOCKETS];
+
+void socket_request(message * m);
+void mq_process(void);
+
+
+struct socket * get_unused_sock(void);
+struct socket * get_nic_sock(unsigned dev);
+
+void send_reply(message * m, int status);
+void sock_reply(struct socket * sock, int status);
+void sock_revive(struct socket * sock, int status);
+
+typedef void (* recv_data_free_fn)(void *);
+
+int sock_enqueue_data(struct socket * sock, void * data, unsigned size);
+void * sock_dequeue_data(struct socket * sock);
+void sock_dequeue_data_all(struct socket * sock,
+                               recv_data_free_fn data_free);
+
+void sock_select_notify(struct socket * sock);
+
+static inline void * debug_malloc(size_t s)
+{
+       void * ret;
+
+       ret = malloc(s);
+       // printf("allocated %p size %d\n", ret, s);
+       return ret;
+}
+
+#define debug_free(x) do {                                                     \
+       if (0)                                                                  \
+               printf("free called from %s:%d %s freeing %p\n", __FILE__,      \
+                                               __LINE__, __func__, (x));       \
+       free(x);                                                                \
+} while(0)
+
+void generic_op_select(struct socket * sock, message * m);
+void generic_op_select_reply(struct socket * sock, message * m);
+
+#endif /* __LWIP_SERVER_SOCKET_H__ */
diff --git a/servers/lwip/tcp.c b/servers/lwip/tcp.c
new file mode 100644 (file)
index 0000000..5cc7a8b
--- /dev/null
@@ -0,0 +1,1205 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <minix/sysutil.h>
+
+#include <net/ioctl.h>
+#include <net/gen/in.h>
+#include <net/gen/tcp.h>
+#include <net/gen/tcp_io.h>
+
+#include <lwip/tcp.h>
+#include <lwip/tcp_impl.h>
+#include <lwip/ip_addr.h>
+
+#include "socket.h"
+#include "proto.h"
+
+#define TCP_BUF_SIZE   (32 << 10)
+
+#define sock_alloc_buf(s)      debug_malloc(s)
+#define sock_free_buf(x)       debug_free(x)
+
+static int do_tcp_debug;
+
+#if 0
+#define debug_tcp_print(str, ...) printf("LWIP %s:%d : " str "\n", \
+               __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define debug_tcp_print(...) debug_print(__VA_ARGS__)
+#endif
+
+struct wbuf {
+       unsigned        len;
+       unsigned        written;
+       unsigned        unacked;
+       unsigned        rem_len;
+       struct wbuf     * next;
+       char            data[];
+};
+
+struct wbuf_chain {
+       struct wbuf * head;
+       struct wbuf * tail;
+       struct wbuf * unsent; /* points to the first buffer that contains unsent
+                                data. It may point anywhere between head and
+                                tail */
+};
+
+static void tcp_error_callback(void *arg, err_t err)
+{
+       int perr;
+       struct socket * sock = (struct socket *) arg;
+
+       debug_tcp_print("socket num %ld err %d", get_sock_num(sock), err);
+
+       switch (err) {
+       case ERR_RST:
+               perr = ECONNREFUSED;
+               break;
+       case ERR_CLSD:
+               perr = EPIPE;
+               break;
+       case ERR_CONN:
+               perr = ENOTCONN;
+               break;
+       default:
+               perr = EIO;
+       }
+       
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               sock_revive(sock, perr);
+               sock->flags &= ~SOCK_FLG_OP_PENDING;
+       } else if (sock_select_set(sock))
+               sock_select_notify(sock);
+       /*
+        * When error callback is called the tcb either does not exist anymore
+        * or is going to be deallocated soon after. We must not use the pcb
+        * anymore
+        */
+       sock->pcb = NULL;
+}
+
+static int tcp_fill_new_socket(struct socket * sock, struct tcp_pcb * pcb)
+{
+       struct wbuf_chain * wc;
+
+       if (!(wc = malloc(sizeof(struct wbuf_chain))))
+               return ENOMEM;
+       
+       wc-> head = wc->tail = wc->unsent = NULL;
+       sock->buf = wc;
+       sock->buf_size = 0;
+       
+       sock->pcb = pcb;
+       tcp_arg(pcb, sock);
+       tcp_err(pcb, tcp_error_callback);
+       tcp_nagle_disable(pcb);
+
+       return OK;
+}
+
+static int tcp_op_open(struct socket * sock, __unused message * m)
+{
+       struct tcp_pcb * pcb;
+       int ret;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       if (!(pcb = tcp_new()))
+               return ENOMEM;
+       debug_tcp_print("new tcp pcb %p\n", pcb);
+       
+       if ((ret = tcp_fill_new_socket(sock, pcb) != OK))
+               tcp_abandon(pcb, 0);
+       
+       return ret;
+}
+
+static void tcp_recv_free(__unused void * data)
+{
+       pbuf_free((struct pbuf *) data);
+}
+
+static void tcp_backlog_free(void * data)
+{
+       tcp_abort((struct tcp_pcb *) data);
+}
+
+static void free_wbuf_chain(struct wbuf_chain * wc)
+{
+       struct wbuf * wb;
+
+       assert(wc != NULL);
+
+       wb = wc->head;
+       while (wb) {
+               struct wbuf * w = wb;
+               debug_tcp_print("freeing wbuf %p", wb);
+               wb = wb->next;
+               debug_free(w);
+       }
+
+       debug_free(wc);
+}
+
+static void tcp_op_close(struct socket * sock, __unused message * m)
+{
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       if (sock->flags & SOCK_FLG_OP_LISTENING)
+               sock_dequeue_data_all(sock, tcp_backlog_free);
+       else
+               sock_dequeue_data_all(sock, tcp_recv_free);
+       debug_tcp_print("dequed RX data");
+
+       if (sock->pcb) {
+               int err;
+
+               /* we are not able to handle any callback anymore */
+               tcp_arg((struct tcp_pcb *)sock->pcb, NULL);
+               tcp_err((struct tcp_pcb *)sock->pcb, NULL);
+               tcp_sent((struct tcp_pcb *)sock->pcb, NULL);
+               tcp_recv((struct tcp_pcb *)sock->pcb, NULL);
+
+               err = tcp_close(sock->pcb);
+               assert(err == ERR_OK);
+               sock->pcb = NULL;
+       }
+       debug_tcp_print("freed pcb");
+
+       if (sock->buf) {
+               free_wbuf_chain((struct wbuf_chain *) sock->buf);
+               sock->buf = NULL;
+       }
+       debug_tcp_print("freed TX data");
+
+       sock_reply(sock, OK);
+       debug_tcp_print("socket unused");
+       
+       /* mark it as unused */
+       sock->ops = NULL;
+}
+
+__unused static void print_tcp_payload(unsigned char * buf, int len)
+{
+       int i;
+
+       printf("LWIP tcp payload (%d) :\n", len);
+       for (i = 0; i < len; i++, buf++) {
+               printf("%02x ", buf[0]);
+               if (i % 8 == 7)
+                       kputc('\n');
+       }
+       kputc('\n');
+}
+
+static int read_from_tcp(struct socket * sock, message * m)
+{
+       unsigned rem_buf, written = 0;
+       struct pbuf * p;
+
+       assert(!(sock->flags & SOCK_FLG_OP_LISTENING) && sock->recv_head);
+
+       rem_buf = m->COUNT;
+
+       debug_tcp_print("socket num %ld recv buff sz %d", get_sock_num(sock), rem_buf);
+
+       p = (struct pbuf *)sock->recv_head->data;
+       while (rem_buf) {
+               int err;
+
+               if (rem_buf >= p->len) {
+                       struct pbuf * np;
+
+                       /*
+                        * FIXME perhaps copy this to a local buffer and do a
+                        * single copy to user then
+                        */
+#if 0
+                       print_tcp_payload(p->payload, p->len);
+#endif
+                       err = copy_to_user(m->IO_ENDPT, p->payload, p->len,
+                                       (cp_grant_id_t) m->IO_GRANT, written);
+                       if (err != OK)
+                               goto cp_error;
+                       sock->recv_data_size -= p->len;
+
+                       debug_tcp_print("whole pbuf copied (%d bytes)", p->len);
+                       rem_buf -= p->len;
+                       written += p->len;
+
+                       if ((np = p->next)) {
+                               pbuf_ref(np);
+                               if (pbuf_free(p) != 1)
+                                       panic("LWIP : pbuf_free != 1");
+                               /*
+                                * Mark where we are going to continue if an
+                                * error occurs
+                                */
+                               sock->recv_head->data = np;
+                               p = np;
+                       } else {
+                               sock_dequeue_data(sock);
+                               pbuf_free(p);
+                               if (sock->recv_head)
+                                       p = (struct pbuf *)sock->recv_head->data;
+                               else
+                                       break;
+                       }
+
+                       if (rem_buf == 0)
+                               break;
+               } else {
+                       /*
+                        * It must be PBUF_RAM for us to be able to shift the
+                        * payload pointer
+                        */
+                       assert(p->type == PBUF_RAM);
+                       
+#if 0
+                       print_tcp_payload(p->payload, rem_buf);
+#endif
+                       err = copy_to_user(m->IO_ENDPT, p->payload, rem_buf,
+                                       (cp_grant_id_t) m->IO_GRANT, written);
+                       if (err != OK)
+                               goto cp_error;
+                       sock->recv_data_size -= rem_buf;
+
+                       debug_tcp_print("partial pbuf copied (%d bytes)", rem_buf);
+                       /*
+                        * The whole pbuf hasn't been copied out, we only shift
+                        * the payload pointer to remember where to continue
+                        * next time
+                        */
+                       pbuf_header(p, -rem_buf);
+                       written += rem_buf;
+                       break;
+               }
+       }
+
+       debug_tcp_print("%d bytes written to userspace", written);
+       //printf("%d wr, queue %d\n", written, sock->recv_data_size);
+       tcp_recved((struct tcp_pcb *) sock->pcb, written);
+       return written;
+
+cp_error:
+       if (written) {
+               debug_tcp_print("%d bytes written to userspace", written);
+               return written;
+       } else
+               return EFAULT;
+}
+
+static void tcp_op_read(struct socket * sock, message * m)
+{
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       if (!sock->pcb || ((struct tcp_pcb *) sock->pcb)->state !=
+                                                       ESTABLISHED) {
+               debug_tcp_print("Connection not established\n");
+               sock_reply(sock, ENOTCONN);
+               return;
+       }
+       if (sock->recv_head) {
+               /* data available receive immeditely */
+               int ret = read_from_tcp(sock,  m);
+               debug_tcp_print("read op finished");
+               sock_reply(sock, ret);
+       } else {
+               if (sock->flags & SOCK_FLG_CLOSED) {
+                       printf("socket %ld already closed!!! call from %d\n",
+                                       get_sock_num(sock), m->IO_ENDPT);
+                       do_tcp_debug = 1;
+                       sock_reply(sock, 0);
+                       return;
+               }
+               /* operation is being processed */
+               debug_tcp_print("no data to read, suspending");
+               sock_reply(sock, SUSPEND);
+               sock->flags |= SOCK_FLG_OP_PENDING | SOCK_FLG_OP_READING;
+       }
+}
+
+static struct wbuf * wbuf_add(struct socket * sock, unsigned sz)
+{
+       struct wbuf * wbuf;
+       struct wbuf_chain * wc = (struct wbuf_chain *)sock->buf;
+
+       assert(wc);
+
+       wbuf = debug_malloc(sizeof(struct wbuf) + sz);
+       if (!wbuf)
+               return NULL;
+       
+       wbuf->len = sz;
+       wbuf->written = wbuf->unacked = 0;
+       wbuf->next = NULL;
+
+       if (wc->head == NULL)
+               wc->head = wc->tail = wbuf;
+       else {
+               wc->tail->next = wbuf;
+               wc->tail = wbuf;
+       }
+
+       sock->buf_size += sz;
+       debug_tcp_print("buffer %p size %d\n", wbuf, sock->buf_size);
+
+       return wbuf;
+}
+
+static struct wbuf * wbuf_ack_sent(struct socket * sock, unsigned sz)
+{
+       struct wbuf_chain * wc = (struct wbuf_chain *) sock->buf;
+       struct wbuf ** wb;
+
+       wb = &wc->head;
+       while (sz && *wb) {
+               if ((*wb)->unacked <= sz) {
+                       struct wbuf * w;
+                       assert((*wb)->rem_len == 0);
+                       w = *wb;
+                       *wb = w->next;
+                       sock->buf_size -= w->len;
+                       sz -= w->unacked;
+                       debug_tcp_print("whole buffer acked (%d / %d), removed",
+                                       w->unacked, w->len);
+                       debug_free(w);
+               } else {
+                       (*wb)->unacked -= sz;
+                       (*wb)->written += sz;
+                       debug_tcp_print("acked %d / %d bytes", sz, (*wb)->len);
+                       sz = 0;
+               }
+       }
+
+       /* did we write out more than we had? */
+       assert(sz == 0);
+
+       if (wc->head == NULL)
+               wc->tail = NULL;
+       debug_tcp_print("buffer size %d\n", sock->buf_size);
+
+       return wc->head;
+}
+
+static void tcp_op_write(struct socket * sock, message * m)
+{
+       int ret;
+       struct wbuf * wbuf;
+       unsigned snd_buf_len, usr_buf_len;
+       u8_t flgs = 0;
+
+
+       if (!sock->pcb) {
+               sock_reply(sock, ENOTCONN);
+               return;
+       }
+
+       usr_buf_len = m->COUNT;
+       debug_tcp_print("socket num %ld data size %d",
+                       get_sock_num(sock), usr_buf_len);
+
+       /*
+        * Let at most one buffer grow beyond TCP_BUF_SIZE. This is to minimize
+        * small writes from userspace if only a few bytes were sent before
+        */
+       if (sock->buf_size >= TCP_BUF_SIZE) {
+               /* FIXME do not block for now */
+               debug_tcp_print("WARNING : tcp buffers too large, cannot allocate more");
+               sock_reply(sock, ENOMEM);
+               return;
+       }
+       /*
+        * Never let the allocated buffers grow more than to 2xTCP_BUF_SIZE and
+        * never copy more than space available
+        */
+       usr_buf_len = (usr_buf_len > TCP_BUF_SIZE ? TCP_BUF_SIZE : usr_buf_len);
+       wbuf = wbuf_add(sock, usr_buf_len);
+       debug_tcp_print("new wbuf for %d bytes", wbuf->len);
+       
+       if (!wbuf) {
+               debug_tcp_print("cannot allocate new buffer of %d bytes", usr_buf_len);
+               sock_reply(sock, ENOMEM);
+       }
+
+       if ((ret = copy_from_user(m->IO_ENDPT, wbuf->data, usr_buf_len,
+                               (cp_grant_id_t) m->IO_GRANT, 0)) != OK) {
+               sock_reply(sock, ret);
+               return;
+       }
+
+       wbuf->written = 0;
+       wbuf->rem_len = usr_buf_len;
+
+       /*
+        * If a writing operation is already in progress, we just enqueue the
+        * data and quit.
+        */
+       if (sock->flags & SOCK_FLG_OP_WRITING) {
+               struct wbuf_chain * wc = (struct wbuf_chain *)sock->buf;
+               /*
+                * We are adding a buffer with unsent data. If we don't have any other
+                * unsent data, set the pointer to this buffer.
+                */
+               if (wc->unsent == NULL) {
+                       wc->unsent = wbuf;
+                       debug_tcp_print("unsent %p remains %d\n", wbuf, wbuf->rem_len);
+               }
+               debug_tcp_print("returns %d\n", usr_buf_len);
+               sock_reply(sock, usr_buf_len);
+               /*
+                * We cannot accept new operations (write). We set the flag
+                * after sending reply not to revive only. We could deadlock.
+                */
+               if (sock->buf_size >= TCP_BUF_SIZE)
+                       sock->flags |= SOCK_FLG_OP_PENDING;
+
+               return;
+       }
+
+       /*
+        * Start sending data if the operation is not in progress yet. The
+        * current buffer is the nly one we have, we cannot send more.
+        */
+
+       snd_buf_len = tcp_sndbuf((struct tcp_pcb *)sock->pcb);
+       debug_tcp_print("tcp can accept %d bytes", snd_buf_len);
+
+       wbuf->unacked = (snd_buf_len < wbuf->rem_len ? snd_buf_len : wbuf->rem_len);
+       wbuf->rem_len -= wbuf->unacked;
+
+       if (wbuf->rem_len) {
+               flgs = TCP_WRITE_FLAG_MORE;
+               /*
+                * Remember that this buffer has some data which we didn't pass
+                * to tcp yet.
+                */
+               ((struct wbuf_chain *)sock->buf)->unsent = wbuf;
+               debug_tcp_print("unsent %p remains %d\n", wbuf, wbuf->rem_len);
+       }
+
+       ret = tcp_write((struct tcp_pcb *)sock->pcb, wbuf->data,
+                                               wbuf->unacked, flgs);
+       tcp_output((struct tcp_pcb *)sock->pcb);
+       debug_tcp_print("%d bytes to tcp", wbuf->unacked);
+
+       if (ret == ERR_OK) {
+               /*
+                * Operation is being processed, no need to remember the message
+                * in this case, we are going to reply immediatly
+                */
+               debug_tcp_print("returns %d\n", usr_buf_len);
+               sock_reply(sock, usr_buf_len);
+               sock->flags |= SOCK_FLG_OP_WRITING;
+               if (sock->buf_size >= TCP_BUF_SIZE)
+                       sock->flags |= SOCK_FLG_OP_PENDING;
+       } else
+               sock_reply(sock, EIO);
+}
+
+static void tcp_set_conf(struct socket * sock, message * m)
+{
+       int err;
+       nwio_tcpconf_t tconf;
+       struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       assert(pcb);
+
+       err = copy_from_user(m->IO_ENDPT, &tconf, sizeof(tconf),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       debug_tcp_print("tconf.nwtc_flags = 0x%lx", tconf.nwtc_flags);
+       debug_tcp_print("tconf.nwtc_remaddr = 0x%x",
+                               (unsigned int) tconf.nwtc_remaddr);
+       debug_tcp_print("tconf.nwtc_remport = 0x%x", ntohs(tconf.nwtc_remport));
+       debug_tcp_print("tconf.nwtc_locaddr = 0x%x",
+                               (unsigned int) tconf.nwtc_locaddr);
+       debug_tcp_print("tconf.nwtc_locport = 0x%x", ntohs(tconf.nwtc_locport));
+
+       sock->usr_flags = tconf.nwtc_flags;
+
+       if (sock->usr_flags & NWTC_SET_RA)
+               pcb->remote_ip.addr = tconf.nwtc_remaddr;
+       if (sock->usr_flags & NWTC_SET_RP)
+               pcb->remote_port = ntohs(tconf.nwtc_remport);
+
+       if (sock->usr_flags & NWTC_LP_SET) {
+               /* FIXME the user library can only bind to ANY anyway */
+               if (tcp_bind(pcb, IP_ADDR_ANY, ntohs(tconf.nwtc_locport)) == ERR_USE) {
+                       sock_reply(sock, EADDRINUSE);
+                       return;
+               }
+       }
+       
+       sock_reply(sock, OK);
+}
+
+static void tcp_get_conf(struct socket * sock, message * m)
+{
+       int err;
+       nwio_tcpconf_t tconf;
+       struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       assert(pcb);
+
+       tconf.nwtc_locaddr = pcb->local_ip.addr;
+       tconf.nwtc_locport = htons(pcb->local_port);
+       tconf.nwtc_remaddr = pcb->remote_ip.addr;
+       tconf.nwtc_remport = htons(pcb->remote_port);
+       tconf.nwtc_flags = sock->usr_flags;
+
+       debug_tcp_print("tconf.nwtc_flags = 0x%lx", tconf.nwtc_flags);
+       debug_tcp_print("tconf.nwtc_remaddr = 0x%x",
+                               (unsigned int) tconf.nwtc_remaddr);
+       debug_tcp_print("tconf.nwtc_remport = 0x%x", ntohs(tconf.nwtc_remport));
+       debug_tcp_print("tconf.nwtc_locaddr = 0x%x",
+                               (unsigned int) tconf.nwtc_locaddr);
+       debug_tcp_print("tconf.nwtc_locport = 0x%x", ntohs(tconf.nwtc_locport));
+
+       if ((unsigned) m->COUNT < sizeof(tconf)) {
+               sock_reply(sock, EINVAL);
+               return;
+       }
+       
+       err = copy_to_user(m->IO_ENDPT, &tconf, sizeof(tconf),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       sock_reply(sock, OK);
+}
+
+static int enqueue_rcv_data(struct socket * sock, struct pbuf * pbuf)
+{
+       /* Do not enqueue more data than allowed */
+       if (0 && sock->recv_data_size > 4 * TCP_BUF_SIZE)
+               return ERR_MEM;
+
+       if (sock_enqueue_data(sock, pbuf, pbuf->tot_len) != OK) {
+               debug_tcp_print("data enqueueing failed");
+               return ERR_MEM;
+       }
+       debug_tcp_print("enqueued %d bytes", pbuf->tot_len);
+       //printf("enqueued %d bytes, queue %d\n", pbuf->tot_len, sock->recv_data_size);
+
+       return ERR_OK;
+}
+
+static err_t tcp_recv_callback(void *arg,
+                               struct tcp_pcb *tpcb,
+                               struct pbuf *pbuf,
+                               err_t err)
+{
+       int ret, enqueued = 0;
+       struct socket * sock = (struct socket *) arg;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       if (sock->pcb == NULL) {
+               if (sock_select_set(sock))
+                       sock_select_notify(sock);
+               return ERR_OK;
+       }
+
+       assert((struct tcp_pcb *) sock->pcb == tpcb);
+
+       if (err != ERR_OK)
+               return ERR_OK;
+       if (!pbuf) {
+               debug_tcp_print("tcp stream closed on the remote side");
+               // sock->flags |= SOCK_FLG_CLOSED;
+
+               /* wake up the reader and report EOF */
+               if (sock->flags & SOCK_FLG_OP_PENDING &&
+                               sock->flags & SOCK_FLG_OP_READING &&
+                               !(sock->flags & SOCK_FLG_OP_REVIVING)) {
+                       sock_revive(sock, 0);
+                       sock->flags &= ~(SOCK_FLG_OP_PENDING |
+                                       SOCK_FLG_OP_READING);
+               }
+#if 0
+               /* if there are any undelivered data, drop them */
+               sock_dequeue_data_all(sock, tcp_recv_free);
+               tcp_abandon(tpcb, 0);
+               sock->pcb = NULL;
+#endif
+
+               return ERR_OK;
+       }
+
+       /*
+        * FIXME we always enqueue the data first. If the head is empty and read
+        * operation is pending we could try to deliver immeditaly without
+        * enqueueing
+        */
+       if (enqueue_rcv_data(sock, pbuf) == ERR_OK)
+               enqueued = 1;
+
+       /*
+        * Deliver data if there is a pending read operation, otherwise notify
+        * select if the socket is being monitored
+        */
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               if (sock->flags & SOCK_FLG_OP_READING) {
+                       ret = read_from_tcp(sock, &sock->mess);
+                       debug_tcp_print("read op finished");
+                       sock_revive(sock, ret);
+                       sock->flags &= ~(SOCK_FLG_OP_PENDING |
+                                       SOCK_FLG_OP_READING);
+               }
+       } else if (!(sock->flags & SOCK_FLG_OP_WRITING) &&
+                       sock_select_rw_set(sock))
+               sock_select_notify(sock);
+
+       /* perhaps we have deliverd some data to user, try to enqueue again */
+       if (!enqueued) {
+               return enqueue_rcv_data(sock, pbuf);
+       } else
+               return ERR_OK;
+}
+
+static err_t tcp_sent_callback(void *arg, struct tcp_pcb *tpcb, u16_t len)
+{
+       struct socket * sock = (struct socket *) arg;
+       struct wbuf * wbuf;
+       struct wbuf_chain * wc = (struct wbuf_chain *) sock->buf;
+       unsigned snd_buf_len;
+       int ret;
+       
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       /* an error might have had happen */
+       if (sock->pcb == NULL) {
+               if (sock_select_set(sock))
+                       sock_select_notify(sock);
+               return ERR_OK;
+       }
+
+       assert((struct tcp_pcb *)sock->pcb == tpcb);
+
+       /* operation must have been canceled, do not send any other data */
+       if (!sock->flags & SOCK_FLG_OP_PENDING)
+               return ERR_OK;
+
+       wbuf = wbuf_ack_sent(sock, len);
+
+       if (wbuf == NULL) {
+               debug_tcp_print("all data acked, nothing more to send");
+               sock->flags &= ~SOCK_FLG_OP_WRITING;
+               if (!(sock->flags & SOCK_FLG_OP_READING))
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+               /* no reviving, we must notify. Write and read possible */
+               if (sock_select_rw_set(sock))
+                       sock_select_notify(sock);
+               return ERR_OK;
+       }
+
+       /* we have just freed some space, write will be accepted */
+       if (sock->buf_size < TCP_BUF_SIZE && sock_select_rw_set(sock)) {
+               if (!(sock->flags & SOCK_FLG_OP_READING)) {
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+                       sock_select_notify(sock);
+               }
+       }
+
+       /*
+        * Check if there is some space for new data, there should be, we just
+        * got a confirmation that some data reached the other end of the
+        * connection
+        */
+       snd_buf_len = tcp_sndbuf(tpcb);
+       assert(snd_buf_len > 0);
+       debug_tcp_print("tcp can accept %d bytes", snd_buf_len);
+
+       if (!wc->unsent) {
+               debug_tcp_print("nothing to send");
+               return ERR_OK;
+       }
+
+       wbuf = wc->unsent;
+       while (wbuf) {
+               unsigned towrite;
+               u8_t flgs = 0;
+
+               towrite = (snd_buf_len < wbuf->rem_len ?
+                                       snd_buf_len : wbuf->rem_len);
+               wbuf->rem_len -= towrite;
+               debug_tcp_print("data to send, sending %d", towrite);
+
+               if (wbuf->rem_len || wbuf->next)
+                       flgs = TCP_WRITE_FLAG_MORE;
+               ret = tcp_write(tpcb, wbuf->data + wbuf->written + wbuf->unacked,
+                                               towrite, flgs);
+               debug_tcp_print("%d bytes to tcp", towrite);
+
+               /* tcp_output() is called once we return from this callback */
+
+               if (ret != ERR_OK) {
+                       debug_print("tcp_write() failed (%d), written %d"
+                                       , ret, wbuf->written);
+                       sock->flags &= ~(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_WRITING);
+                       /* no reviving, we must notify. Write and read possible */
+                       if (sock_select_rw_set(sock))
+                               sock_select_notify(sock);
+                       return ERR_OK;
+               }
+               
+               wbuf->unacked += towrite;
+               snd_buf_len -= towrite;
+               debug_tcp_print("tcp still accepts %d bytes\n", snd_buf_len);
+
+               if (snd_buf_len) {
+                       assert(wbuf->rem_len == 0);
+                       wbuf = wbuf->next;
+                       wc->unsent = wbuf;
+                       if (wbuf)
+                               debug_tcp_print("unsent %p remains %d\n",
+                                               wbuf, wbuf->rem_len);
+                       else {
+                               debug_tcp_print("nothing to send");
+                       }
+               } else
+                       break;
+       }
+
+       return ERR_OK;
+}
+
+static err_t tcp_connected_callback(void *arg,
+                               struct tcp_pcb *tpcb,
+                               __unused err_t err)
+{
+       struct socket * sock = (struct socket *) arg;
+
+       debug_tcp_print("socket num %ld err %d", get_sock_num(sock), err);
+
+       if (sock->pcb == NULL) {
+               if (sock_select_set(sock))
+                       sock_select_notify(sock);
+               return ERR_OK;
+       }
+
+       assert((struct tcp_pcb *)sock->pcb == tpcb);
+
+       tcp_sent(tpcb, tcp_sent_callback);
+       tcp_recv(tpcb, tcp_recv_callback);
+       sock_revive(sock, OK);
+       sock->flags &= ~(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_CONNECTING);
+
+       /* revive does the sock_select_notify() for us */
+
+       return ERR_OK;
+}
+
+static void tcp_op_connect(struct socket * sock)
+{
+       ip_addr_t remaddr;
+       struct tcp_pcb * pcb;
+       err_t err;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+       /*
+        * Connecting is going to send some packets. Unless an immediate error
+        * occurs this operation is going to block
+        */
+       sock_reply(sock, SUSPEND);
+       sock->flags |= SOCK_FLG_OP_PENDING | SOCK_FLG_OP_CONNECTING;
+
+       /* try to connect now */
+       pcb = (struct tcp_pcb *) sock->pcb;
+       remaddr = pcb->remote_ip;
+       err = tcp_connect(pcb, &remaddr, pcb->remote_port,
+                               tcp_connected_callback);
+       if (err == ERR_VAL)
+               panic("Wrong tcp_connect arguments");
+       if (err != ERR_OK)
+               panic("Other tcp_connect error %d\n", err);
+}
+
+static int tcp_do_accept(struct socket * listen_sock,
+                       message * m,
+                       struct tcp_pcb * newpcb)
+{
+       struct socket * newsock;
+       unsigned sock_num;
+       int ret;
+
+       debug_tcp_print("socket num %ld", get_sock_num(listen_sock));
+
+       if ((ret = copy_from_user(m->IO_ENDPT, &sock_num, sizeof(sock_num),
+                               (cp_grant_id_t) m->IO_GRANT, 0)) != OK)
+               return EFAULT;
+       if (!is_valid_sock_num(sock_num))
+               return EBADF;
+
+       newsock = get_sock(sock_num);
+       assert(newsock->pcb); /* because of previous open() */
+       
+       /* we really want to forget about this socket */
+       tcp_err((struct tcp_pcb *)newsock->pcb, NULL);
+       tcp_abandon((struct tcp_pcb *)newsock->pcb, 0);
+       
+       tcp_arg(newpcb, newsock);
+       tcp_err(newpcb, tcp_error_callback);
+       tcp_sent(newpcb, tcp_sent_callback);
+       tcp_recv(newpcb, tcp_recv_callback);
+       tcp_nagle_disable(newpcb);
+       tcp_accepted(((struct tcp_pcb *)(listen_sock->pcb)));
+       newsock->pcb = newpcb;
+
+       debug_tcp_print("Accepted new connection using socket %d\n", sock_num);
+
+       return OK;
+}
+
+static err_t tcp_accept_callback(void *arg, struct tcp_pcb *newpcb, err_t err)
+{
+       struct socket * sock = (struct socket *) arg;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       assert(err == ERR_OK && newpcb);
+       assert(sock->flags & SOCK_FLG_OP_LISTENING);
+
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               int ret;
+
+               ret = tcp_do_accept(sock, &sock->mess, newpcb);
+               sock_revive(sock, ret);
+               sock->flags &= ~SOCK_FLG_OP_PENDING;
+               if (ret == OK) {
+                       return ERR_OK;
+               }
+               /* in case of an error fall through */
+       }
+
+       /* If we cannot accept rightaway we enqueue the connection for later */
+
+       debug_tcp_print("Enqueue connection sock %ld pcb %p\n",
+                       get_sock_num(sock), newpcb);
+       if (sock_enqueue_data(sock, newpcb, 1) != OK) {
+               tcp_abort(newpcb);
+               return ERR_ABRT;
+       }
+       if (sock_select_read_set(sock))
+               sock_select_notify(sock);
+
+       return ERR_OK;
+}
+
+static void tcp_op_listen(struct socket * sock, message * m)
+{
+       int backlog, err;
+       struct tcp_pcb * new_pcb;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       err = copy_from_user(m->IO_ENDPT, &backlog, sizeof(backlog),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       new_pcb = tcp_listen_with_backlog((struct tcp_pcb *) sock->pcb,
+                                                       (u8_t) backlog);
+       debug_tcp_print("listening pcb %p", new_pcb);
+
+       if (!new_pcb) {
+               debug_tcp_print("Cannot listen on socket %ld", get_sock_num(sock));
+               sock_reply(sock, EGENERIC);
+               return;
+       }
+
+       /* advertise that this socket is willing to accept connections */
+       tcp_accept(new_pcb, tcp_accept_callback);
+       sock->flags |= SOCK_FLG_OP_LISTENING;
+
+       sock->pcb = new_pcb;
+       sock_reply(sock, OK);
+}
+
+static void tcp_op_accept(struct socket * sock, message * m)
+{
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       if (!(sock->flags & SOCK_FLG_OP_LISTENING)) {
+               debug_tcp_print("socket %ld does not listen\n", get_sock_num(sock));
+               sock_reply(sock, EINVAL);
+               return;
+       }
+
+       /* there is a connection ready to be accepted */
+       if (sock->recv_head) {
+               int ret;
+               struct tcp_pcb * pcb;
+               
+               pcb = (struct tcp_pcb *) sock->recv_head->data;
+               assert(pcb);
+
+               ret = tcp_do_accept(sock, m, pcb);
+               sock_reply(sock, ret);
+               if (ret == OK)
+                       sock_dequeue_data(sock);
+               return;
+       }
+
+       debug_tcp_print("no ready connection, suspending\n");
+
+       sock_reply(sock, SUSPEND);
+       sock->flags |= SOCK_FLG_OP_PENDING;
+}
+
+static void tcp_op_shutdown_tx(struct socket * sock)
+{
+       err_t err;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       err = tcp_shutdown((struct tcp_pcb *) sock->pcb, 0, 1);
+
+       switch (err) {
+       case ERR_OK:
+               sock_reply(sock, OK);
+               break;
+       case ERR_CONN:
+               sock_reply(sock, ENOTCONN);
+               break;
+       default:
+               sock_reply(sock, EGENERIC);
+       }
+}
+
+static void tcp_op_get_cookie(struct socket * sock, message * m)
+{
+       tcp_cookie_t cookie;
+       unsigned sock_num;
+
+       assert(sizeof(cookie) >= sizeof(sock));
+
+       sock_num = get_sock_num(sock);
+       memcpy(&cookie, &sock_num, sizeof(sock_num));
+
+       if (copy_to_user(m->IO_ENDPT, &cookie, sizeof(sock),
+                       (cp_grant_id_t) m->IO_GRANT, 0) == OK)
+               sock_reply(sock, OK);
+       else
+               sock_reply(sock, EFAULT);
+}
+
+static void tcp_get_opt(struct socket * sock, message * m)
+{
+       int err;
+       nwio_tcpopt_t tcpopt;
+       struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       assert(pcb);
+
+       if ((unsigned) m->COUNT < sizeof(tcpopt)) {
+               sock_reply(sock, EINVAL);
+               return;
+       }
+
+       /* FIXME : not used by the userspace library */
+       tcpopt.nwto_flags = 0;
+       
+       err = copy_to_user(m->IO_ENDPT, &tcpopt, sizeof(tcpopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       sock_reply(sock, OK);
+}
+
+static void tcp_set_opt(struct socket * sock, message * m)
+{
+       int err;
+       nwio_tcpopt_t tcpopt;
+       struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;
+
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+       assert(pcb);
+
+       err = copy_from_user(m->IO_ENDPT, &tcpopt, sizeof(tcpopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       /* FIXME : The userspace library does not use this */
+
+       sock_reply(sock, OK);
+}
+
+static void tcp_op_ioctl(struct socket * sock, message * m)
+{
+       if (!sock->pcb) {
+               sock_reply(sock, ENOTCONN);
+               return;
+       }
+
+       debug_tcp_print("socket num %ld req %c %d %d",
+                       get_sock_num(sock),
+                       (m->REQUEST >> 8) & 0xff,
+                       m->REQUEST & 0xff,
+                       (m->REQUEST >> 16) & _IOCPARM_MASK);
+       
+       switch (m->REQUEST) {
+       case NWIOGTCPCONF:
+               tcp_get_conf(sock, m);
+               break;
+       case NWIOSTCPCONF:
+               tcp_set_conf(sock, m);
+               break;
+       case NWIOTCPCONN:
+               tcp_op_connect(sock);
+               break;
+       case NWIOTCPLISTENQ:
+               tcp_op_listen(sock, m);
+               break;
+       case NWIOGTCPCOOKIE:
+               tcp_op_get_cookie(sock, m);
+               break;
+       case NWIOTCPACCEPTTO:
+               tcp_op_accept(sock, m);
+               break;
+       case NWIOTCPSHUTDOWN:
+               tcp_op_shutdown_tx(sock);
+               break;
+       case NWIOGTCPOPT:
+               tcp_get_opt(sock, m);
+               break;
+       case NWIOSTCPOPT:
+               tcp_set_opt(sock, m);
+               break;
+       default:
+               sock_reply(sock, EBADIOCTL);
+               return;
+       }
+}
+
+static void tcp_op_select(struct socket * sock, __unused message * m)
+{
+       int retsel = 0, sel;
+
+       sel = m->IO_ENDPT;
+       debug_tcp_print("socket num %ld 0x%x", get_sock_num(sock), sel);
+       
+       /* in this case any operation would block, no error */
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               debug_tcp_print("SOCK_FLG_OP_PENDING");
+               if (sel & SEL_NOTIFY) {
+                       if (sel & SEL_RD) {
+                               sock->flags |= SOCK_FLG_SEL_READ;
+                               debug_tcp_print("monitor read");
+                       }
+                       if (sel & SEL_WR) {
+                               sock->flags |= SOCK_FLG_SEL_WRITE;
+                               debug_tcp_print("monitor write");
+                       }
+                       if (sel & SEL_ERR)
+                               sock->flags |= SOCK_FLG_SEL_ERROR;
+               }
+               send_reply(m, 0);
+               return;
+       }
+
+       if (sel & SEL_RD) {
+               /*
+                * If recv_head is not NULL we can either read or accept a
+                * connection which is the same for select()
+                */
+               if (sock->pcb) {
+                       if (sock->recv_head &&
+                                       !(sock->flags & SOCK_FLG_OP_WRITING))
+                               retsel |= SEL_RD;
+                       else if (!(sock->flags & SOCK_FLG_OP_LISTENING) && 
+                                       ((struct tcp_pcb *) sock->pcb)->state != ESTABLISHED)
+                               retsel |= SEL_RD;
+                       else if (sel & SEL_NOTIFY) {
+                               sock->flags |= SOCK_FLG_SEL_READ;
+                               debug_tcp_print("monitor read");
+                       }
+               } else
+                       retsel |= SEL_RD; /* not connected read does not block */
+       }
+       if (sel & SEL_WR) {
+               if (sock->pcb) {
+                       if (((struct tcp_pcb *) sock->pcb)->state == ESTABLISHED)
+                               retsel |= SEL_WR;
+                       else if (sel & SEL_NOTIFY) {
+                               sock->flags |= SOCK_FLG_SEL_WRITE;
+                               debug_tcp_print("monitor write");
+                       }
+               } else
+                       retsel |= SEL_WR; /* not connected write does not block */
+       }
+
+       if (retsel & SEL_RD) {
+               debug_tcp_print("read won't block");
+       }
+       if (retsel & SEL_WR) {
+               debug_tcp_print("write won't block");
+       }
+
+       /* we only monitor if errors will happen in the future */
+       if (sel & SEL_ERR && sel & SEL_NOTIFY)
+               sock->flags |= SOCK_FLG_SEL_ERROR;
+
+       send_reply(m, retsel);
+}
+
+static void tcp_op_select_reply(struct socket * sock, message * m)
+{
+       assert(sock->select_ep != NONE);
+       debug_tcp_print("socket num %ld", get_sock_num(sock));
+
+
+       if (sock->flags & (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) {
+               debug_tcp_print("WARNING socket still blocking!");
+               return;
+       }
+
+       if (sock->flags & SOCK_FLG_SEL_READ) {
+               if (sock->pcb == NULL || (sock->recv_head &&
+                               !(sock->flags & SOCK_FLG_OP_WRITING)) ||
+                        (!(sock->flags & SOCK_FLG_OP_LISTENING) && 
+                        ((struct tcp_pcb *) sock->pcb)->state !=
+                        ESTABLISHED)) {
+                       m->DEV_SEL_OPS |= SEL_RD;
+                       debug_tcp_print("read won't block");
+               }
+       }
+
+       if (sock->flags & SOCK_FLG_SEL_WRITE  &&
+                       (sock->pcb == NULL ||
+                        ((struct tcp_pcb *) sock->pcb)->state ==
+                        ESTABLISHED)) {
+               m->DEV_SEL_OPS |= SEL_WR;
+               debug_tcp_print("write won't block");
+       }
+
+       if (m->DEV_SEL_OPS) 
+               sock->flags &= ~(SOCK_FLG_SEL_WRITE | SOCK_FLG_SEL_READ |
+                                                       SOCK_FLG_SEL_ERROR);
+}
+
+struct sock_ops sock_tcp_ops = {
+       .open           = tcp_op_open,
+       .close          = tcp_op_close,
+       .read           = tcp_op_read,
+       .write          = tcp_op_write,
+       .ioctl          = tcp_op_ioctl,
+       .select         = tcp_op_select,
+       .select_reply   = tcp_op_select_reply
+};
+
diff --git a/servers/lwip/udp.c b/servers/lwip/udp.c
new file mode 100644 (file)
index 0000000..912e368
--- /dev/null
@@ -0,0 +1,418 @@
+#include <stdlib.h>
+
+#include <minix/sysutil.h>
+
+#include <net/ioctl.h>
+#include <net/gen/in.h>
+#include <net/gen/udp.h>
+#include <net/gen/udp_io.h>
+#include <net/gen/udp_io_hdr.h>
+
+#include <lwip/udp.h>
+#include <lwip/ip_addr.h>
+
+#include "socket.h"
+#include "proto.h"
+
+#define UDP_BUF_SIZE   (4 << 10)
+
+#define sock_alloc_buf(s)      debug_malloc(s)
+#define sock_free_buf(x)       debug_free(x)
+
+#if 0
+#define debug_udp_print(str, ...) printf("LWIP %s:%d : " str "\n", \
+               __func__, __LINE__, ##__VA_ARGS__)
+#else
+#define debug_udp_print(...) debug_print(__VA_ARGS__)
+#endif
+
+struct udp_recv_data {
+       ip_addr_t       ip;
+       u16_t           port;
+       struct pbuf *   pbuf;
+};
+
+#define udp_recv_alloc()       debug_malloc(sizeof(struct udp_recv_data))
+
+static void udp_recv_free(void * data)
+{
+       if (((struct udp_recv_data *)data)->pbuf)
+               pbuf_free(((struct udp_recv_data *)data)->pbuf);
+       debug_free(data);
+}
+
+static int udp_op_open(struct socket * sock, __unused message * m)
+{
+       struct udp_pcb * pcb;
+
+       debug_udp_print("socket num %ld", get_sock_num(sock));
+
+       if (!(pcb = udp_new()))
+               return ENOMEM;
+
+       sock->buf = NULL;
+       sock->buf_size = 0;
+       
+       sock->pcb = pcb;
+       
+       return OK;
+}
+
+static void udp_op_close(struct socket * sock, __unused message * m)
+{
+       debug_udp_print("socket num %ld", get_sock_num(sock));
+
+       /* deque and free all enqueued data before closing */
+       sock_dequeue_data_all(sock, udp_recv_free);
+
+       if (sock->pcb)
+               udp_remove(sock->pcb);
+       assert(sock->buf == NULL);
+
+       /* mark it as unused */
+       sock->ops = NULL;
+
+       sock_reply(sock, OK);
+}
+
+static int udp_do_receive(struct socket * sock,
+                       message * m,
+                       struct udp_pcb *pcb,
+                       struct pbuf *pbuf,
+                       ip_addr_t *addr,
+                       u16_t port)
+{
+       struct pbuf * p;
+       unsigned rem_len = m->COUNT;
+       unsigned written = 0, hdr_sz = 0;
+       int err;
+
+       debug_udp_print("user buffer size : %d", rem_len);
+
+       /* FIXME make it both a single copy */
+       if (!(sock->usr_flags & NWUO_RWDATONLY)) {
+               udp_io_hdr_t hdr;
+
+               hdr.uih_src_addr = addr->addr;
+               hdr.uih_src_port = htons(port);
+               hdr.uih_dst_addr = pcb->local_ip.addr;
+               hdr.uih_dst_port = htons(pcb->local_port);
+
+               hdr.uih_data_len = 0;
+               hdr.uih_ip_opt_len = 0;
+
+               err = copy_to_user(m->IO_ENDPT,
+                               &hdr, sizeof(hdr),
+                               (cp_grant_id_t) m->IO_GRANT,
+                               0);
+
+               if (err != OK)
+                       return err;
+
+               rem_len -= (hdr_sz = sizeof(hdr));
+       }
+
+       for (p = pbuf; p && rem_len; p = p->next) {
+               size_t cp_len;
+
+               cp_len = (rem_len < p->len) ? rem_len : p->len;
+               err = copy_to_user(m->IO_ENDPT, p->payload, cp_len,
+                               (cp_grant_id_t) m->IO_GRANT,
+                               hdr_sz + written);
+
+               if (err != OK)
+                       return err;
+
+               written += cp_len;
+               rem_len -= cp_len;
+       }
+
+       debug_udp_print("copied %d bytes", written + hdr_sz);
+       return written + hdr_sz;
+}
+
+static void udp_recv_callback(void *arg,
+                       struct udp_pcb *pcb,
+                       struct pbuf *pbuf,
+                       ip_addr_t *addr,
+                       u16_t port)
+{
+       struct socket * sock = (struct socket *) arg;
+       struct udp_recv_data * data;
+
+       debug_udp_print("socket num : %ld addr : %x port : %d\n",
+                       get_sock_num(sock), (unsigned int) addr->addr, port);
+
+       if (sock->flags & SOCK_FLG_OP_PENDING) {
+               /* we are resuming a suspended operation */
+               int ret;
+
+               ret = udp_do_receive(sock, &sock->mess, pcb, pbuf, addr, port);
+
+               if (ret > 0) {
+                       pbuf_free(pbuf);
+                       sock_revive(sock, ret);
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+                       return;
+               } else {
+                       sock_revive(sock, ret);
+                       sock->flags &= ~SOCK_FLG_OP_PENDING;
+               }
+       }
+
+       /* Do not enqueue more data than allowed */
+       if (sock->recv_data_size > UDP_BUF_SIZE) {
+               pbuf_free(pbuf);
+               return;
+       }
+
+       /*
+        * nobody is waiting for the data or an error occured above, we enqueue
+        * the packet
+        */
+       if (!(data = udp_recv_alloc())) {
+               pbuf_free(pbuf);
+               return;
+       }
+
+       data->ip = *addr;
+       data->port = port;
+       data->pbuf = pbuf;
+
+       if (sock_enqueue_data(sock, data, data->pbuf->tot_len) != OK) {
+               udp_recv_free(data);
+               return;
+       }
+       
+       /*
+        * We don't need to notify when somebody is already waiting, reviving
+        * read operation will do the trick for us. But we must announce new
+        * data available here.
+        */
+       if (sock_select_read_set(sock))
+               sock_select_notify(sock);
+}
+
+static void udp_op_read(struct socket * sock, message * m)
+{
+       debug_udp_print("socket num %ld", get_sock_num(sock));
+
+       if (sock->recv_head) {
+               /* data available receive immeditely */
+
+               struct udp_recv_data * data;
+               int ret;
+
+               data = (struct udp_recv_data *) sock->recv_head->data;
+
+               ret = udp_do_receive(sock, m, (struct udp_pcb *) sock->pcb,
+                                       data->pbuf, &data->ip, data->port);
+
+               if (ret > 0) {
+                       sock_dequeue_data(sock);
+                       sock->recv_data_size -= data->pbuf->tot_len;
+                       udp_recv_free(data);
+               }
+               sock_reply(sock, ret);
+       } else {
+               /* store the message so we know how to reply */
+               sock->mess = *m;
+               /* operation is being processes */
+               sock->flags |= SOCK_FLG_OP_PENDING;
+
+               debug_udp_print("no data to read, suspending\n");
+               sock_reply(sock, SUSPEND);
+       }
+}
+
+static int udp_op_send(__unused struct socket * sock,
+                       __unused struct pbuf * pbuf,
+                       __unused message * m)
+{
+       int err;
+
+       debug_udp_print("pbuf len %d\n", pbuf->len);
+
+       if ((err = udp_send(sock->pcb, pbuf)) == ERR_OK)
+               return m->COUNT;
+       else {
+               debug_udp_print("udp_send failed %d", err);
+               return EIO;
+       }
+}
+
+static int udp_op_sendto(struct socket * sock, struct pbuf * pbuf, message * m)
+{
+       int err;
+       udp_io_hdr_t hdr;
+
+       hdr = *(udp_io_hdr_t *) pbuf->payload;
+
+       pbuf_header(pbuf, -(s16_t)sizeof(udp_io_hdr_t));
+
+       debug_udp_print("data len %d pbuf len %d\n",
+                       hdr.uih_data_len, pbuf->len);
+
+       if ((err = udp_sendto(sock->pcb, pbuf, (ip_addr_t *) &hdr.uih_dst_addr,
+                                               ntohs(hdr.uih_dst_port))) == ERR_OK)
+               return m->COUNT;
+       else {
+               debug_udp_print("udp_sendto failed %d", err);
+               return EIO;
+       }
+}
+
+static void udp_op_write(struct socket * sock, message * m)
+{
+       int ret;
+       struct pbuf * pbuf;
+
+       debug_udp_print("socket num %ld data size %d",
+                       get_sock_num(sock), m->COUNT);
+
+       pbuf = pbuf_alloc(PBUF_TRANSPORT, m->COUNT, PBUF_POOL);
+       if (!pbuf) {
+               ret = ENOMEM;
+               goto write_err;
+       }
+
+       if ((ret = copy_from_user(m->IO_ENDPT, pbuf->payload, m->COUNT,
+                               (cp_grant_id_t) m->IO_GRANT, 0)) != OK) {
+               pbuf_free(pbuf);
+               goto write_err;
+       }
+
+       if (sock->usr_flags & NWUO_RWDATONLY)
+               ret = udp_op_send(sock, pbuf, m);
+       else
+               ret = udp_op_sendto(sock, pbuf, m);
+
+       if (pbuf_free(pbuf) == 0) {
+               panic("We cannot buffer udp packets yet!");
+       }
+       
+write_err:
+       sock_reply(sock, ret);
+}
+
+static void udp_set_opt(struct socket * sock, message * m)
+{
+       int err;
+       nwio_udpopt_t udpopt;
+       struct udp_pcb * pcb = (struct udp_pcb *) sock->pcb;
+       ip_addr_t loc_ip = ip_addr_any;
+
+       assert(pcb);
+
+       err = copy_from_user(m->IO_ENDPT, &udpopt, sizeof(udpopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       debug_udp_print("udpopt.nwuo_flags = 0x%lx", udpopt.nwuo_flags);
+       debug_udp_print("udpopt.nwuo_remaddr = 0x%x",
+                               (unsigned int) udpopt.nwuo_remaddr);
+       debug_udp_print("udpopt.nwuo_remport = 0x%x",
+                               ntohs(udpopt.nwuo_remport));
+       debug_udp_print("udpopt.nwuo_locaddr = 0x%x",
+                               (unsigned int) udpopt.nwuo_locaddr);
+       debug_udp_print("udpopt.nwuo_locport = 0x%x",
+                               ntohs(udpopt.nwuo_locport));
+
+       sock->usr_flags = udpopt.nwuo_flags;
+
+       /*
+        * We will only get data from userspace and the remote address
+        * and port are being set which means that from now on we must
+        * know where to send data. Thus we should interpret this as
+        * connect() call
+        */
+       if (sock->usr_flags & NWUO_RWDATONLY &&
+                       sock->usr_flags & NWUO_RP_SET &&
+                       sock->usr_flags & NWUO_RA_SET)
+               udp_connect(pcb, (ip_addr_t *) &udpopt.nwuo_remaddr,
+                                               ntohs(udpopt.nwuo_remport));
+       /* Setting local address means binding */
+       if (sock->usr_flags & NWUO_LP_SET)
+               udp_bind(pcb, &loc_ip, ntohs(udpopt.nwuo_locport));
+       /* We can only bind to random local port */
+       if (sock->usr_flags & NWUO_LP_SEL)
+               udp_bind(pcb, &loc_ip, 0);
+
+       
+       /* register a receive hook */
+       udp_recv((struct udp_pcb *) sock->pcb, udp_recv_callback, sock);
+
+       sock_reply(sock, OK);
+}
+
+static void udp_get_opt(struct socket * sock, message * m)
+{
+       int err;
+       nwio_udpopt_t udpopt;
+       struct udp_pcb * pcb = (struct udp_pcb *) sock->pcb;
+
+       assert(pcb);
+
+       udpopt.nwuo_locaddr = pcb->local_ip.addr;
+       udpopt.nwuo_locport = htons(pcb->local_port);
+       udpopt.nwuo_remaddr = pcb->remote_ip.addr;
+       udpopt.nwuo_remport = htons(pcb->remote_port);
+       udpopt.nwuo_flags = sock->usr_flags;
+
+       debug_udp_print("udpopt.nwuo_flags = 0x%lx", udpopt.nwuo_flags);
+       debug_udp_print("udpopt.nwuo_remaddr = 0x%x",
+                               (unsigned int) udpopt.nwuo_remaddr);
+       debug_udp_print("udpopt.nwuo_remport = 0x%x",
+                               ntohs(udpopt.nwuo_remport));
+       debug_udp_print("udpopt.nwuo_locaddr = 0x%x",
+                               (unsigned int) udpopt.nwuo_locaddr);
+       debug_udp_print("udpopt.nwuo_locport = 0x%x",
+                               ntohs(udpopt.nwuo_locport));
+
+       if ((unsigned) m->COUNT < sizeof(udpopt)) {
+               sock_reply(sock, EINVAL);
+               return;
+       }
+
+       err = copy_to_user(m->IO_ENDPT, &udpopt, sizeof(udpopt),
+                               (cp_grant_id_t) m->IO_GRANT, 0);
+
+       if (err != OK)
+               sock_reply(sock, err);
+
+       sock_reply(sock, OK);
+}
+
+static void udp_op_ioctl(struct socket * sock, message * m)
+{
+       debug_udp_print("socket num %ld req %c %d %d",
+                       get_sock_num(sock),
+                       (m->REQUEST >> 8) & 0xff,
+                       m->REQUEST & 0xff,
+                       (m->REQUEST >> 16) & _IOCPARM_MASK);
+
+       switch (m->REQUEST) {
+       case NWIOSUDPOPT:
+               udp_set_opt(sock, m);
+               break;
+       case NWIOGUDPOPT:
+               udp_get_opt(sock, m);
+               break;
+       default:
+               sock_reply(sock, EBADIOCTL);
+               return;
+       }
+}
+
+struct sock_ops sock_udp_ops = {
+       .open           = udp_op_open,
+       .close          = udp_op_close,
+       .read           = udp_op_read,
+       .write          = udp_op_write,
+       .ioctl          = udp_op_ioctl,
+       .select         = generic_op_select,
+       .select_reply   = generic_op_select_reply
+};
+