Zhao Yanbai Git Server - minix.git/commitdiff
New inet with partial select implementation
author Philip Homburg <philip@cs.vu.nl>
Tue, 28 Jun 2005 15:19:58 +0000 (15:19 +0000)
committer Philip Homburg <philip@cs.vu.nl>
Tue, 28 Jun 2005 15:19:58 +0000 (15:19 +0000)
71 files changed:
include/net/gen/arp_io.h [new file with mode: 0644]
include/net/gen/ether.h
include/net/gen/icmp.h
include/net/gen/icmp_hdr.h
include/net/gen/in.h
include/net/gen/ip_hdr.h
include/net/gen/ip_io.h
include/net/gen/psip_hdr.h
include/net/gen/psip_io.h
include/net/gen/tcp_hdr.h
include/net/gen/tcp_io.h
include/net/ioctl.h
servers/inet/Makefile
servers/inet/buf.c
servers/inet/clock.c
servers/inet/const.h
servers/inet/generic/arp.c
servers/inet/generic/arp.h
servers/inet/generic/assert.h
servers/inet/generic/buf.h
servers/inet/generic/eth.c
servers/inet/generic/eth.h
servers/inet/generic/eth_int.h
servers/inet/generic/event.c
servers/inet/generic/event.h
servers/inet/generic/icmp.c
servers/inet/generic/icmp.h
servers/inet/generic/icmp_lib.h
servers/inet/generic/io.c
servers/inet/generic/ip.c
servers/inet/generic/ip.h
servers/inet/generic/ip_eth.c
servers/inet/generic/ip_int.h
servers/inet/generic/ip_ioctl.c
servers/inet/generic/ip_lib.c
servers/inet/generic/ip_ps.c
servers/inet/generic/ip_read.c
servers/inet/generic/ip_write.c
servers/inet/generic/ipr.c
servers/inet/generic/ipr.h
servers/inet/generic/psip.c
servers/inet/generic/psip.h
servers/inet/generic/rand256.c [new file with mode: 0644]
servers/inet/generic/rand256.h [new file with mode: 0644]
servers/inet/generic/sr.h
servers/inet/generic/tcp.c
servers/inet/generic/tcp.h
servers/inet/generic/tcp_int.h
servers/inet/generic/tcp_lib.c
servers/inet/generic/tcp_recv.c
servers/inet/generic/tcp_send.c
servers/inet/generic/type.h
servers/inet/generic/udp.c
servers/inet/generic/udp.h
servers/inet/generic/udp_int.h [new file with mode: 0644]
servers/inet/inet.c
servers/inet/inet.h
servers/inet/inet_config.c
servers/inet/inet_config.h
servers/inet/minix3/queryparam.c [new file with mode: 0644]
servers/inet/minix3/queryparam.h [new file with mode: 0644]
servers/inet/mnx_eth.c
servers/inet/mq.c
servers/inet/osdep_eth.h
servers/inet/qp.c [new file with mode: 0644]
servers/inet/qp.h [new file with mode: 0644]
servers/inet/sha2.c [new file with mode: 0644]
servers/inet/sha2.h [new file with mode: 0644]
servers/inet/sr.c
servers/inet/sr_int.h [new file with mode: 0644]
servers/inet/version.c

diff --git a/include/net/gen/arp_io.h b/include/net/gen/arp_io.h
new file mode 100644 (file)
index 0000000..583cd6f
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+net/gen/arp_io.h
+
+Created:       Jan 2001 by Philip Homburg <philip@f-mnx.phicoh.com>
+*/
+
+typedef struct nwio_arp
+{
+       int nwa_entno;
+       u32_t nwa_flags;
+       ipaddr_t nwa_ipaddr;
+       ether_addr_t nwa_ethaddr;
+} nwio_arp_t;
+
+#define NWAF_EMPTY     0
+#define NWAF_INCOMPLETE        1
+#define NWAF_DEAD      2
+#define NWAF_PERM      4
+#define NWAF_PUB       8
+
+/*
+ * $PchId: arp_io.h,v 1.2 2004/08/03 11:01:59 philip Exp $
+ */
index d2ccc35664520bdf06a3b794a06fadd2316fe508..690e76570649a63cd1d2dc23fd9a45e108421b6f 100755 (executable)
@@ -19,7 +19,17 @@ typedef struct ether_addr
 typedef u16_t ether_type_t;
 typedef U16_t Ether_type_t;
 
-#define ETH_ARP_PROTO  0x806
-#define ETH_IP_PROTO   0x800
+#define ETH_ARP_PROTO   0x806
+#define ETH_IP_PROTO    0x800
+#define ETH_VLAN_PROTO 0x8100
+
+/* Tag Control Information field for VLAN and Priority tagging */
+#define ETH_TCI_PRIO_MASK      0xe000
+#define ETH_TCI_CFI            0x1000  /* Canonical Format Indicator */
+#define ETH_TCI_VLAN_MASK      0x0fff  /* 12-bit vlan number */
 
 #endif /* __SERVER__IP__GEN__ETHER_H__ */
+
+/*
+ * $PchId: ether.h,v 1.6 2005/01/27 17:33:35 philip Exp $
+ */
index cb3ae37b4b17ad63e4682cef21b4a7d74b015d4b..efa0819998c3c08480178eab8d225585ce015106 100755 (executable)
@@ -5,7 +5,7 @@ server/ip/gen/icmp.h
 #ifndef __SERVER__IP__GEN__ICMP_H__
 #define __SERVER__IP__GEN__ICMP_H__
 
-#define ICMP_MIN_HDR_LEN       4
+#define ICMP_MIN_HDR_SIZE      4
 
 #define ICMP_TYPE_ECHO_REPL    0
 #define ICMP_TYPE_DST_UNRCH    3
@@ -33,8 +33,17 @@ server/ip/gen/icmp.h
 #define ICMP_TYPE_INFO_REQ     15
 #define ICMP_TYPE_INFO_REPL    16
 
+/* Preferences for router advertisements. A router daemon installs itself
+ * as the default router in the router's interfaces by sending router
+ * advertisements to localhost with preference ICMP_RA_LOCAL_PREF.
+ */
+#define ICMP_RA_DEFAULT_PREF   0x00000000
+#define ICMP_RA_INVAL_PREF     0x80000000
+#define ICMP_RA_MAX_PREF       0x7fffffff
+#define ICMP_RA_LOCAL_PREF     0x10000000
+
 #endif /* __SERVER__IP__GEN__ICMP_H__ */
 
 /*
- * $PchId: icmp.h,v 1.5 1995/11/17 22:38:46 philip Exp $
+ * $PchId: icmp.h,v 1.6 2002/06/10 07:10:26 philip Exp $
  */
index dbd5dd0f0b4c7cb5380ff49b0f16b7ccf86f7a33..8e6571dcefa47d1c6e0b1e63debd87327271c3e8 100755 (executable)
@@ -29,6 +29,12 @@ typedef struct icmp_pp
        u8_t    ipp_unused[3];
 } icmp_pp_t;
 
+typedef struct icmp_mtu                /* RFC 1191 */
+{
+       u16_t   im_unused;
+       u16_t   im_mtu;
+} icmp_mtu_t;
+
 typedef struct icmp_hdr
 {
        u8_t ih_type, ih_code;
@@ -40,6 +46,7 @@ typedef struct icmp_hdr
                ipaddr_t ihh_gateway;
                icmp_ram_t ihh_ram;
                icmp_pp_t ihh_pp;
+               icmp_mtu_t ihh_mtu;
        } ih_hun;
        union
        {
@@ -51,5 +58,5 @@ typedef struct icmp_hdr
 #endif /* __SERVER__IP__GEN__ICMP_HDR_H__ */
 
 /*
- * $PchId: icmp_hdr.h,v 1.4 1995/11/17 22:28:58 philip Exp $
+ * $PchId: icmp_hdr.h,v 1.5 2002/06/10 07:10:48 philip Exp $
  */
index d86ee4e60bf9e738ac72a6eb578bca4ba0dfddda..2c5b269a4849e3f83dcfe4edbd056d859a587b6a 100755 (executable)
@@ -8,9 +8,11 @@ server/ip/gen/in.h
 #define IP_MIN_HDR_SIZE                20
 #define IP_MAX_HDR_SIZE                60              /* 15 * 4 */
 #define IP_VERSION             4
+#define IP_DEF_TTL             64
 #define IP_MAX_TTL             255
-#define IP_DEF_MSS             576
-#define IP_MAX_PACKSIZE                40000           /* 8192 */
+#define IP_DEF_MTU             576
+#define IP_MIN_MTU             (IP_MAX_HDR_SIZE+8)
+#define IP_MAX_PACKSIZE                40000
        /* Note: this restriction is not part of the IP-protocol but
           introduced by this implementation. */
 
@@ -18,6 +20,8 @@ server/ip/gen/in.h
 #define IPPROTO_TCP            6
 #define IPPROTO_UDP            17
 
+#define IP_MC_ALL_SYSTEMS      0xE0000001      /* 224.0.0.1 */
+
 typedef u32_t ipaddr_t;
 typedef u8_t ipproto_t;
 typedef struct ip_hdropt
@@ -29,5 +33,5 @@ typedef struct ip_hdropt
 #endif /* __SERVER__IP__GEN__IN_H__ */
 
 /*
- * $PchId: in.h,v 1.3 1995/11/17 22:27:50 philip Exp $
+ * $PchId: in.h,v 1.6 2002/06/10 07:11:15 philip Exp $
  */
index ba1c5336c79783519395fc88bde6f8df98196e30..f56205281c9a4bcdddb1131b6502d64136fea5bd 100755 (executable)
@@ -29,14 +29,19 @@ typedef struct ip_hdr
 #define IP_OPT_COPIED  0x80
 #define IP_OPT_NUMBER  0x1f
 
-#define IP_OPT_EOL     0x00
-#define IP_OPT_NOP     0x01
-#define IP_OPT_LSRR    0x83
-#define IP_OPT_RR      0x07
+#define IP_OPT_EOL     0x00    /* End of Options List, RFC-791 */
+#define IP_OPT_NOP     0x01    /* No Operation, RFC-791 */
+#define IP_OPT_RR      0x07    /* Record Route, RFC-791 */
+#define IP_OPT_TS      0x44    /* Timestamp, RFC-791 */
+#define IP_OPT_SEC     0x82    /* Security, RFC-1108 */
+#define IP_OPT_LSRR    0x83    /* Loose Source Route, RFC-791 */
+#define IP_OPT_SSRR    0x89    /* Strict Source Route, RFC-791 */
+#define IP_OPT_RTRALT  0x94    /* Router Alert, RFC-2113 */
+
 #define IP_OPT_RR_MIN          4
 
 #endif /* __SERVER__IP__GEN__HDR_H__ */
 
 /*
- * $PchId: ip_hdr.h,v 1.4 1995/11/17 22:26:00 philip Exp $
+ * $PchId: ip_hdr.h,v 1.5 2002/06/10 07:11:46 philip Exp $
  */
index 97fac609ac536fb94a04295a6cd283679c2ebff4..07e87f088c619e6ee9b182e7517924eefde44d8d 100755 (executable)
@@ -5,17 +5,26 @@ server/ip/gen/ip_io.h
 #ifndef __SERVER__IP__GEN__IP_IO_H__
 #define __SERVER__IP__GEN__IP_IO_H__
 
+typedef struct nwio_ipconf2
+{
+       u32_t   nwic_flags;
+       ipaddr_t nwic_ipaddr;
+       ipaddr_t nwic_netmask;
+} nwio_ipconf2_t;
+
 typedef struct nwio_ipconf
 {
        u32_t   nwic_flags;
        ipaddr_t nwic_ipaddr;
        ipaddr_t nwic_netmask;
+       u16_t nwic_mtu;
 } nwio_ipconf_t;
 
 #define NWIC_NOFLAGS           0x0
-#define NWIC_FLAGS             0x3
+#define NWIC_FLAGS             0x7
 #      define NWIC_IPADDR_SET          0x1
 #      define NWIC_NETMASK_SET         0x2
+#      define NWIC_MTU_SET             0x4
 
 typedef struct nwio_ipopt
 {
@@ -53,3 +62,7 @@ typedef struct nwio_ipopt
 #      define NWIO_RWDATALL    0x10000000l
 
 #endif /* __SERVER__IP__GEN__IP_IO_H__ */
+
+/*
+ * $PchId: ip_io.h,v 1.5 2001/03/12 22:17:25 philip Exp $
+ */
index 5f85406a2fa580ed3f6e69c1c95e57fe274cf6e6..5dc8bc4e444ae8e46b17f6ea70d77c3cdb882dd8 100755 (executable)
@@ -9,6 +9,7 @@ typedef struct psip_io_hdr
 {
        u8_t pih_flags;
        u8_t pih_dummy[3];
+       u32_t pih_nexthop;
 } psip_io_hdr_t;
 
 #define PF_LOC_REM_MASK        1
@@ -18,5 +19,5 @@ typedef struct psip_io_hdr
 #endif /* __SERVER__IP__GEN__PSIP_HDR_H__ */
 
 /*
- * $PchId: psip_hdr.h,v 1.2 1995/11/17 22:22:35 philip Exp $
+ * $PchId: psip_hdr.h,v 1.3 2001/02/19 07:35:38 philip Exp $
  */
index 3357983421286aba16a8133c8c1fefae679aec9d..ba71a4eea0a53f777c3c48f4ba8612b338c5ad04 100755 (executable)
@@ -13,9 +13,12 @@ typedef struct nwio_psipopt
 #define NWPO_PROMISC_MASK      0x0001L
 #define                NWPO_EN_PROMISC         0x00000001L
 #define                NWUO_DI_PROMISC         0x00010000L
+#define NWPO_NEXTHOP_MASK      0x0002L
+#define                NWPO_EN_NEXTHOP         0x00000002L
+#define                NWUO_DI_NEXTHOP         0x00020000L
 
 #endif /* __SERVER__IP__GEN__PSIP_IO_H__ */
 
 /*
- * $PchId: psip_io.h,v 1.2 1995/11/17 22:22:16 philip Exp $
+ * $PchId: psip_io.h,v 1.3 2001/02/19 07:35:58 philip Exp $
  */
index b0a42b2a8da534eca784912055b8411eeb30824e..f2d93780ceb145ac687a0e7a4b72f0a35732c0a2 100755 (executable)
@@ -34,12 +34,16 @@ typedef struct tcp_hdropt
        u8_t tho_data[TCP_MAX_HDR_SIZE-TCP_MIN_HDR_SIZE];
 } tcp_hdropt_t;
 
-#define TCP_OPT_EOL    0
-#define TCP_OPT_NOP    1
-#define TCP_OPT_MSS    2
+#define TCP_OPT_EOL     0
+#define TCP_OPT_NOP     1
+#define TCP_OPT_MSS     2
+#define TCP_OPT_WSOPT   3      /* RFC-1323, window scale option */
+#define TCP_OPT_SACKOK  4      /* RFC-2018, SACK permitted */
+#define TCP_OPT_TS      8      /* RFC-1323, Timestamps option */
+#define TCP_OPT_CCNEW  12      /* RFC-1644, new connection count */
 
 #endif /* __SERVER__IP__GEN__TCP_HDR_H__ */
 
 /*
- * $PchId: tcp_hdr.h,v 1.3 1995/11/17 22:18:13 philip Exp $
+ * $PchId: tcp_hdr.h,v 1.4 2002/06/10 07:12:22 philip Exp $
  */
index b41eb64acb4827e3409a0b2cfd72cff67e8e5b26..62339d6843b05a8f82c2b83ee113d98206d207d7 100755 (executable)
@@ -58,9 +58,12 @@ typedef struct nwio_tcpopt
 #      define NWTO_NOTBSD_URG  0x00040000L
 #define NWTO_DEL_RST_MASK      0x0008L
 #      define NWTO_DEL_RST     0x00000008L
+#define NWTO_BULK_MASK         0x0010L
+#      define NWTO_BULK        0x00000010L
+#      define NWTO_NOBULK      0x00100000L
 
 #endif /* __SERVER__IP__GEN__TCP_IO_H__ */
 
 /*
- * $PchId: tcp_io.h,v 1.4 1995/11/17 22:17:47 philip Exp $
+ * $PchId: tcp_io.h,v 1.5 2001/02/19 07:36:55 philip Exp $
  */
index e412809bcb6df60a865c8e99a8d2952dd7cc2982..33bb22de9f1363af9840c780193d5645213dc569 100755 (executable)
 #define NWIOGETHOPT    _IOR('n', 17, struct nwio_ethopt)
 #define NWIOGETHSTAT   _IOR('n', 18, struct nwio_ethstat)
 
+#define NWIOARPGIP     _IORW('n',20, struct nwio_arp)
+#define NWIOARPGNEXT   _IORW('n',21, struct nwio_arp)
+#define NWIOARPSIP     _IOW ('n',22, struct nwio_arp)
+#define NWIOARPDIP     _IOW ('n',23, struct nwio_arp)
+
+#define NWIOSIPCONF2   _IOW('n', 32, struct nwio_ipconf2)
 #define NWIOSIPCONF    _IOW('n', 32, struct nwio_ipconf)
+#define NWIOGIPCONF2   _IOR('n', 33, struct nwio_ipconf2)
 #define NWIOGIPCONF    _IOR('n', 33, struct nwio_ipconf)
 #define NWIOSIPOPT     _IOW('n', 34, struct nwio_ipopt)
 #define NWIOGIPOPT     _IOR('n', 35, struct nwio_ipopt)
@@ -21,7 +28,6 @@
 #define NWIOGIPOROUTE  _IORW('n', 40, struct nwio_route)
 #define NWIOSIPOROUTE  _IOW ('n', 41, struct nwio_route)
 #define NWIODIPOROUTE  _IOW ('n', 42, struct nwio_route)
-
 #define NWIOGIPIROUTE  _IORW('n', 43, struct nwio_route)
 #define NWIOSIPIROUTE  _IOW ('n', 44, struct nwio_route)
 #define NWIODIPIROUTE  _IOW ('n', 45, struct nwio_route)
 #define NWIOTCPSHUTDOWN        _IO ('n', 53)
 #define NWIOSTCPOPT    _IOW('n', 54, struct nwio_tcpopt)
 #define NWIOGTCPOPT    _IOR('n', 55, struct nwio_tcpopt)
+#define NWIOTCPPUSH    _IO ('n', 56)
 
 #define NWIOSUDPOPT    _IOW('n', 64, struct nwio_udpopt)
 #define NWIOGUDPOPT    _IOR('n', 65, struct nwio_udpopt)
+#define NWIOUDPPEEK    _IOR('n', 66, struct udp_io_hdr)
 
 #define NWIOSPSIPOPT   _IOW('n', 80, struct nwio_psipopt)
 #define NWIOGPSIPOPT   _IOR('n', 81, struct nwio_psipopt)
 
+#define NWIOQUERYPARAM _IORW('n',96, struct svrqueryparam)
+
 #endif /* _NET__IOCTL_H */
+
+/*
+ * $PchId: ioctl.h,v 1.2 2003/07/25 14:34:03 philip Exp $
+ */
index c7a1ecbe433f6b787ef84faa6ec69eaae348c810..6e7307861866e040b888efaad99d8114259d92eb 100644 (file)
@@ -1,47 +1,40 @@
-# Makefile for Network Server (INET).
-SERVER = inet
+# Makefile for inet.
 
-# directories
-u = /usr
-i = $u/include
-s = $i/sys
-h = $i/minix
-n = $i/net
+# Directories
 g = generic
 
-# programs, flags, and libraries
+# Programs, flags, and libraries
 CC =           cc
-CPPFLAGS =     -I. -I..
-CFLAGS =       $(OPT) $(CPPFLAGS) -m
-LDFLAGS =      -i
+CPPFLAGS =     -I. -D_MINIX
+CFLAGS =       $(OPT) $(CPPFLAGS)
+LDFLAGS =
 LIBS =         -lsys -lutils
 
+.c.o:
+       $(CC) $(CFLAGS) -o $@ -c $<
+
 OBJ =  buf.o clock.o inet.o inet_config.o \
-       mnx_eth.o mq.o sr.o stacktrace.o \
+       mnx_eth.o mq.o qp.o sr.o stacktrace.o \
        $g/udp.o $g/arp.o $g/eth.o $g/event.o \
        $g/icmp.o $g/io.o $g/ip.o $g/ip_ioctl.o \
        $g/ip_lib.o $g/ip_read.o $g/ip_write.o \
-       $g/ipr.o $g/tcp.o $g/tcp_lib.o \
+       $g/ipr.o $g/rand256.o $g/tcp.o $g/tcp_lib.o \
        $g/tcp_recv.o $g/tcp_send.o $g/ip_eth.o \
-       $g/ip_ps.o $g/psip.o
+       $g/ip_ps.o $g/psip.o \
+       minix3/queryparam.o sha2.o
+
+all:   inet
 
-# build local binary
-all build:     $(SERVER)
-$(SERVER):     inet.a
-       $(CC) -o $@ $(LDFLAGS) inet.a version.c $(LIBS)
-       install -S 2kw $@
+inet:  $(OBJ)
+       $(CC) -o $@ $(LDFLAGS) $(OBJ) version.c $(LIBS)
 
-inet.a:        $(OBJ)
-       @rm -f $@
-       aal cr $@ $(OBJ)
+install:       /usr/sbin/servers/inet
 
-# install with other servers
-install:       /usr/sbin/servers/$(SERVER)
-/usr/sbin/servers/$(SERVER):   $(SERVER)
-       install -o root -cs $? $@
+/usr/sbin/servers/inet:        inet
+       install -c $? $@
 
 clean:
-       rm -f $(SERVER) *.a *.o */*.o */*.a *.bak 
+       rm -f $(OBJ) inet *.bak
 
 depend: 
        /usr/bin/mkdep "$(CC) -E $(CPPFLAGS)" *.c generic/*.c > .depend
@@ -49,42 +42,6 @@ depend:
 # Include generated dependencies.
 include .depend
 
-$g/arp.o:      $g/arp.c
-       cd generic && $(CC) -c $(CFLAGS) arp.c
-$g/eth.o:      $g/eth.c
-       cd generic && $(CC) -c $(CFLAGS) eth.c
-$g/event.o:    $g/event.c
-       cd generic && $(CC) -c $(CFLAGS) event.c
-$g/icmp.o:     $g/icmp.c
-       cd generic && $(CC) -c $(CFLAGS) icmp.c
-$g/io.o:       $g/io.c
-       cd generic && $(CC) -c $(CFLAGS) io.c
-$g/ip.o:       $g/ip.c
-       cd generic && $(CC) -c $(CFLAGS) ip.c
-$g/ip_eth.o:   $g/ip_eth.c
-       cd generic && $(CC) -c $(CFLAGS) ip_eth.c
-$g/ip_ioctl.o: $g/ip_ioctl.c
-       cd generic && $(CC) -c $(CFLAGS) ip_ioctl.c
-$g/ip_lib.o:   $g/ip_lib.c
-       cd generic && $(CC) -c $(CFLAGS) ip_lib.c
-$g/ip_ps.o:    $g/ip_ps.c
-       cd generic && $(CC) -c $(CFLAGS) ip_ps.c
-$g/ip_read.o:  $g/ip_read.c
-       cd generic && $(CC) -c $(CFLAGS) ip_read.c
-$g/ip_write.o: $g/ip_write.c
-       cd generic && $(CC) -c $(CFLAGS) ip_write.c
-$g/ipr.o:      $g/ipr.c
-       cd generic && $(CC) -c $(CFLAGS) ipr.c
-$g/psip.o:     $g/psip.c
-       cd generic && $(CC) -c $(CFLAGS) psip.c
-$g/tcp.o:      $g/tcp.c
-       cd generic && $(CC) -c $(CFLAGS) tcp.c
-$g/tcp_lib.o:  $g/tcp_lib.c
-       cd generic && $(CC) -c $(CFLAGS) tcp_lib.c
-$g/tcp_recv.o: $g/tcp_recv.c
-       cd generic && $(CC) -c $(CFLAGS) tcp_recv.c
-$g/tcp_send.o: $g/tcp_send.c
-       cd generic && $(CC) -c $(CFLAGS) tcp_send.c
-$g/udp.o:      $g/udp.c
-       cd generic && $(CC) -c $(CFLAGS) udp.c
-
+#
+# $PchId: Makefile.mnx3,v 1.1 2005/06/28 14:28:45 philip Exp $
+#
index 6d9ec94e40f494d67a1c5eddd27b0b29ae1b8b9c..baa431255479a979c6491ccdab2be9f55f812ea4 100644 (file)
@@ -22,11 +22,7 @@ THIS_FILE
 #endif
 
 #ifndef BUF512_NR
-#if CRAMPED
-#define BUF512_NR      32
-#else
-#define BUF512_NR      128
-#endif
+#define BUF512_NR      512
 #endif
 #ifndef BUF2K_NR
 #define BUF2K_NR       0
@@ -35,8 +31,8 @@ THIS_FILE
 #define BUF32K_NR      0
 #endif
 
-#define ACC_NR         ((BUF512_NR+BUF2K_NR+BUF32K_NR)*3/2)
-#define CLIENT_NR      6
+#define ACC_NR         ((BUF512_NR+BUF2K_NR+BUF32K_NR)*3)
+#define CLIENT_NR      7
 
 #define DECLARE_TYPE(Tag, Type, Size)                                  \
        typedef struct Tag                                              \
@@ -92,6 +88,7 @@ PRIVATE size_t bf_buf_gran;
 
 PUBLIC size_t bf_free_bufsize;
 PUBLIC acc_t *bf_temporary_acc;
+PUBLIC acc_t *bf_linkcheck_acc;
 
 #ifdef BUF_CONSISTENCY_CHECK
 int inet_buf_debug;
@@ -115,7 +112,6 @@ FORWARD int report_buffer ARGS(( buf_t *buf, char *label, int i ));
 PUBLIC void bf_init()
 {
        int i;
-       size_t size;
        size_t buf_s;
        acc_t *acc;
 
@@ -216,7 +212,7 @@ bf_checkreq_t checkfunc;
                        return;
                }
 
-       ip_panic(( "buf.c: to many clients" ));
+       ip_panic(( "buf.c: too many clients" ));
 }
 
 /*
@@ -240,6 +236,7 @@ size_t size;
        assert (size>0);
 
        head= NULL;
+       tail= NULL;
        while (size)
        {
                new_acc= NULL;
@@ -271,7 +268,7 @@ size_t size;
 #endif
 #undef ALLOC_BUF
                {
-                       DBLOCK(1, printf("freeing buffers\n"));
+                       DBLOCK(2, printf("freeing buffers\n"));
 
                        bf_free_bufsize= 0;
                        for (i=0; bf_free_bufsize<size && i<MAX_BUFREQ_PRI;
@@ -282,13 +279,13 @@ size_t size;
                                        if (freereq[j])
                                                (*freereq[j])(i);
                                }
-#if DEBUG
+#if DEBUG && 0
  { acc_t *acc;
    j= 0; for(acc= buf512_freelist; acc; acc= acc->acc_next) j++;
    printf("# of free 512-bytes buffer is now %d\n", j); }
 #endif
                        }
-#if DEBUG
+#if DEBUG && 0
  { printf("last level was level %d\n", i-1); }
 #endif
                        if (bf_free_bufsize<size)
@@ -318,11 +315,7 @@ size_t size;
                tail->acc_length=  count;
                size -= count;
        }
-       tail->acc_next= 0;
-
-#if DEBUG
-       bf_chkbuf(head);
-#endif
+       tail->acc_next= NULL;
 
        return head;
 }
@@ -420,7 +413,6 @@ int clnt_line;
 register acc_t *acc_ptr;
 {
        register acc_t *new_acc;
-       int i, j;
 
        if (!acc_freelist)
        {
@@ -497,8 +489,8 @@ acc_t *old_acc;
        size_t size, offset_old, offset_new, block_size, block_size_old;
 
        /* Check if old acc is good enough. */
-       if (!old_acc || !old_acc->acc_next && old_acc->acc_linkC == 1 && 
-               old_acc->acc_buffer->buf_linkC == 1)
+       if (!old_acc || (!old_acc->acc_next && old_acc->acc_linkC == 1 && 
+               old_acc->acc_buffer->buf_linkC == 1))
        {
                return old_acc;
        }
@@ -556,7 +548,7 @@ register unsigned length;
        register acc_t *head, *tail;
 
        if (!data && !offset && !length)
-               return 0;
+               return NULL;
 #ifdef BUF_TRACK_ALLOC_FREE
        assert(data ||
                (printf("from %s, %d: %u, %u\n",
@@ -566,19 +558,13 @@ register unsigned length;
 #endif
 
        assert(data);
-#if DEBUG
-       bf_chkbuf(data);
-#endif
 
        if (!length)
        {
                head= bf_dupacc(data);
                bf_afree(head->acc_next);
-               head->acc_next= 0;
+               head->acc_next= NULL;
                head->acc_length= 0;
-#if DEBUG
-               bf_chkbuf(data);
-#endif
                return head;
        }
        while (data && offset>=data->acc_length)
@@ -591,7 +577,7 @@ register unsigned length;
 
        head= bf_dupacc(data);
        bf_afree(head->acc_next);
-       head->acc_next= 0;
+       head->acc_next= NULL;
        head->acc_offset += offset;
        head->acc_length -= offset;
        if (length >= head->acc_length)
@@ -608,7 +594,7 @@ register unsigned length;
                tail->acc_next= bf_dupacc(data);
                tail= tail->acc_next;
                bf_afree(tail->acc_next);
-               tail->acc_next= 0;
+               tail->acc_next= NULL;
                data= data->acc_next;
                length -= tail->acc_length;
        }
@@ -624,12 +610,9 @@ register unsigned length;
                tail->acc_next= bf_dupacc(data);
                tail= tail->acc_next;
                bf_afree(tail->acc_next);
-               tail->acc_next= 0;
+               tail->acc_next= NULL;
                tail->acc_length= length;
        }
-#if DEBUG
-       bf_chkbuf(data);
-#endif
        return head;
 }
 
@@ -706,7 +689,8 @@ acc_t  *data_second;
        if (!data_second)
                return data_first;
 
-       head= 0;
+       head= NULL;
+       tail= NULL;
        while (data_first)
        {
                if (data_first->acc_linkC == 1)
@@ -720,7 +704,7 @@ acc_t  *data_second;
                data_first= curr->acc_next;
                if (!curr->acc_length)
                {
-                       curr->acc_next= 0;
+                       curr->acc_next= NULL;
                        bf_afree(curr);
                        continue;
                }
@@ -732,7 +716,7 @@ acc_t  *data_second;
        }
        if (!head)
                return data_second;
-       tail->acc_next= 0;
+       tail->acc_next= NULL;
 
        while (data_second && !data_second->acc_length)
        {
@@ -877,7 +861,6 @@ acc_t *acc;
 PUBLIC int bf_consistency_check()
 {
        acc_t *acc;
-       buf_t *buf;
        int silent;
        int error;
        int i;
@@ -930,7 +913,7 @@ PUBLIC int bf_consistency_check()
                        if (!silent)
                        {
                                printf(
-"acc[%d] (0x%x) has been lost with count %d, last allocated at %s, %d\n",
+"acc[%d] (%p) has been lost with count %d, last allocated at %s, %d\n",
        i, acc, acc->acc_linkC, acc->acc_alloc_file, acc->acc_alloc_line);
 #if 0
                                silent= 1;
@@ -1041,7 +1024,7 @@ int i;
                assert(buf->buf_generation == buf_generation-1);
                buf->buf_generation= buf_generation;
                printf(
-"%s[%d] (0x%x) has been lost with count %d, last allocated at %s, %d\n",
+"%s[%d] (%p) has been lost with count %d, last allocated at %s, %d\n",
                        label, i, buf,
                        buf->buf_linkC, buf->buf_alloc_file,
                        buf->buf_alloc_line);
@@ -1101,6 +1084,15 @@ acc_t *acc;
        }
 }
 
+PUBLIC void _bf_mark_1acc(clnt_file, clnt_line, acc)
+char *clnt_file;
+int clnt_line;
+acc_t *acc;
+{
+       acc->acc_alloc_file= clnt_file;
+       acc->acc_alloc_line= clnt_line;
+}
+
 PUBLIC void _bf_mark_acc(clnt_file, clnt_line, acc)
 char *clnt_file;
 int clnt_line;
@@ -1119,12 +1111,68 @@ acc_t *acc;
 }
 #endif
 
+PUBLIC int bf_linkcheck(acc)
+acc_t *acc;
+{
+       int i;
+
+       buf_t *buffer;
+       for (i= 0; i<ACC_NR && acc; i++, acc= acc->acc_next)
+       {
+               if (acc->acc_linkC <= 0)
+               {
+                       printf("wrong acc_linkC (%d) for acc %p\n", 
+                               acc->acc_linkC, acc);
+                       return 0;
+               }
+               if (acc->acc_offset < 0)
+               {
+                       printf("wrong acc_offset (%d) for acc %p\n",
+                               acc->acc_offset, acc);
+                       return 0;
+               }
+               if (acc->acc_length < 0)
+               {
+                       printf("wrong acc_length (%d) for acc %p\n",
+                               acc->acc_length, acc);
+                       return 0;
+               }
+               buffer= acc->acc_buffer;
+               if (buffer == NULL)
+               {
+                       printf("no buffer for acc %p\n", acc);
+                       return 0;
+               }
+               if (buffer->buf_linkC <= 0)
+               {
+                       printf(
+                       "wrong buf_linkC (%d) for buffer %p, from acc %p\n",
+                               buffer->buf_linkC, buffer, acc);
+                       return 0;
+               }
+               if (acc->acc_offset + acc->acc_length > buffer->buf_size)
+               {
+                       printf("%d + %d > %d for buffer %p, and acc %p\n",
+                               acc->acc_offset, acc->acc_length, 
+                               buffer->buf_size, buffer, acc);
+                       return 0;
+               }
+       }
+       if (acc != NULL)
+       {
+               printf("loop\n");
+               return 0;
+       }
+       return 1;
+}
+
 PRIVATE void free_accs()
 {
        int i, j;
 
        DBLOCK(1, printf("free_accs\n"));
 
+assert(bf_linkcheck(bf_linkcheck_acc));
        for (i=0; !acc_freelist && i<MAX_BUFREQ_PRI; i++)
        {
                for (j=0; j<CLIENT_NR; j++)
@@ -1133,6 +1181,9 @@ PRIVATE void free_accs()
                        if (freereq[j])
                        {
                                (*freereq[j])(i);
+                               assert(bf_linkcheck(bf_linkcheck_acc) ||
+                                       (printf("just called %p\n",
+                                       freereq[i]),0));
                        }
                }
        }
@@ -1165,7 +1216,7 @@ size_t alignment;
        }
        buf_size= bf_bufsize(acc);
 #ifdef bf_align
-       assert(size != 0 && buf_size != 0 ||
+       assert((size != 0 && buf_size != 0) ||
                (printf("bf_align(..., %d, %d) from %s, %d\n",
                        size, alignment, clnt_file, clnt_line),0));
 #else
@@ -1201,5 +1252,5 @@ acc_t *acc;
 #endif
 
 /*
- * $PchId: buf.c,v 1.10 1995/11/23 11:25:25 philip Exp $
+ * $PchId: buf.c,v 1.19 2003/09/10 08:54:23 philip Exp $
  */
index 19777456c5fa8bd58524be21bbf2fbc8d6220e2b..8eb1cbb29c218a84ec87f662bd20fbdd6cf206eb 100644 (file)
@@ -10,26 +10,36 @@ Copyright 1995 Philip Homburg
 #include "generic/buf.h"
 #include "generic/clock.h"
 #include "generic/type.h"
-#include <minix/syslib.h>
 
 THIS_FILE
 
 PUBLIC int clck_call_expire;
 
 PRIVATE time_t curr_time;
+PRIVATE time_t prev_time;
 PRIVATE timer_t *timer_chain;
 PRIVATE time_t next_timeout;
+#ifdef __minix_vmd
+PRIVATE int clck_tasknr= ANY;
+#endif
 
 FORWARD _PROTOTYPE( void clck_fast_release, (timer_t *timer) );
 FORWARD _PROTOTYPE( void set_timer, (void) );
 
 PUBLIC void clck_init()
 {
-#if ZERO
+       int r;
+
        clck_call_expire= 0;
        curr_time= 0;
+       prev_time= 0;
        next_timeout= 0;
        timer_chain= 0;
+
+#ifdef __minix_vmd
+       r= sys_findproc(CLOCK_NAME, &clck_tasknr, 0);
+       if (r != OK)
+               ip_panic(( "unable to find clock task: %d\n", r ));
 #endif
 }
 
@@ -37,27 +47,45 @@ PUBLIC time_t get_time()
 {
        if (!curr_time)
        {
+#ifdef __minix_vmd
+               static message mess;
+
+               mess.m_type= GET_UPTIME;
+               if (sendrec (clck_tasknr, &mess) < 0)
+                       ip_panic(("unable to sendrec"));
+               if (mess.m_type != OK)
+                       ip_panic(("can't read clock"));
+               curr_time= mess.NEW_TIME;
+#else /* Minix 3 */
                int s;
                if ((s=sys_getuptime(&curr_time)) != OK)
                        ip_panic(("can't read clock"));
+#endif
+               assert(curr_time >= prev_time);
        }
        return curr_time;
 }
-       
+
 PUBLIC void set_time (tim)
 time_t tim;
 {
-       if (!curr_time)
+       if (!curr_time && tim >= prev_time)
        {
                /* Some code assumes that no time elapses while it is
                 * running.
                 */
                curr_time= tim;
        }
+       else if (!curr_time)
+       {
+               DBLOCK(0x20, printf("set_time: new time %ld < prev_time %ld\n",
+                       tim, prev_time));
+       }
 }
 
 PUBLIC void reset_time()
 {
+       prev_time= curr_time;
        curr_time= 0;
 }
 
@@ -145,12 +173,27 @@ PRIVATE void set_timer()
 
        if (next_timeout == 0 || new_time < next_timeout)
        {
+#ifdef __minix_vmd
+               static message mess;
 
                next_timeout= new_time;
+
                new_time -= curr_time;
 
-               if (sys_syncalrm(SELF, new_time, 0) != OK)
+               mess.m_type= SET_SYNC_AL;
+               mess.CLOCK_PROC_NR= this_proc;
+               mess.DELTA_TICKS= new_time;
+               if (sendrec (clck_tasknr, &mess) < 0)
+                       ip_panic(("unable to sendrec"));
+               if (mess.m_type != OK)
                        ip_panic(("can't set timer"));
+#else /* Minix 3 */
+               next_timeout= new_time;
+               new_time -= curr_time;
+
+               if (sys_syncalrm(SELF, new_time, 0) != OK)
+                       ip_panic(("can't set timer"));
+#endif
        }
 }
 
@@ -184,5 +227,5 @@ PUBLIC void clck_expire_timers()
 }
 
 /*
- * $PchId: clock.c,v 1.6 1995/11/21 06:54:39 philip Exp $
+ * $PchId: clock.c,v 1.10 2005/06/28 14:23:40 philip Exp $
  */
index a455af7fe16baef29f9f0f51a73a80cb38fa3d46..4c4e7609d29399bc2fb70fa132b949ac709ea3c6 100644 (file)
@@ -14,14 +14,12 @@ Copyright 1995 Philip Homburg
 #endif
 
 #ifndef NDEBUG
-#define NDEBUG (CRAMPED)
+#define NDEBUG 0
 #endif
 
 #define CLOCK_GRAN     1       /* in HZ */
 
-#if DEBUG
 #define where()        printf("%s, %d: ", __FILE__, __LINE__)
-#endif
 
 #define NW_SUSPEND     SUSPEND
 #define NW_WOULDBLOCK  EWOULDBLOCK
@@ -32,5 +30,5 @@ Copyright 1995 Philip Homburg
 #endif /* INET__CONST_H */
 
 /*
- * $PchId: const.h,v 1.6 1995/11/21 06:54:39 philip Exp $
+ * $PchId: const.h,v 1.7 2000/08/12 09:21:44 philip Exp $
  */
index 24e2cea3036b0bfec2371d3cc500fb3035bb160f..84ea087b07882ad528cfb20a1d0ea67fc16f7d51 100644 (file)
@@ -11,20 +11,26 @@ Copyright 1995 Philip Homburg
 #include "assert.h"
 #include "buf.h"
 #include "clock.h"
+#include "event.h"
 #include "eth.h"
 #include "io.h"
 #include "sr.h"
 
 THIS_FILE
 
-#define ARP_CACHE_NR   64
+#define ARP_CACHE_NR    256
+#define AP_REQ_NR        32
+
+#define ARP_HASH_NR    256
+#define ARP_HASH_MASK  0xff
+#define ARP_HASH_WIDTH 4
 
 #define MAX_ARP_RETRIES                5
 #define ARP_TIMEOUT            (HZ/2+1)        /* .5 seconds */
 #ifndef ARP_EXP_TIME
 #define ARP_EXP_TIME           (20L*60L*HZ)    /* 20 minutes */
 #endif
-#define ARP_NOTRCH_EXP_TIME    (5*HZ)          /* 5 seconds */
+#define ARP_NOTRCH_EXP_TIME    (30*HZ)         /* 30 seconds */
 #define ARP_INUSE_OFFSET       (60*HZ) /* an entry in the cache can be deleted
                                           if its not used for 1 minute */
 
@@ -66,37 +72,39 @@ typedef struct arp_port
        int ap_eth_port;
        int ap_ip_port;
        int ap_eth_fd;
-       ether_addr_t ap_ethaddr;
-       ipaddr_t ap_ipaddr;
-       timer_t ap_timer;
 
-       ether_addr_t ap_write_ethaddr;
-       ipaddr_t ap_write_ipaddr;
-       int ap_write_code;
+       ether_addr_t ap_ethaddr;        /* Ethernet address of this port */
+       ipaddr_t ap_ipaddr;             /* IP address of this port */
 
-       ipaddr_t ap_req_ipaddr;
-       int ap_req_count;
+       struct arp_req
+       {
+               timer_t ar_timer;
+               int ar_entry;
+               int ar_req_count;
+       } ap_req[AP_REQ_NR];
 
        arp_func_t ap_arp_func;
+
+       acc_t *ap_sendpkt;
+       acc_t *ap_sendlist;
+       acc_t *ap_reclist;
+       event_t ap_event;
 } arp_port_t;
 
-#define APF_EMPTY      0
-#define APF_ARP_RD_IP  0x4
-#define APF_ARP_RD_SP  0x8
-#define APF_ARP_WR_IP  0x10
-#define APF_ARP_WR_SP  0x20
-#define APF_INADDR_SET 0x100
-#define APF_MORE2WRITE 0x200
-#define APF_CLIENTREQ  0x400
-#define APF_CLIENTWRITE        0x1000
-#define APF_SUSPEND    0x2000
-
-#define APS_INITIAL    0x00
-#define        APS_GETADDR     0x01
-#define        APS_ARPSTART    0x10
-#define        APS_ARPPROTO    0x20
-#define        APS_ARPMAIN     0x40
-#define        APS_ERROR       0x80
+#define APF_EMPTY      0x00
+#define APF_ARP_RD_IP  0x01
+#define APF_ARP_RD_SP  0x02
+#define APF_ARP_WR_IP  0x04
+#define APF_ARP_WR_SP  0x08
+#define APF_INADDR_SET 0x10
+#define APF_SUSPEND    0x20
+
+#define APS_INITIAL    1
+#define        APS_GETADDR     2
+#define        APS_ARPSTART    3
+#define        APS_ARPPROTO    4
+#define        APS_ARPMAIN     5
+#define        APS_ERROR       6
 
 typedef struct arp_cache
 {
@@ -110,39 +118,61 @@ typedef struct arp_cache
 } arp_cache_t;
 
 #define ACF_EMPTY      0
-#define ACF_GOTREQ     1
+#define ACF_PERM       1
+#define ACF_PUB                2
 
 #define ACS_UNUSED     0
 #define ACS_INCOMPLETE 1
 #define ACS_VALID      2
 #define ACS_UNREACHABLE        3
 
+PRIVATE struct arp_hash_ent
+{
+       arp_cache_t *ahe_row[ARP_HASH_WIDTH];
+} arp_hash[ARP_HASH_NR];
+
+PRIVATE arp_port_t *arp_port_table;
+PRIVATE        arp_cache_t *arp_cache;
+PRIVATE int arp_cache_nr;
+
 FORWARD acc_t *arp_getdata ARGS(( int fd, size_t offset,
        size_t count, int for_ioctl ));
 FORWARD int arp_putdata ARGS(( int fd, size_t offset,
        acc_t *data, int for_ioctl ));
 FORWARD void arp_main ARGS(( arp_port_t *arp_port ));
-FORWARD void arp_timeout ARGS(( int fd, timer_t *timer ));
+FORWARD void arp_timeout ARGS(( int ref, timer_t *timer ));
 FORWARD void setup_write ARGS(( arp_port_t *arp_port ));
 FORWARD void setup_read ARGS(( arp_port_t *arp_port ));
-FORWARD void process_arp_req ARGS(( arp_port_t *arp_port, acc_t *data ));
+FORWARD void do_reclist ARGS(( event_t *ev, ev_arg_t ev_arg ));
+FORWARD void process_arp_pkt ARGS(( arp_port_t *arp_port, acc_t *data ));
 FORWARD void client_reply ARGS(( arp_port_t *arp_port,
        ipaddr_t ipaddr, ether_addr_t *ethaddr ));
 FORWARD arp_cache_t *find_cache_ent ARGS(( arp_port_t *arp_port,
        ipaddr_t ipaddr ));
-FORWARD arp_cache_t *alloc_cache_ent ARGS(( void ));
-
-PRIVATE arp_port_t *arp_port_table;
-PRIVATE        arp_cache_t arp_cache[ARP_CACHE_NR];
+FORWARD arp_cache_t *alloc_cache_ent ARGS(( int flags ));
+FORWARD void arp_buffree ARGS(( int priority ));
+#ifdef BUF_CONSISTENCY_CHECK
+FORWARD void arp_bufcheck ARGS(( void ));
+#endif
 
 PUBLIC void arp_prep()	/* Allocate the ARP port table and the ARP cache array */
 {
        arp_port_table= alloc(eth_conf_nr * sizeof(arp_port_table[0]));	/* eth_conf_nr port slots */
+
+       arp_cache_nr= ARP_CACHE_NR;	/* default size */
+       if (arp_cache_nr < (eth_conf_nr+1)*AP_REQ_NR)	/* default too small: enlarge to (eth_conf_nr+1)*AP_REQ_NR */
+       {
+               arp_cache_nr= (eth_conf_nr+1)*AP_REQ_NR;
+               printf("arp: using %d cache entries instead of %d\n",
+                       arp_cache_nr, ARP_CACHE_NR);
+       }
+       arp_cache= alloc(arp_cache_nr * sizeof(arp_cache[0]));	/* the entries' fields are not set in this function */
 }
 
 PUBLIC void arp_init()
 {
        arp_port_t *arp_port;
+       arp_cache_t *cache;
        int i;
 
        assert (BUF_S >= sizeof(struct nwio_ethstat));
@@ -155,6 +185,20 @@ PUBLIC void arp_init()
                                                 * unavailable */
        }
 
+       cache= arp_cache;
+       for (i=0; i<arp_cache_nr; i++, cache++)
+       {
+               cache->ac_state= ACS_UNUSED;
+               cache->ac_flags= ACF_EMPTY;
+               cache->ac_expire= 0;
+               cache->ac_lastuse= 0;
+       }
+
+#ifndef BUF_CONSISTENCY_CHECK
+       bf_logon(arp_buffree);
+#else
+       bf_logon(arp_buffree, arp_bufcheck);
+#endif
 }
 
 PRIVATE void arp_main(arp_port)
@@ -166,11 +210,14 @@ arp_port_t *arp_port;
        {
        case APS_INITIAL:
                arp_port->ap_eth_fd= eth_open(arp_port->ap_eth_port,
-                       arp_port->ap_eth_port, arp_getdata, arp_putdata, 0);
+                       arp_port->ap_eth_port, arp_getdata, arp_putdata,
+                       0 /* no put_pkt */, 0 /* no select_res */);
 
                if (arp_port->ap_eth_fd<0)
                {
-                       DBLOCK(1, printf("arp.c: unable to open ethernet\n"));
+                       DBLOCK(1, printf("arp[%d]: unable to open eth[%d]\n",
+                               arp_port-arp_port_table,
+                               arp_port->ap_eth_port));
                        return;
                }
 
@@ -195,19 +242,6 @@ arp_port_t *arp_port;
        case APS_ARPSTART:
                arp_port->ap_state= APS_ARPPROTO;
 
-               {
-                       arp_cache_t *cache;
-                       int i;
-
-                       cache= arp_cache;
-                       for (i=0; i<ARP_CACHE_NR; i++, cache++)
-                       {
-                               cache->ac_state= ACS_UNUSED;
-                               cache->ac_flags= ACF_EMPTY;
-                               cache->ac_expire= 0;
-                               cache->ac_lastuse= 0;
-                       }
-               }
                result= eth_ioctl (arp_port->ap_eth_fd, NWIOSETHOPT);
 
                if (result==NW_SUSPEND)
@@ -220,17 +254,14 @@ arp_port_t *arp_port;
                /* fall through */
        case APS_ARPPROTO:
                arp_port->ap_state= APS_ARPMAIN;
-               if (arp_port->ap_flags & APF_MORE2WRITE)
-                       setup_write(arp_port);
+               setup_write(arp_port);
                setup_read(arp_port);
                return;
 
-#if !CRAMPED
        default:
                ip_panic((
                 "arp_main(&arp_port_table[%d]) called but ap_state=0x%x\n",
                        arp_port->ap_eth_port, arp_port->ap_state ));
-#endif
        }
 }
 
@@ -241,7 +272,6 @@ size_t count;
 int for_ioctl;
 {
        arp_port_t *arp_port;
-       arp46_t *arp;
        acc_t *data;
        int result;
 
@@ -281,13 +311,18 @@ int for_ioctl;
                assert (arp_port->ap_flags & APF_ARP_WR_IP);
                if (!count)
                {
+                       data= arp_port->ap_sendpkt;
+                       arp_port->ap_sendpkt= NULL;
+                       assert(data);
+                       bf_afree(data); data= NULL;
+
                        result= (int)offset;
                        if (result<0)
                        {
                                DIFBLOCK(1, (result != NW_SUSPEND),
                                        printf(
-                               "arp.c: write error on port %d: error %d\n",
-                                       fd, result));
+                               "arp[%d]: write error on port %d: error %d\n",
+                                       fd, arp_port->ap_eth_fd, result));
 
                                arp_port->ap_state= APS_ERROR;
                                break;
@@ -298,37 +333,14 @@ int for_ioctl;
                        return NW_OK;
                }
                assert (offset+count <= sizeof(arp46_t));
-               data= bf_memreq(sizeof(arp46_t));
-               arp= (arp46_t *)ptr2acc_data(data);
-               data->acc_offset += offset;
-               data->acc_length= count;
-               if (arp_port->ap_write_code == ARP_REPLY)
-                       arp->a46_dstaddr= arp_port->ap_write_ethaddr;
-               else
-               {
-                       arp->a46_dstaddr.ea_addr[0]= 0xff;
-                       arp->a46_dstaddr.ea_addr[1]= 0xff;
-                       arp->a46_dstaddr.ea_addr[2]= 0xff;
-                       arp->a46_dstaddr.ea_addr[3]= 0xff;
-                       arp->a46_dstaddr.ea_addr[4]= 0xff;
-                       arp->a46_dstaddr.ea_addr[5]= 0xff;
-               }
-               arp->a46_hdr= HTONS(ARP_ETHERNET);
-               arp->a46_pro= HTONS(ETH_IP_PROTO);
-               arp->a46_hln= 6;
-               arp->a46_pln= 4;
-               arp->a46_op= htons(arp_port->ap_write_code);
-               arp->a46_sha= arp_port->ap_ethaddr;
-               memcpy (arp->a46_spa, &arp_port->ap_ipaddr, sizeof(ipaddr_t));
-               arp->a46_tha= arp_port->ap_write_ethaddr;
-               memcpy (arp->a46_tpa, &arp_port->ap_write_ipaddr,
-                       sizeof(ipaddr_t));
+               data= arp_port->ap_sendpkt;
+               assert(data);
+               data= bf_cut(data, offset, count);
+
                return data;
        default:
-#if !CRAMPED
                printf("arp_getdata(%d, 0x%d, 0x%d) called but ap_state=0x%x\n",
                        fd, offset, count, arp_port->ap_state);
-#endif
                break;
        }
        return 0;
@@ -343,6 +355,8 @@ int for_ioctl;
        arp_port_t *arp_port;
        int result;
        struct nwio_ethstat *ethstat;
+       ev_arg_t ev_arg;
+       acc_t *tmpacc;
 
        arp_port= &arp_port_table[fd];
 
@@ -354,8 +368,8 @@ int for_ioctl;
                        if (result<0)
                        {
                                DIFBLOCK(1, (result != NW_SUSPEND), printf(
-                               "arp.c: read error on port %d: error %d\n",
-                                       fd, result));
+                               "arp[%d]: read error on port %d: error %d\n",
+                                       fd, arp_port->ap_eth_fd, result));
 
                                return NW_OK;
                        }
@@ -374,11 +388,29 @@ int for_ioctl;
                /* Warning: the above assertion is illegal; puts and gets of
                   data can be brokenup in any piece the server likes. However
                   we assume that the server is eth.c and it transfers only
-                  whole packets. */
+                  whole packets.
+                  */
                data= bf_packIffLess(data, sizeof(arp46_t));
                if (data->acc_length >= sizeof(arp46_t))
-                       process_arp_req(arp_port,data);
-               bf_afree(data);
+               {
+                       if (!arp_port->ap_reclist)
+                       {
+                               ev_arg.ev_ptr= arp_port;
+                               ev_enqueue(&arp_port->ap_event, do_reclist,
+                                       ev_arg);
+                       }
+                       if (data->acc_linkC != 1)
+                       {
+                               tmpacc= bf_dupacc(data);
+                               bf_afree(data);
+                               data= tmpacc;
+                               tmpacc= NULL;
+                       }
+                       data->acc_ext_link= arp_port->ap_reclist;
+                       arp_port->ap_reclist= data;
+               }
+               else
+                       bf_afree(data);
                return NW_OK;
        }
        switch (arp_port->ap_state)
@@ -407,10 +439,8 @@ int for_ioctl;
                bf_afree(data);
                return NW_OK;
        default:
-#if !CRAMPED
                printf("arp_putdata(%d, 0x%d, 0x%lx) called but ap_state=0x%x\n",
                        fd, offset, (unsigned long)data, arp_port->ap_state);
-#endif
                break;
        }
        return EGENERIC;
@@ -431,74 +461,83 @@ arp_port_t *arp_port;
                        return;
                }
                DIFBLOCK(1, (result != NW_OK),
-                       printf("arp.c: eth_read(..,%d)=%d\n",
-                       ETH_MAX_PACK_SIZE, result));
+                       printf("arp[%d]: eth_read(..,%d)=%d\n",
+                       arp_port-arp_port_table, ETH_MAX_PACK_SIZE, result));
        }
 }
 
 PRIVATE void setup_write(arp_port)	/* Hand the packets queued on ap_sendlist to eth_write, one at a time */
 arp_port_t *arp_port;
 {
-       int i, result;
+       int result;
+       acc_t *data;
 
-       while (arp_port->ap_flags & APF_MORE2WRITE)
+       for(;;)
        {
-               if (arp_port->ap_flags & APF_CLIENTWRITE)
+               data= arp_port->ap_sendlist;	/* packet at the head of the send list */
+               if (!data)
+                       break;	/* nothing left to send */
+               arp_port->ap_sendlist= data->acc_ext_link;	/* unlink it; packets are chained via acc_ext_link */
+
+               if (arp_port->ap_ipaddr == HTONL(0x00000000))	/* an all-zero port address is treated as "down" */
                {
-                       arp_port->ap_flags &= ~APF_CLIENTWRITE;
-                       arp_port->ap_write_ipaddr= arp_port->ap_req_ipaddr;
-                       arp_port->ap_write_code= ARP_REQUEST;
-                       clck_timer(&arp_port->ap_timer,
-                               get_time() + ARP_TIMEOUT,
-                               arp_timeout, arp_port->ap_eth_port);
+                       /* Interface is down */
+                       printf(
+               "arp[%d]: not sending ARP packet, interface is down\n",
+                               arp_port-arp_port_table);
+                       bf_afree(data); data= NULL;	/* drop the packet and try the next one */
+                       continue;
                }
-               else
-               {
-                       arp_cache_t *cache;
 
-                       cache= arp_cache;
-                       for (i=0; i<ARP_CACHE_NR; i++, cache++)
-                       {
-                               if ((cache->ac_flags & ACF_GOTREQ) &&
-                                       cache->ac_port == arp_port)
-                               {
-                                       cache->ac_flags &= ~ACF_GOTREQ;
-                                       arp_port->ap_write_ethaddr= cache->
-                                               ac_ethaddr;
-                                       arp_port->ap_write_ipaddr= cache->
-                                               ac_ipaddr;
-                                       arp_port->ap_write_code= ARP_REPLY;
-                                       break;
-                               }
-                       }
-                       if (i>=ARP_CACHE_NR)
-                       {
-                               arp_port->ap_flags &= ~APF_MORE2WRITE;
-                               break;
-                       }
-               }
+               assert(!arp_port->ap_sendpkt);	/* ap_sendpkt holds at most one packet at a time */
+               arp_port->ap_sendpkt= data; data= NULL;	/* arp_getdata reads the packet from ap_sendpkt and frees it */
+                       
                arp_port->ap_flags= (arp_port->ap_flags & ~APF_ARP_WR_SP) |
                        APF_ARP_WR_IP;
                result= eth_write(arp_port->ap_eth_fd, sizeof(arp46_t));
                if (result == NW_SUSPEND)
+               {
                        arp_port->ap_flags |= APF_ARP_WR_SP;
+                       break;	/* write suspended: stop here, remaining packets stay on ap_sendlist */
+               }
                if (result<0)
                {
                        DIFBLOCK(1, (result != NW_SUSPEND),
-                               printf("arp.c: eth_write(..,%d)=%d\n",
-                               sizeof(arp46_t), result));
+                               printf("arp[%d]: eth_write(..,%d)=%d\n",
+                               arp_port-arp_port_table, sizeof(arp46_t),
+                               result));
                        return;
                }
        }
 }
 
-PRIVATE void process_arp_req (arp_port, data)
+PRIVATE void do_reclist(ev, ev_arg)	/* Event handler: process every packet queued on the port's ap_reclist */
+event_t *ev;
+ev_arg_t ev_arg;
+{
+       arp_port_t *arp_port;
+       acc_t *data;
+
+       arp_port= ev_arg.ev_ptr;	/* the event argument carries the arp_port_t pointer */
+       assert(ev == &arp_port->ap_event);	/* must be this port's own event */
+
+       while (data= arp_port->ap_reclist, data != NULL)	/* take packets from the head until the list is empty */
+       {
+               arp_port->ap_reclist= data->acc_ext_link;	/* unlink before processing */
+               process_arp_pkt(arp_port, data);
+               bf_afree(data);	/* release the packet once it has been processed */
+       }
+}
+
+PRIVATE void process_arp_pkt (arp_port, data)
 arp_port_t *arp_port;
 acc_t *data;
 {
+       int i, entry, do_reply;
        arp46_t *arp;
-       arp_cache_t *ce;
-       int level;
+       u16_t *p;
+       arp_cache_t *ce, *cache;
+       struct arp_req *reqp;
        time_t curr_time;
        ipaddr_t spa, tpa;
 
@@ -513,23 +552,53 @@ acc_t *data;
                arp->a46_pro != HTONS(ETH_IP_PROTO) ||
                arp->a46_pln != 4)
                return;
+       if (arp_port->ap_ipaddr == HTONL(0x00000000))
+       {
+               /* Interface is down */
+#if DEBUG
+               printf("arp[%d]: dropping ARP packet, interface is down\n",
+                       arp_port-arp_port_table);
+#endif
+               return;
+       }
+
        ce= find_cache_ent(arp_port, spa);
-       if (ce && ce->ac_expire < curr_time)
+       cache= NULL;    /* lint */
+
+       do_reply= 0;
+       if (arp->a46_op != HTONS(ARP_REQUEST))
+               ;       /* No need to reply */
+       else if (tpa == arp_port->ap_ipaddr)
+               do_reply= 1;
+       else
        {
-               DBLOCK(0x10, printf("arp: expiring entry for ");
-                       writeIpAddr(ce->ac_ipaddr); printf("\n"));
-               ce->ac_state= ACS_UNUSED;
-               ce= NULL;
+               /* Look for a published entry */
+               cache= find_cache_ent(arp_port, tpa);
+               if (cache)
+               {
+                       if (cache->ac_flags & ACF_PUB)
+                       {
+                               /* Published entry */
+                               do_reply= 1;
+                       }
+                       else
+                       {
+                               /* Nothing to do */
+                               cache= NULL;
+                       }
+               }
        }
+
        if (ce == NULL)
        {
-               if (tpa != arp_port->ap_ipaddr)
+               if (!do_reply)
                        return;
 
-               DBLOCK(0x10, printf("arp: allocating entry for ");
+               DBLOCK(0x10, printf("arp[%d]: allocating entry for ",
+                       arp_port-arp_port_table);
                        writeIpAddr(spa); printf("\n"));
 
-               ce= alloc_cache_ent();
+               ce= alloc_cache_ent(ACF_EMPTY);
                ce->ac_flags= ACF_EMPTY;
                ce->ac_state= ACS_VALID;
                ce->ac_ethaddr= arp->a46_sha;
@@ -544,6 +613,18 @@ acc_t *data;
                ce->ac_ethaddr= arp->a46_sha;
                if (ce->ac_state == ACS_INCOMPLETE)
                {
+                       /* Find request entry */
+                       entry= ce-arp_cache;
+                       for (i= 0, reqp= arp_port->ap_req; i<AP_REQ_NR; 
+                               i++, reqp++)
+                       {
+                               if (reqp->ar_entry == entry)
+                                       break;
+                       }
+                       assert(i < AP_REQ_NR);
+                       clck_untimer(&reqp->ar_timer);
+                       reqp->ar_entry= -1;
+                       
                        ce->ac_state= ACS_VALID;
                        client_reply(arp_port, spa, &arp->a46_sha);
                }
@@ -552,11 +633,11 @@ acc_t *data;
        }
 
        /* Update fields in the arp cache. */
-#if !CRAMPED
        if (memcmp(&ce->ac_ethaddr, &arp->a46_sha,
                sizeof(ce->ac_ethaddr)) != 0)
        {
-               printf("arp: ethernet address for IP address ");
+               printf("arp[%d]: ethernet address for IP address ",
+                       arp_port-arp_port_table);
                writeIpAddr(spa);
                printf(" changed from ");
                writeEtherAddr(&ce->ac_ethaddr);
@@ -565,15 +646,46 @@ acc_t *data;
                printf("\n");
                ce->ac_ethaddr= arp->a46_sha;
        }
-#else
-       ce->ac_ethaddr= arp->a46_sha;
-#endif
        ce->ac_expire= curr_time+ARP_EXP_TIME;
 
-       if (arp->a46_op == HTONS(ARP_REQUEST) && (tpa == arp_port->ap_ipaddr))
+       if (do_reply)
        {
-               ce->ac_flags |= ACF_GOTREQ;
-               arp_port->ap_flags |= APF_MORE2WRITE;
+               data= bf_memreq(sizeof(arp46_t));
+               arp= (arp46_t *)ptr2acc_data(data);
+
+               /* Clear padding */
+               assert(sizeof(arp->a46_data.a46_dummy) % sizeof(*p) == 0);
+               for (i= 0, p= (u16_t *)arp->a46_data.a46_dummy;
+                       i < sizeof(arp->a46_data.a46_dummy)/sizeof(*p);
+                       i++, p++)
+               {
+                       *p= 0xdead;
+               }
+
+               arp->a46_dstaddr= ce->ac_ethaddr;
+               arp->a46_hdr= HTONS(ARP_ETHERNET);
+               arp->a46_pro= HTONS(ETH_IP_PROTO);
+               arp->a46_hln= 6;
+               arp->a46_pln= 4;
+
+               arp->a46_op= htons(ARP_REPLY);
+               if (tpa == arp_port->ap_ipaddr)
+               {
+                       arp->a46_sha= arp_port->ap_ethaddr;
+               }
+               else
+               {
+                       assert(cache);
+                       arp->a46_sha= cache->ac_ethaddr;
+               }
+               memcpy (arp->a46_spa, &tpa, sizeof(ipaddr_t));
+               arp->a46_tha= ce->ac_ethaddr;
+               memcpy (arp->a46_tpa, &ce->ac_ipaddr, sizeof(ipaddr_t));
+
+               assert(data->acc_linkC == 1);
+               data->acc_ext_link= arp_port->ap_sendlist;
+               arp_port->ap_sendlist= data; data= NULL;
+
                if (!(arp_port->ap_flags & APF_ARP_WR_IP))
                        setup_write(arp_port);
        }
@@ -584,12 +696,6 @@ arp_port_t *arp_port;
 ipaddr_t ipaddr;
 ether_addr_t *ethaddr;
 {
-       if ((arp_port->ap_flags & APF_CLIENTREQ) &&
-               ipaddr == arp_port->ap_req_ipaddr)
-       {
-               arp_port->ap_flags &= ~(APF_CLIENTREQ|APF_CLIENTWRITE);
-               clck_untimer(&arp_port->ap_timer);
-       }
        (*arp_port->ap_arp_func)(arp_port->ap_ip_port, ipaddr, ethaddr);
 }
 
@@ -597,37 +703,113 @@ PRIVATE arp_cache_t *find_cache_ent (arp_port, ipaddr)
 arp_port_t *arp_port;	/* find_cache_ent: return the in-use cache entry for (arp_port, ipaddr), or NULL */
 ipaddr_t ipaddr;
 {
-       arp_cache_t *cache;
+       arp_cache_t *ce;
        int i;
+       unsigned hash;
 
-       for (i=0, cache= arp_cache; i<ARP_CACHE_NR; i++, cache++)
+       hash= (ipaddr >> 24) ^ (ipaddr >> 16) ^ (ipaddr >> 8) ^ ipaddr;	/* XOR the shifted address together... */
+       hash &= ARP_HASH_MASK;	/* ...and keep the bits selected by the mask as the row index */
+
+       ce= arp_hash[hash].ahe_row[0];	/* fast path: first slot of the hash row */
+       if (ce && ce->ac_ipaddr == ipaddr && ce->ac_port == arp_port &&
+               ce->ac_state != ACS_UNUSED)
+       {
+               return ce;
+       }
+       for (i= 1; i<ARP_HASH_WIDTH; i++)	/* remaining slots of the row */
        {
-               if (cache->ac_state != ACS_UNUSED &&
-                       cache->ac_port == arp_port &&
-                       cache->ac_ipaddr == ipaddr)
+               ce= arp_hash[hash].ahe_row[i];
+               if (!ce || ce->ac_ipaddr != ipaddr || ce->ac_port != arp_port
+                       || ce->ac_state == ACS_UNUSED)
                {
-                       return cache;
+                       continue;	/* empty slot or not a match */
+               }
+               arp_hash[hash].ahe_row[i]= arp_hash[hash].ahe_row[0];	/* hit: swap the entry into slot 0 */
+               arp_hash[hash].ahe_row[0]= ce;
+               return ce;
+       }
+
+       for (i=0, ce= arp_cache; i<arp_cache_nr; i++, ce++)	/* not in the hash row: scan the whole cache */
+       {
+               if (ce->ac_state != ACS_UNUSED &&
+                       ce->ac_port == arp_port &&
+                       ce->ac_ipaddr == ipaddr)
+               {
+                       for (i= ARP_HASH_WIDTH-1; i>0; i--)	/* reuses i; harmless because we return just below */
+                       {
+                               arp_hash[hash].ahe_row[i]=
+                                       arp_hash[hash].ahe_row[i-1];	/* shift the row down; the last slot is overwritten */
+                       }
+                       assert(i == 0);
+                       arp_hash[hash].ahe_row[0]= ce;	/* record the entry in slot 0 of the row */
+                       return ce;
                }
        }
        return NULL;
 }
 
-PRIVATE arp_cache_t *alloc_cache_ent()
+PRIVATE arp_cache_t *alloc_cache_ent(flags)	/* Pick a cache slot to (re)use; returns NULL only on the non-zero flags path */
+int flags;	/* 0 for an ordinary entry; non-zero requests a permanent slot, ACF_PUB a published one */
 {
        arp_cache_t *cache, *old;
        int i;
 
        old= NULL;
-       for (i=0, cache= arp_cache; i<ARP_CACHE_NR; i++, cache++)
+       for (i=0, cache= arp_cache; i<arp_cache_nr; i++, cache++)
        {
                if (cache->ac_state == ACS_UNUSED)
-                       return cache;
+               {
+                       old= cache;	/* a free slot always wins */
+                       break;
+               }
                if (cache->ac_state == ACS_INCOMPLETE)
                        continue;
+               if (cache->ac_flags & ACF_PERM)
+                       continue;	/* permanent entries are never chosen as victims */
                if (!old || cache->ac_lastuse < old->ac_lastuse)
                        old= cache;	/* otherwise remember the entry with the smallest ac_lastuse */
        }
        assert(old);
+
+       if (!flags)
+               return old;	/* ordinary entry: use the slot found above */
+
+       /* Get next permanent entry */
+       for (i=0, cache= arp_cache; i<arp_cache_nr; i++, cache++)	/* first slot that is unused or not permanent */
+       {
+               if (cache->ac_state == ACS_UNUSED)
+                       break;
+               if (cache->ac_flags & ACF_PERM)
+                       continue;
+               break;
+       }
+       if (i >= arp_cache_nr/2)	/* that slot's index must stay below half the cache size */
+               return NULL; /* Too many entries */
+       if (cache != old)
+       {
+               assert(old > cache);
+               *old= *cache;	/* move the displaced entry into the victim slot. NOTE(review): its index changes; confirm index users such as ar_entry cope */
+               old= cache;	/* and hand out the slot near the front instead */
+       }
+
+       if (!(flags & ACF_PUB))
+               return old;
+
+       /* Get first nonpublished entry */
+       for (i=0, cache= arp_cache; i<arp_cache_nr; i++, cache++)	/* first slot that is unused or not published */
+       {
+               if (cache->ac_state == ACS_UNUSED)
+                       break;
+               if (cache->ac_flags & ACF_PUB)
+                       continue;
+               break;
+       }
+       if (cache != old)
+       {
+               assert(old > cache);
+               *old= *cache;	/* same move as above, this time past the published entries */
+               old= cache;
+       }
        return old;
 }
 
@@ -636,7 +818,6 @@ int eth_port;
 ipaddr_t ipaddr;
 {
        arp_port_t *arp_port;
-       int i;
 
        if (eth_port < 0 || eth_port >= eth_conf_nr)
                return;
@@ -654,8 +835,8 @@ int eth_port;
 int ip_port;
 arp_func_t arp_func;
 {
-       arp_port_t *arp_port;
        int i;
+       arp_port_t *arp_port;
 
        assert(eth_port >= 0);
        if (eth_port >= eth_conf_nr)
@@ -667,6 +848,12 @@ arp_func_t arp_func;
        arp_port->ap_state= APS_INITIAL;
        arp_port->ap_flags= APF_EMPTY;
        arp_port->ap_arp_func= arp_func;
+       arp_port->ap_sendpkt= NULL;
+       arp_port->ap_sendlist= NULL;
+       arp_port->ap_reclist= NULL;
+       for (i= 0; i<AP_REQ_NR; i++)
+               arp_port->ap_req[i].ar_entry= -1;
+       ev_init(&arp_port->ap_event);
 
        arp_main(arp_port);
 
@@ -678,23 +865,48 @@ int eth_port;
 ipaddr_t ipaddr;
 ether_addr_t *ethaddr;
 {
+       int i, ref;
        arp_port_t *arp_port;
-       int i;
+       struct arp_req *reqp;
        arp_cache_t *ce;
        time_t curr_time;
 
        assert(eth_port >= 0 && eth_port < eth_conf_nr);
        arp_port= &arp_port_table[eth_port];
        assert(arp_port->ap_state == APS_ARPMAIN ||
-               (printf("ap_state= %d\n", arp_port->ap_state), 0));
+               (printf("arp[%d]: ap_state= %d\n", arp_port-arp_port_table,
+               arp_port->ap_state), 0));
 
        curr_time= get_time();
 
        ce= find_cache_ent (arp_port, ipaddr);
        if (ce && ce->ac_expire < curr_time)
        {
-               ce->ac_state= ACS_UNUSED;
-               ce= NULL;
+               assert(ce->ac_state != ACS_INCOMPLETE);
+
+               /* Check whether there is enough space for an ARP
+                * request or not.
+                */
+               for (i= 0, reqp= arp_port->ap_req; i<AP_REQ_NR; i++, reqp++)
+               {
+                       if (reqp->ar_entry < 0)
+                               break;
+               }
+               if (i < AP_REQ_NR)
+               {
+                       /* Okay, expire this entry. */
+                       ce->ac_state= ACS_UNUSED;
+                       ce= NULL;
+               }
+               else
+               {
+                       /* Continue using this entry for a while */
+                       printf("arp[%d]: Overloaded! Keeping entry for ",
+                               arp_port-arp_port_table);
+                       writeIpAddr(ipaddr);
+                       printf("\n");
+                       ce->ac_expire= curr_time+ARP_NOTRCH_EXP_TIME;
+               }
        }
        if (ce)
        {
@@ -710,67 +922,433 @@ ether_addr_t *ethaddr;
                if (ce->ac_state == ACS_UNREACHABLE)
                        return EDSTNOTRCH;
                assert(ce->ac_state == ACS_INCOMPLETE);
+
                return NW_SUSPEND;
        }
 
-       if (arp_port->ap_flags & APF_CLIENTREQ)
+       /* Find an empty slot for an ARP request */
+       for (i= 0, reqp= arp_port->ap_req; i<AP_REQ_NR; i++, reqp++)
+       {
+               if (reqp->ar_entry < 0)
+                       break;
+       }
+       if (i >= AP_REQ_NR)
        {
-               /* We should implement something to be able to do
-                * multiple arp lookups at the same time. At the moment
-                * we just return SUSPEND.
+               /* We should be able to report that this ARP request
+                * cannot be accepted. At the moment we just return SUSPEND.
                 */
                return NW_SUSPEND;
        }
-       ce= alloc_cache_ent();
+       ref= (eth_port*AP_REQ_NR + i);
+
+       ce= alloc_cache_ent(ACF_EMPTY);
        ce->ac_flags= 0;
        ce->ac_state= ACS_INCOMPLETE;
        ce->ac_ipaddr= ipaddr;
        ce->ac_port= arp_port;
        ce->ac_expire= curr_time+ARP_EXP_TIME;
        ce->ac_lastuse= curr_time;
-       arp_port->ap_flags |= APF_CLIENTREQ|APF_MORE2WRITE | APF_CLIENTWRITE;
-       arp_port->ap_req_ipaddr= ipaddr;
-       arp_port->ap_req_count= 0;
-       if (!(arp_port->ap_flags & APF_ARP_WR_IP))
-               setup_write(arp_port);
+
+       reqp->ar_entry= ce-arp_cache;
+       reqp->ar_req_count= -1;
+
+       /* Send the first packet by expiring the timer */
+       clck_timer(&reqp->ar_timer, 1, arp_timeout, ref);
+
        return NW_SUSPEND;
 }
 
-PRIVATE void arp_timeout (fd, timer)
+/*
+ * arp_ioctl
+ *
+ * Handle an ioctl on the ARP cache of ethernet port 'eth_port'.
+ *
+ * eth_port      index into arp_port_table; asserted to be in
+ *               [0, eth_conf_nr) and the port must be in state APS_ARPMAIN.
+ * fd            handed unchanged to get_userdata / put_userdata.
+ * req           one of:
+ *                 NWIOARPGIP    look up the entry for nwa_ipaddr
+ *                 NWIOARPGNEXT  return the first entry of this port at
+ *                               cache index >= nwa_entno
+ *                 NWIOARPSIP    add an entry (state ACS_VALID)
+ *                 NWIOARPDIP    delete the entry for nwa_ipaddr
+ *               any other value ends in ip_panic.
+ * get_userdata  fetches the caller's nwio_arp_t.
+ * put_userdata  stores the filled-in nwio_arp_t (GIP and GNEXT only).
+ *
+ * Returns EFAULT when get_userdata yields no data, ENOENT when no matching
+ * entry exists, EEXIST when NWIOARPSIP finds the address already cached,
+ * ENOMEM when no cache entry can be allocated, EINVAL when NWIOARPDIP hits
+ * an entry that is still ACS_INCOMPLETE, the result of put_userdata for
+ * GIP/GNEXT, and 0 for a successful SIP/DIP.
+ */
+PUBLIC int arp_ioctl (eth_port, fd, req, get_userdata, put_userdata)
+int eth_port;
 int fd;
+ioreq_t req;
+get_userdata_t get_userdata;
+put_userdata_t put_userdata;
+{
+       arp_port_t *arp_port;
+       arp_cache_t *ce, *cache;
+       acc_t *data;
+       nwio_arp_t *arp_iop;
+       int entno, result, ac_flags;
+       u32_t flags;
+       ipaddr_t ipaddr;
+       time_t curr_time;
+
+       assert(eth_port >= 0 && eth_port < eth_conf_nr);
+       arp_port= &arp_port_table[eth_port];
+       assert(arp_port->ap_state == APS_ARPMAIN ||
+               (printf("arp[%d]: ap_state= %d\n", arp_port-arp_port_table,
+               arp_port->ap_state), 0));
+
+       switch(req)
+       {
+       case NWIOARPGIP:
+               /* Look up one entry by IP address. */
+               data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE);
+               if (data == NULL)
+                       return EFAULT;
+               data= bf_packIffLess(data, sizeof(*arp_iop));
+               arp_iop= (nwio_arp_t *)ptr2acc_data(data);
+               ipaddr= arp_iop->nwa_ipaddr;
+               ce= NULL;       /* lint */
+               for (entno= 0; entno < arp_cache_nr; entno++)
+               {
+                       ce= &arp_cache[entno];
+                       if (ce->ac_state == ACS_UNUSED ||
+                               ce->ac_port != arp_port)
+                       {
+                               continue;
+                       }
+                       if (ce->ac_ipaddr == ipaddr)
+                               break;
+               }
+               if (entno == arp_cache_nr)
+               {
+                       /* Also report the address of this interface */
+                       if (ipaddr != arp_port->ap_ipaddr)
+                       {
+                               bf_afree(data);
+                               return ENOENT;
+                       }
+                       arp_iop->nwa_entno= arp_cache_nr;
+                       arp_iop->nwa_ipaddr= ipaddr;
+                       arp_iop->nwa_ethaddr= arp_port->ap_ethaddr;
+                       arp_iop->nwa_flags= NWAF_PERM | NWAF_PUB;
+               }
+               else
+               {
+                       /* Report index+1, which is where a following
+                        * NWIOARPGNEXT starts scanning.
+                        */
+                       arp_iop->nwa_entno= entno+1;
+                       arp_iop->nwa_ipaddr= ce->ac_ipaddr;
+                       arp_iop->nwa_ethaddr= ce->ac_ethaddr;
+                       arp_iop->nwa_flags= 0;
+                       if (ce->ac_state == ACS_INCOMPLETE)
+                               arp_iop->nwa_flags |= NWAF_INCOMPLETE;
+                       if (ce->ac_state == ACS_UNREACHABLE)
+                               arp_iop->nwa_flags |= NWAF_DEAD;
+                       if (ce->ac_flags & ACF_PERM)
+                               arp_iop->nwa_flags |= NWAF_PERM;
+                       if (ce->ac_flags & ACF_PUB)
+                               arp_iop->nwa_flags |= NWAF_PUB;
+               }
+
+               result= (*put_userdata)(fd, 0, data, TRUE);
+               return result;
+
+       case NWIOARPGNEXT:
+               /* Return the first entry of this port at or after the
+                * cache index supplied by the caller.
+                */
+               data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE);
+               if (data == NULL)
+                       return EFAULT;
+               data= bf_packIffLess(data, sizeof(*arp_iop));
+               arp_iop= (nwio_arp_t *)ptr2acc_data(data);
+               entno= arp_iop->nwa_entno;
+               if (entno < 0)
+                       entno= 0;
+               ce= NULL;       /* lint */
+               for (; entno < arp_cache_nr; entno++)
+               {
+                       ce= &arp_cache[entno];
+                       if (ce->ac_state == ACS_UNUSED ||
+                               ce->ac_port != arp_port)
+                       {
+                               continue;
+                       }
+                       break;
+               }
+               /* nwa_entno is caller supplied and may be larger than
+                * arp_cache_nr. In that case the loop above never runs and
+                * ce is still NULL, so test with >= instead of ==.
+                */
+               if (entno >= arp_cache_nr)
+               {
+                       bf_afree(data);
+                       return ENOENT;
+               }
+               arp_iop->nwa_entno= entno+1;
+               arp_iop->nwa_ipaddr= ce->ac_ipaddr;
+               arp_iop->nwa_ethaddr= ce->ac_ethaddr;
+               arp_iop->nwa_flags= 0;
+               if (ce->ac_state == ACS_INCOMPLETE)
+                       arp_iop->nwa_flags |= NWAF_INCOMPLETE;
+               if (ce->ac_state == ACS_UNREACHABLE)
+                       arp_iop->nwa_flags |= NWAF_DEAD;
+               if (ce->ac_flags & ACF_PERM)
+                       arp_iop->nwa_flags |= NWAF_PERM;
+               if (ce->ac_flags & ACF_PUB)
+                       arp_iop->nwa_flags |= NWAF_PUB;
+
+               result= (*put_userdata)(fd, 0, data, TRUE);
+               return result;
+
+       case NWIOARPSIP:
+               /* Add an entry; refuse if the address is already cached. */
+               data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE);
+               if (data == NULL)
+                       return EFAULT;
+               data= bf_packIffLess(data, sizeof(*arp_iop));
+               arp_iop= (nwio_arp_t *)ptr2acc_data(data);
+               ipaddr= arp_iop->nwa_ipaddr;
+               if (find_cache_ent(arp_port, ipaddr))
+               {
+                       bf_afree(data);
+                       return EEXIST;
+               }
+
+               /* Translate the NWAF_* request flags to ACF_* cache flags.
+                * A published entry is always made permanent as well.
+                */
+               flags= arp_iop->nwa_flags;
+               ac_flags= ACF_EMPTY;
+               if (flags & NWAF_PERM)
+                       ac_flags |= ACF_PERM;
+               if (flags & NWAF_PUB)
+                       ac_flags |= ACF_PUB|ACF_PERM;
+
+               /* Allocate a cache entry */
+               ce= alloc_cache_ent(ac_flags);
+               if (ce == NULL)
+               {
+                       bf_afree(data);
+                       return ENOMEM;
+               }
+
+               ce->ac_flags= ac_flags;
+               ce->ac_state= ACS_VALID;
+               ce->ac_ethaddr= arp_iop->nwa_ethaddr;
+               ce->ac_ipaddr= arp_iop->nwa_ipaddr;
+               ce->ac_port= arp_port;
+
+               curr_time= get_time();
+               ce->ac_expire= curr_time+ARP_EXP_TIME;
+               ce->ac_lastuse= curr_time;
+
+               bf_afree(data);
+               return 0;
+
+       case NWIOARPDIP:
+               /* Delete the entry for an IP address. Only the address is
+                * needed from the request, so the buffer is freed at once.
+                */
+               data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE);
+               if (data == NULL)
+                       return EFAULT;
+               data= bf_packIffLess(data, sizeof(*arp_iop));
+               arp_iop= (nwio_arp_t *)ptr2acc_data(data);
+               ipaddr= arp_iop->nwa_ipaddr;
+               bf_afree(data); data= NULL;
+               ce= find_cache_ent(arp_port, ipaddr);
+               if (!ce)
+                       return ENOENT;
+               if (ce->ac_state == ACS_INCOMPLETE)
+                       return EINVAL;
+
+               ac_flags= ce->ac_flags;
+               if (ac_flags & ACF_PUB)
+               {
+                       /* Make sure entry is at the end of published
+                        * entries.
+                        */
+                       /* Scan from the start of the cache for the first
+                        * slot that is unused or not published; the slot
+                        * just before it is the last published entry.
+                        */
+                       for (entno= 0, cache= arp_cache;
+                               entno<arp_cache_nr; entno++, cache++)
+                       {
+                               if (cache->ac_state == ACS_UNUSED)
+                                       break;
+                               if (cache->ac_flags & ACF_PUB)
+                                       continue;
+                               break;
+                       }
+                       assert(cache > arp_cache);
+                       cache--;
+                       if (cache != ce)
+                       {
+                               /* Copy the last published entry over the
+                                * one being deleted and continue with the
+                                * slot that was copied from.
+                                */
+                               assert(cache > ce);
+                               *ce= *cache;
+                               ce= cache;
+                       }
+               }
+               if (ac_flags & ACF_PERM)
+               {
+                       /* Make sure entry is at the end of permanent
+                        * entries.
+                        */
+                       for (entno= 0, cache= arp_cache;
+                               entno<arp_cache_nr; entno++, cache++)
+                       {
+                               if (cache->ac_state == ACS_UNUSED)
+                                       break;
+                               if (cache->ac_flags & ACF_PERM)
+                                       continue;
+                               break;
+                       }
+                       assert(cache > arp_cache);
+                       cache--;
+                       if (cache != ce)
+                       {
+                               assert(cache > ce);
+                               *ce= *cache;
+                               ce= cache;
+                       }
+               }
+
+               /* Clear entry */
+               ce->ac_state= ACS_UNUSED;
+
+               return 0;
+
+       default:
+               ip_panic(("arp_ioctl: unknown request 0x%lx",
+                       (unsigned long)req));
+       }
+       return 0;
+}
+
+PRIVATE void arp_timeout (ref, timer)  /* timer callback: send the next ARP request for one slot, or give up */
+int ref;       /* port * AP_REQ_NR + request slot index, decoded below */
 timer_t *timer;
 {
+       int i, port, reqind, acind;
        arp_port_t *arp_port;
        arp_cache_t *ce;
-       int level;
+       struct arp_req *reqp;
        time_t curr_time;
+       acc_t *data;
+       arp46_t *arp;
+       u16_t *p;
 
-       arp_port= &arp_port_table[fd];
+       port= ref / AP_REQ_NR;  /* which ethernet port */
+       reqind= ref % AP_REQ_NR;        /* which request slot of that port */
+
+       assert(port >= 0 && port <eth_conf_nr);
+       arp_port= &arp_port_table[port];
+
+       reqp= &arp_port->ap_req[reqind];
+       assert (timer == &reqp->ar_timer);
 
-       assert (timer == &arp_port->ap_timer);
+       acind= reqp->ar_entry;  /* index of the cache entry being resolved */
 
-       if (++arp_port->ap_req_count < MAX_ARP_RETRIES)
+       assert(acind >= 0 && acind < arp_cache_nr);
+       ce= &arp_cache[acind];
+
+       assert(ce->ac_port == arp_port);
+       assert(ce->ac_state == ACS_INCOMPLETE);
+
+       if (++reqp->ar_req_count >= MAX_ARP_RETRIES)    /* retry limit reached: stop asking */
        {
-               arp_port->ap_flags |= APF_CLIENTWRITE|APF_MORE2WRITE;
-               if (!(arp_port->ap_flags & APF_ARP_WR_IP))
-                       setup_write(arp_port);
+               curr_time= get_time();
+               ce->ac_state= ACS_UNREACHABLE;
+               ce->ac_expire= curr_time+ ARP_NOTRCH_EXP_TIME;
+               ce->ac_lastuse= curr_time;
+
+               clck_untimer(&reqp->ar_timer);
+               reqp->ar_entry= -1;     /* a negative ar_entry marks the slot as free */
+               client_reply(arp_port, ce->ac_ipaddr, NULL);    /* reply carries no ethernet address */
+               return;
        }
-       else
+
+       data= bf_memreq(sizeof(arp46_t));       /* buffer for one ARP request packet */
+       arp= (arp46_t *)ptr2acc_data(data);
+
+       /* Clear padding */
+       assert(sizeof(arp->a46_data.a46_dummy) % sizeof(*p) == 0);
+       for (i= 0, p= (u16_t *)arp->a46_data.a46_dummy;
+               i < sizeof(arp->a46_data.a46_dummy)/sizeof(*p);
+               i++, p++)
        {
-               ce= find_cache_ent(arp_port, arp_port->ap_req_ipaddr);
-               if (ce) {
-                       assert(ce->ac_state == ACS_INCOMPLETE ||
-                               (printf("ce->ac_state= %d\n", ce->ac_state),0));
-                       curr_time= get_time();
-                       ce->ac_state= ACS_UNREACHABLE;
-                       ce->ac_expire= curr_time+ ARP_NOTRCH_EXP_TIME;
-                       ce->ac_lastuse= curr_time;
+               *p= 0xdead;     /* the padding is filled with this pattern, not with zeros */
+       }
 
-                       client_reply(arp_port, ce->ac_ipaddr, NULL);
+       arp->a46_dstaddr.ea_addr[0]= 0xff;      /* destination is the all-ones address ff:ff:ff:ff:ff:ff */
+       arp->a46_dstaddr.ea_addr[1]= 0xff;
+       arp->a46_dstaddr.ea_addr[2]= 0xff;
+       arp->a46_dstaddr.ea_addr[3]= 0xff;
+       arp->a46_dstaddr.ea_addr[4]= 0xff;
+       arp->a46_dstaddr.ea_addr[5]= 0xff;
+       arp->a46_hdr= HTONS(ARP_ETHERNET);
+       arp->a46_pro= HTONS(ETH_IP_PROTO);
+       arp->a46_hln= 6;
+       arp->a46_pln= 4;
+       arp->a46_op= HTONS(ARP_REQUEST);
+       arp->a46_sha= arp_port->ap_ethaddr;     /* sender: this port's own addresses */
+       memcpy (arp->a46_spa, &arp_port->ap_ipaddr, sizeof(ipaddr_t));
+       memset(&arp->a46_tha, '\0', sizeof(ether_addr_t));      /* target hardware address left zero */
+       memcpy (arp->a46_tpa, &ce->ac_ipaddr, sizeof(ipaddr_t));        /* target: the address being resolved */
+
+       assert(data->acc_linkC == 1);
+       data->acc_ext_link= arp_port->ap_sendlist;      /* push the packet on the front of ap_sendlist */
+       arp_port->ap_sendlist= data; data= NULL;
+
+       if (!(arp_port->ap_flags & APF_ARP_WR_IP))      /* call setup_write only when this flag is clear */
+               setup_write(arp_port);
+
+       clck_timer(&reqp->ar_timer, get_time() + ARP_TIMEOUT,   /* re-arm for the next retry */
+               arp_timeout, ref);
+}
+
+PRIVATE void arp_buffree(priority)     /* free queued ARP packets on every port */
+int priority;  /* compared with ARP_PRI_REC and ARP_PRI_SEND; each match trims one list */
+{
+       int i;
+       acc_t *pack, *next_pack;
+       arp_port_t *arp_port;
+
+       for (i= 0, arp_port= arp_port_table; i<eth_conf_nr; i++, arp_port++)
+       {
+               if (priority == ARP_PRI_REC)    /* trim the ap_reclist chain */
+               {
+                       next_pack= arp_port->ap_reclist;
+                       while(next_pack && next_pack->acc_ext_link)     /* free every packet except the last one */
+                       {
+                               pack= next_pack;
+                               next_pack= pack->acc_ext_link;
+                               bf_afree(pack);
+                       }
+                       if (next_pack)  /* exactly one packet is left */
+                       {
+                               if (ev_in_queue(&arp_port->ap_event))   /* event still queued: keep the last packet; NOTE(review): presumably the handler expects it - confirm */
+                               {
+                                       DBLOCK(1, printf(
+                       "not freeing ap_reclist, ap_event enqueued\n"));
+                               }
+                               else
+                               {
+                                       bf_afree(next_pack);
+                                       next_pack= NULL;
+                               }
+                       }
+                       arp_port->ap_reclist= next_pack;        /* NULL, or the single packet that was kept */
+               }
+               if (priority == ARP_PRI_SEND)   /* same procedure for the ap_sendlist chain */
+               {
+                       next_pack= arp_port->ap_sendlist;
+                       while(next_pack && next_pack->acc_ext_link)     /* free every packet except the last one */
+                       {
+                               pack= next_pack;
+                               next_pack= pack->acc_ext_link;
+                               bf_afree(pack);
+                       }
+                       if (next_pack)  /* exactly one packet is left */
+                       {
+                               if (ev_in_queue(&arp_port->ap_event))   /* same ap_event test as for the receive list */
+                               {
+                                       DBLOCK(1, printf(
+                       "not freeing ap_sendlist, ap_event enqueued\n"));
+                               }
+                               else
+                               {
+                                       bf_afree(next_pack);
+                                       next_pack= NULL;
+                               }
+                       }
+                       arp_port->ap_sendlist= next_pack;       /* NULL, or the single packet that was kept */
+               }
+       }
+}
+
+#ifdef BUF_CONSISTENCY_CHECK
+PRIVATE void arp_bufcheck()    /* run bf_check_acc on each packet chained from every ARP port */
+{
+       int i;
+       arp_port_t *arp_port;
+       acc_t *pack;
+
+       for (i= 0, arp_port= arp_port_table; i<eth_conf_nr; i++, arp_port++)
+       {
+               for (pack= arp_port->ap_reqlist; pack;  /* NOTE(review): nearby code uses ap_reclist and ap_sendlist; confirm ap_reqlist is the intended member */
+                       pack= pack->acc_ext_link)
+               {
+                       bf_check_acc(pack);
                }
        }
 }
+#endif /* BUF_CONSISTENCY_CHECK */
 
 /*
- * $PchId: arp.c,v 1.6 1995/11/21 06:45:27 philip Exp $
+ * $PchId: arp.c,v 1.22 2005/06/28 14:15:06 philip Exp $
  */
index 5dadb19292024692539407990c3e4e6908facf34..1edfe5394b4e1ab9bd04fe8dca0e804086885e7f 100644 (file)
@@ -22,8 +22,11 @@ void arp_set_ipaddr ARGS(( int eth_port, ipaddr_t ipaddr ));
 int arp_set_cb ARGS(( int eth_port, int ip_port, arp_func_t arp_func ));
 int arp_ip_eth ARGS(( int eth_port, ipaddr_t ipaddr, ether_addr_t *ethaddr ));
 
+int arp_ioctl ARGS(( int eth_port, int fd, ioreq_t req,
+       get_userdata_t get_userdata, put_userdata_t put_userdata ));
+
 #endif /* ARP_H */
 
 /*
- * $PchId: arp.h,v 1.5 1995/11/21 06:45:27 philip Exp $
+ * $PchId: arp.h,v 1.7 2001/04/19 18:58:17 philip Exp $
  */
index 929cc1a3b2b63dc298ae9fdef2eef2df33b9fdbb..e6d6492b014e82957372feaca7957cb2a26b5915 100644 (file)
@@ -8,11 +8,11 @@ Copyright 1995 Philip Homburg
 
 #if !NDEBUG
 
-void bad_assertion(char *file, int line, char *what);
-void bad_compare(char *file, int line, int lhs, char *what, int rhs);
+void bad_assertion(char *file, int line, char *what) _NORETURN;
+void bad_compare(char *file, int line, int lhs, char *what, int rhs) _NORETURN;
 
-#define assert(x)      (!(x) ? bad_assertion(this_file, __LINE__, #x) \
-                                                               : (void) 0)
+#define assert(x)      ((void)(!(x) ? bad_assertion(this_file, __LINE__, \
+                       #x),0 : 0))
 #define compare(a,t,b) (!((a) t (b)) ? bad_compare(this_file, __LINE__, \
                                (a), #a " " #t " " #b, (b)) : (void) 0)
 
@@ -27,5 +27,5 @@ void bad_compare(char *file, int line, int lhs, char *what, int rhs);
 
 
 /*
- * $PchId: assert.h,v 1.4 1995/11/21 06:45:27 philip Exp $
+ * $PchId: assert.h,v 1.8 2002/03/18 21:50:32 philip Exp $
  */
index 6b0223039b8e36073ace56ccb253ee9a15112a57..95c89351080b2b8c2642d2a4420d93a971966f9c 100644 (file)
@@ -11,6 +11,9 @@ Copyright 1995 Philip Homburg
 
 #define MAX_BUFREQ_PRI 10
 
+#define ARP_PRI_REC            3
+#define ARP_PRI_SEND           3
+
 #define ETH_PRI_PORTBUFS       3
 #define ETH_PRI_FDBUFS_EXTRA   5
 #define ETH_PRI_FDBUFS         6
@@ -79,6 +82,7 @@ typedef struct acc
 } acc_t;
 
 extern acc_t *bf_temporary_acc;
+extern acc_t *bf_linkcheck_acc;
 
 /* For debugging... */
 
@@ -95,7 +99,10 @@ extern acc_t *bf_temporary_acc;
 #define bf_pack(a) _bf_pack(this_file, __LINE__, a)
 #define bf_append(a,b) _bf_append(this_file, __LINE__, a, b)
 #define bf_dupacc(a) _bf_dupacc(this_file, __LINE__, a)
+#if 0
+#define bf_mark_1acc(a) _bf_mark_1acc(this_file, __LINE__, a)
 #define bf_mark_acc(a) _bf_mark_acc(this_file, __LINE__, a)
+#endif
 #define bf_align(a,s,al) _bf_align(this_file, __LINE__, a, s, al)
 
 #else /* BUF_IMPLEMENTATION */
@@ -112,6 +119,7 @@ extern acc_t *bf_temporary_acc;
 
 #else
 
+#define bf_mark_1acc(acc)      ((void)0)
 #define bf_mark_acc(acc)       ((void)0)
 
 #endif /* BUF_TRACK_ALLOC_FREE */
@@ -214,22 +222,28 @@ acc_t *_bf_align ARGS(( char *clnt_file, int clnt_line,
        Size must be less than or equal to BUF_S.
 */
 
+int bf_linkcheck ARGS(( acc_t *acc ));
+/* Check that all link counts are positive, and that offsets and sizes lie
+ * within the underlying buffer.
+ */
+
 #define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
        (&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
                acc_offset]))
 
 #define bf_chkbuf(buf) ((buf)? (compare((buf)->acc_linkC,>,0), \
        compare((buf)->acc_buffer, !=, 0), \
-       compare((buf)->acc_buffer->buf_linkC,>,0)) : 0)
+       compare((buf)->acc_buffer->buf_linkC,>,0)) : (void)0)
 
 #ifdef BUF_CONSISTENCY_CHECK
 int bf_consistency_check ARGS(( void ));
 void bf_check_acc ARGS(( acc_t *acc ));
+void _bf_mark_1acc ARGS(( char *clnt_file, int clnt_line, acc_t *acc ));
 void _bf_mark_acc ARGS(( char *clnt_file, int clnt_line, acc_t *acc ));
 #endif
 
 #endif /* BUF_H */
 
 /*
- * $PchId: buf.h,v 1.8 1995/11/21 06:45:27 philip Exp $
+ * $PchId: buf.h,v 1.13 2003/09/10 08:52:09 philip Exp $
  */
index 23fe72f07ecb78eaf66ec7d1c8e9db225dad20b8..9705d472f6a0f3b9eb828ffde433fde439276d92 100644 (file)
@@ -29,6 +29,7 @@ typedef struct eth_fd
        nwio_ethopt_t ef_ethopt;
        eth_port_t *ef_port;
        struct eth_fd *ef_type_next;
+       struct eth_fd *ef_send_next;
        int ef_srfd;
        acc_t *ef_rdbuf_head;
        acc_t *ef_rdbuf_tail;
@@ -47,6 +48,15 @@ typedef struct eth_fd
 #              define  EFF_WRITE_IP    0x4
 #      define EFF_OPTSET       0x8
 
+/* Note that the vh_type field is normally considered part of the ethernet
+ * header.
+ */
+typedef struct 
+{
+       u16_t vh_type;  /* type field */
+       u16_t vh_vlan;  /* read with ntohs(), then tested with ETH_TCI_CFI and masked with ETH_TCI_VLAN_MASK */
+} vlan_hdr_t;
+
 FORWARD int eth_checkopt ARGS(( eth_fd_t *eth_fd ));
 FORWARD void hash_fd ARGS(( eth_fd_t *eth_fd ));
 FORWARD void unhash_fd ARGS(( eth_fd_t *eth_fd ));
@@ -59,9 +69,12 @@ FORWARD void reply_thr_get ARGS(( eth_fd_t *eth_fd,
        size_t result, int for_ioctl ));
 FORWARD void reply_thr_put ARGS(( eth_fd_t *eth_fd,
        size_t result, int for_ioctl ));
+FORWARD void do_rec_conf ARGS(( eth_port_t *eth_port ));
 FORWARD u32_t compute_rec_conf ARGS(( eth_port_t *eth_port ));
+FORWARD acc_t *insert_vlan_hdr ARGS(( eth_port_t *eth_port, acc_t *pack ));
 
 PUBLIC eth_port_t *eth_port_table;
+PUBLIC int no_ethWritePort= 0;
 
 PRIVATE eth_fd_t eth_fd_table[ETH_FD_NR];
 PRIVATE ether_addr_t broadcast= { { 255, 255, 255, 255, 255, 255 } };
@@ -80,18 +93,21 @@ PUBLIC void eth_init()
                                           thus a good compiler doesn't
                                           generate any code for this */
 
-#if ZERO
+
        for (i=0; i<ETH_FD_NR; i++)
                eth_fd_table[i].ef_flags= EFF_EMPTY;
        for (i=0; i<eth_conf_nr; i++)
        {
                eth_port_table[i].etp_flags= EFF_EMPTY;
+               eth_port_table[i].etp_sendq_head= NULL;
+               eth_port_table[i].etp_sendq_tail= NULL;
                eth_port_table[i].etp_type_any= NULL;
                ev_init(&eth_port_table[i].etp_sendev);
                for (j= 0; j<ETH_TYPE_HASH_NR; j++)
                        eth_port_table[i].etp_type[j]= NULL;
+               for (j= 0; j<ETH_VLAN_HASH_NR; j++)
+                       eth_port_table[i].etp_vlan_tab[j]= NULL;
        }
-#endif
 
 #ifndef BUF_CONSISTENCY_CHECK
        bf_logon(eth_buffree);
@@ -102,11 +118,13 @@ PUBLIC void eth_init()
        osdep_eth_init();
 }
 
-PUBLIC int eth_open(port, srfd, get_userdata, put_userdata, put_pkt)
+PUBLIC int eth_open(port, srfd, get_userdata, put_userdata, put_pkt,
+       select_res)
 int port, srfd;
 get_userdata_t get_userdata;
 put_userdata_t put_userdata;
 put_pkt_t put_pkt;
+select_res_t select_res;
 {
        int i;
        eth_port_t *eth_port;
@@ -148,7 +166,7 @@ ioreq_t req;
        eth_fd_t *eth_fd;
        eth_port_t *eth_port;
 
-       DBLOCK(0x20, printf("eth_ioctl (%d, %lu)\n", fd, req));
+       DBLOCK(0x20, printf("eth_ioctl (%d, 0x%lx)\n", fd, (unsigned long)req));
        eth_fd= &eth_fd_table[fd];
        eth_port= eth_fd->ef_port;
 
@@ -163,7 +181,6 @@ ioreq_t req;
                        int result;
                        u32_t new_en_flags, new_di_flags,
                                old_en_flags, old_di_flags;
-                       u32_t flags;
 
                        data= (*eth_fd->ef_get_userdata)(eth_fd->
                                ef_srfd, 0, sizeof(nwio_ethopt_t), TRUE);
@@ -270,8 +287,7 @@ ioreq_t req;
                                if (changes & (NWEO_BROAD_MASK |
                                        NWEO_MULTI_MASK | NWEO_PROMISC_MASK))
                                {
-                                       flags= compute_rec_conf(eth_port);
-                                       eth_set_rec_conf(eth_port, flags);
+                                       do_rec_conf(eth_port);
                                }
                        }
 
@@ -307,7 +323,7 @@ ioreq_t req;
                        acc_t *acc;
                        int result;
 
-assert (sizeof(nwio_ethstat_t) <= BUF_S);
+                       assert (sizeof(nwio_ethstat_t) <= BUF_S);
 
                        eth_port= eth_fd->ef_port;
                        if (!(eth_port->etp_flags & EPF_ENABLED))
@@ -317,15 +333,24 @@ assert (sizeof(nwio_ethstat_t) <= BUF_S);
                        }
 
                        acc= bf_memreq(sizeof(nwio_ethstat_t));
-compare (bf_bufsize(acc), ==, sizeof(*ethstat));
+                       compare (bf_bufsize(acc), ==, sizeof(*ethstat));
 
                        ethstat= (nwio_ethstat_t *)ptr2acc_data(acc);
-
                        ethstat->nwes_addr= eth_port->etp_ethaddr;
 
-                       result= eth_get_stat(eth_port, &ethstat->nwes_stat);
-assert (result == 0);
-compare (bf_bufsize(acc), ==, sizeof(*ethstat));
+                       if (!eth_port->etp_vlan)
+                       {
+                               result= eth_get_stat(eth_port,
+                                       &ethstat->nwes_stat);
+                               assert (result == 0);
+                       }
+                       else
+                       {
+                               /* No statistics */
+                               memset(&ethstat->nwes_stat, '\0',
+                                       sizeof(ethstat->nwes_stat));
+                       }
+
                        result= (*eth_fd->ef_put_userdata)(eth_fd->
                                ef_srfd, 0, acc, TRUE);
                        if (result >= 0)
@@ -344,7 +369,7 @@ int fd;
 size_t count;
 {
        eth_fd_t *eth_fd;
-       eth_port_t *eth_port;
+       eth_port_t *eth_port, *rep;
        acc_t *user_data;
        int r;
 
@@ -370,9 +395,19 @@ size_t count;
                return NW_OK;
        }
        eth_fd->ef_flags |= EFF_WRITE_IP;
-       if (eth_port->etp_wr_pack)
+
+       /* Enqueue at the real ethernet port */
+       rep= eth_port->etp_vlan_port;
+       if (!rep)
+               rep= eth_port;
+       if (rep->etp_wr_pack)
        {
-               eth_port->etp_flags |= EPF_MORE2WRITE;
+               eth_fd->ef_send_next= NULL;
+               if (rep->etp_sendq_head)
+                       rep->etp_sendq_tail->ef_send_next= eth_fd;
+               else
+                       rep->etp_sendq_head= eth_fd;
+               rep->etp_sendq_tail= eth_fd;
                return NW_SUSPEND;
        }
 
@@ -398,7 +433,7 @@ acc_t *data;
 size_t data_len;
 {
        eth_fd_t *eth_fd;
-       eth_port_t *eth_port;
+       eth_port_t *eth_port, *rep;
        eth_hdr_t *eth_hdr;
        acc_t *eth_pack;
        unsigned long nweo_flags;
@@ -420,11 +455,14 @@ size_t data_len;
                DBLOCK(1, printf("illegal packetsize (%d)\n",count));
                return EPACKSIZE;
        }
-       if (eth_port->etp_wr_pack)
+       rep= eth_port->etp_vlan_port;
+       if (!rep)
+               rep= eth_port;
+
+       if (rep->etp_wr_pack)
                return NW_WOULDBLOCK;
        
        nweo_flags= eth_fd->ef_ethopt.nweo_flags;
-
        if (nweo_flags & NWEO_RWDATONLY)
        {
                eth_pack= bf_memreq(ETH_HDR_SIZE);
@@ -450,9 +488,20 @@ size_t data_len;
                eth_port->etp_wr_pack= eth_pack;
                ev_arg.ev_ptr= eth_port;
                ev_enqueue(&eth_port->etp_sendev, eth_loop_ev, ev_arg);
+               return NW_OK;
        }
-       else
-               eth_write_port(eth_port, eth_pack);
+
+       if (rep != eth_port)
+       {
+               eth_pack= insert_vlan_hdr(eth_port, eth_pack);
+               if (!eth_pack)
+               {
+                       /* Packet is silently discarded */
+                       return NW_OK;
+               }
+       }
+
+       eth_write_port(rep, eth_pack);
        return NW_OK;
 }
 
@@ -506,29 +555,34 @@ int which_operation;
        switch (which_operation)
        {
        case SR_CANCEL_READ:
-assert (eth_fd->ef_flags & EFF_READ_IP);
+               assert (eth_fd->ef_flags & EFF_READ_IP);
                eth_fd->ef_flags &= ~EFF_READ_IP;
                reply_thr_put(eth_fd, EINTR, FALSE);
                break;
        case SR_CANCEL_WRITE:
-assert (eth_fd->ef_flags & EFF_WRITE_IP);
+               assert (eth_fd->ef_flags & EFF_WRITE_IP);
                eth_fd->ef_flags &= ~EFF_WRITE_IP;
                reply_thr_get(eth_fd, EINTR, FALSE);
                break;
-#if !CRAMPED
        default:
                ip_panic(( "got unknown cancel request" ));
-#endif
        }
        return NW_OK;
 }
 
+PUBLIC int eth_select(fd, operations)  /* stub: select is not implemented for ethernet fds */
+int fd;        /* not used */
+unsigned operations;   /* not used */
+{
+       printf("eth_select: not implemented\n");
+       return 0;       /* always 0 */
+}
+
 PUBLIC void eth_close(fd)
 int fd;
 {
        eth_fd_t *eth_fd;
        eth_port_t *eth_port;
-       u32_t flags;
        acc_t *pack;
 
        eth_fd= &eth_fd_table[fd];
@@ -547,8 +601,7 @@ int fd;
        eth_fd->ef_flags= EFF_EMPTY;
 
        eth_port= eth_fd->ef_port;
-       flags= compute_rec_conf(eth_port);
-       eth_set_rec_conf(eth_port, flags);
+       do_rec_conf(eth_port);
 }
 
 PUBLIC void eth_loop_ev(ev, ev_arg)
@@ -562,7 +615,13 @@ ev_arg_t ev_arg;
        assert(ev == &eth_port->etp_sendev);
 
        pack= eth_port->etp_wr_pack;
+
+       assert(!no_ethWritePort);
+       no_ethWritePort= 1;
        eth_arrive(eth_port, pack, bf_bufsize(pack));
+       assert(no_ethWritePort);
+       no_ethWritePort= 0;
+
        eth_port->etp_wr_pack= NULL;
        eth_restart_write(eth_port);
 }
@@ -665,30 +724,14 @@ PUBLIC void eth_restart_write(eth_port)
 eth_port_t *eth_port;
 {
        eth_fd_t *eth_fd;
-       int i, r;
-
-       if (eth_port->etp_wr_pack)
-               return;
-
-       if (!(eth_port->etp_flags & EPF_MORE2WRITE))
-               return;
-       eth_port->etp_flags &= ~EPF_MORE2WRITE;
+       int r;
 
-       for (i=0, eth_fd= eth_fd_table; i<ETH_FD_NR; i++, eth_fd++)
+       assert(eth_port->etp_wr_pack == NULL);
+       while (eth_fd= eth_port->etp_sendq_head, eth_fd != NULL)
        {
-               if ((eth_fd->ef_flags & (EFF_INUSE|EFF_WRITE_IP)) !=
-                       (EFF_INUSE|EFF_WRITE_IP))
-               {
-                       continue;
-               }
-               if (eth_fd->ef_port != eth_port)
-                       continue;
-
                if (eth_port->etp_wr_pack)
-               {
-                       eth_port->etp_flags |= EPF_MORE2WRITE;
                        return;
-               }
+               eth_port->etp_sendq_head= eth_fd->ef_send_next;
 
                eth_fd->ef_flags &= ~EFF_WRITE_IP;
                r= eth_write(eth_fd-eth_fd_table, eth_fd->ef_write_count);
@@ -708,7 +751,12 @@ size_t pack_size;
        ether_type_t type;
        eth_fd_t *eth_fd, *first_fd, *share_fd;
        int hash, i;
+       u16_t vlan, temp;
        time_t exp_time;
+       acc_t *vlan_pack, *hdr_acc, *tmp_acc;
+       eth_port_t *vp;
+       vlan_hdr_t vh;
+       u32_t *p;
 
        exp_time= get_time() + EXPIRE_TIME;
 
@@ -741,6 +789,46 @@ size_t pack_size;
        hash ^= (hash >> 8);
        hash &= (ETH_TYPE_HASH_NR-1);
 
+       if (type == HTONS(ETH_VLAN_PROTO))
+       {
+               /* VLAN packet. Extract original ethernet packet */
+
+               vlan_pack= pack;
+               vlan_pack->acc_linkC++;
+               hdr_acc= bf_cut(vlan_pack, 0, 2*sizeof(ether_addr_t));
+               vlan_pack= bf_delhead(vlan_pack, 2*sizeof(ether_addr_t));
+               vlan_pack= bf_packIffLess(vlan_pack, sizeof(vh));
+               vh= *(vlan_hdr_t *)ptr2acc_data(vlan_pack);
+               vlan_pack= bf_delhead(vlan_pack, sizeof(vh));
+               hdr_acc= bf_append(hdr_acc, vlan_pack);
+               vlan_pack= hdr_acc; hdr_acc= NULL;
+               if (bf_bufsize(vlan_pack) < ETH_MIN_PACK_SIZE)
+               {
+                       tmp_acc= bf_memreq(sizeof(vh));
+
+                       /* Fill the padding with a marker pattern (not zeros) */
+                       assert(sizeof(vh) <= sizeof(*p));
+                       p= (u32_t *)ptr2acc_data(tmp_acc);
+                       *p= 0xdeadbeef;
+
+                       vlan_pack= bf_append(vlan_pack, tmp_acc);
+                       tmp_acc= NULL;
+               }
+               vlan= ntohs(vh.vh_vlan);
+               if (vlan & ETH_TCI_CFI)
+               {
+                       /* No support for extended address formats */
+                       bf_afree(vlan_pack); vlan_pack= NULL;
+               }
+               vlan &= ETH_TCI_VLAN_MASK;
+       }
+       else
+       {
+               /* No VLAN processing */
+               vlan_pack= NULL;
+               vlan= 0;        /* lint */
+       }
+
        first_fd= NULL;
        for (i= 0; i<2; i++)
        {
@@ -813,6 +901,40 @@ size_t pack_size;
                }                       
                bf_afree(pack);
        }
+       if (vlan_pack)
+       {
+               hash= ETH_HASH_VLAN(vlan, temp);
+               for (vp= eth_port->etp_vlan_tab[hash]; vp;
+                       vp= vp->etp_vlan_next)
+               {
+                       if (vp->etp_vlan == vlan)
+                               break;
+               }
+               if (vp)
+               {
+                       eth_arrive(vp, vlan_pack, pack_size-sizeof(vh));
+                       vlan_pack= NULL;
+               }
+               else
+               {
+                       /* No device for VLAN */
+                       bf_afree(vlan_pack);
+                       vlan_pack= NULL;
+               }
+       }
+}
+
+PUBLIC void eth_reg_vlan(eth_port, vlan_port) /* Link vlan_port into the VLAN hash table of eth_port */
+eth_port_t *eth_port;  /* port whose etp_vlan_tab gets the new entry */
+eth_port_t *vlan_port; /* port to add; its etp_vlan selects the bucket */
+{
+       u16_t t, vlan;  /* t is scratch space overwritten by ETH_HASH_VLAN */
+       int h;          /* bucket index in etp_vlan_tab */
+
+       vlan= vlan_port->etp_vlan;
+       h= ETH_HASH_VLAN(vlan, t);
+       vlan_port->etp_vlan_next= eth_port->etp_vlan_tab[h];   /* push onto the head of bucket h's chain */
+       eth_port->etp_vlan_tab[h]= vlan_port;
 }
 
 PRIVATE void packet2user (eth_fd, pack, exp_time)
@@ -923,6 +1045,27 @@ PRIVATE void eth_bufcheck()
 }
 #endif
 
+PRIVATE void do_rec_conf(eth_port)    /* Recompute and apply the receive configuration of a device */
+eth_port_t *eth_port;  /* a device port, or a VLAN port (non-zero etp_vlan) on top of one */
+{
+       int i;
+       u32_t flags;    /* OR of the flags of the device and all of its VLAN ports */
+       eth_port_t *vp;
+
+       if (eth_port->etp_vlan)
+       {
+               /* Configure underlying device */
+               eth_port= eth_port->etp_vlan_port;
+       }
+       flags= compute_rec_conf(eth_port);
+       for (i= 0; i<ETH_VLAN_HASH_NR; i++)     /* visit every bucket of the VLAN hash table */
+       {
+               for (vp= eth_port->etp_vlan_tab[i]; vp; vp= vp->etp_vlan_next)
+                       flags |= compute_rec_conf(vp);
+       }
+       eth_set_rec_conf(eth_port, flags);      /* applied once, to the underlying device */
+}
+
 PRIVATE u32_t compute_rec_conf(eth_port)
 eth_port_t *eth_port;
 {
@@ -968,6 +1111,41 @@ int for_ioctl;
        assert(error == NW_OK);
 }
 
+PRIVATE acc_t *insert_vlan_hdr(eth_port, pack)        /* Insert a VLAN header after the two ethernet addresses of pack */
+eth_port_t *eth_port;  /* supplies the VLAN number (etp_vlan) */
+acc_t *pack;           /* packet to tag; returned retagged, or freed when NULL is returned */
+{
+       acc_t *head_acc, *vh_acc;
+       u16_t type, vlan;
+       vlan_hdr_t *vp;
+
+       head_acc= bf_cut(pack, 0, 2*sizeof(ether_addr_t));     /* the two ethernet addresses */
+       pack= bf_delhead(pack, 2*sizeof(ether_addr_t));        /* remainder, starting at the type field */
+       pack= bf_packIffLess(pack, sizeof(type));
+       type= *(u16_t *)ptr2acc_data(pack);
+       if (type == HTONS(ETH_VLAN_PROTO))
+       {
+               /* Packet is already tagged. Should update vlan number.
+                * For now, just discard packet.
+                */
+               printf("insert_vlan_hdr: discarding vlan packet\n");
+               bf_afree(head_acc); head_acc= NULL;
+               bf_afree(pack); pack= NULL;
+               return NULL;    /* both pieces were freed above */
+       }
+       vlan= eth_port->etp_vlan;       /* priority and CFI are zero */
+
+       vh_acc= bf_memreq(sizeof(vlan_hdr_t));
+       vp= (vlan_hdr_t *)ptr2acc_data(vh_acc);
+       vp->vh_type= HTONS(ETH_VLAN_PROTO);
+       vp->vh_vlan= htons(vlan);
+
+       head_acc= bf_append(head_acc, vh_acc); vh_acc= NULL;   /* addresses + VLAN header */
+       head_acc= bf_append(head_acc, pack); pack= NULL;       /* ... + original type field and payload */
+       pack= head_acc; head_acc= NULL;
+       return pack;
+}
+
 /*
- * $PchId: eth.c,v 1.11 1996/08/02 07:04:58 philip Exp $
+ * $PchId: eth.c,v 1.23 2005/06/28 14:15:58 philip Exp $
  */
index b084c3f981685792b64b594e3c2bdfd8f1436352..be712c730aa55c538616e8bb3bb225fb96a12a31 100644 (file)
@@ -23,16 +23,17 @@ void eth_prep ARGS(( void ));
 void eth_init ARGS(( void ));
 int eth_open ARGS(( int port, int srfd,
        get_userdata_t get_userdata, put_userdata_t put_userdata,
-       put_pkt_t put_pkt ));
+       put_pkt_t put_pkt, select_res_t sel_res ));
 int eth_ioctl ARGS(( int fd, ioreq_t req));
 int eth_read ARGS(( int port, size_t count ));
 int eth_write ARGS(( int port, size_t count ));
 int eth_cancel ARGS(( int fd, int which_operation ));
+int eth_select ARGS(( int fd, unsigned operations ));
 void eth_close ARGS(( int fd ));
 int eth_send ARGS(( int port, struct acc *data, size_t data_len ));
 
 #endif /* ETH_H */
 
 /*
- * $PchId: eth.h,v 1.6 1996/05/07 20:49:07 philip Exp $
+ * $PchId: eth.h,v 1.8 2005/06/28 14:16:10 philip Exp $
  */
index 5b49c9255e068e8a740f72b8640f8efd84628e23..d9fc74d90eeef032f1371de9acd49b21efd26d6a 100644 (file)
@@ -8,35 +8,43 @@ Copyright 1995 Philip Homburg
 #define ETH_INT_H
 
 #define ETH_TYPE_HASH_NR       16
-
+#define ETH_VLAN_HASH_NR       16
+
+/* Assume that the arguments are local variables: v is evaluated more than once and t is overwritten as scratch space */
+#define ETH_HASH_VLAN(v,t)     \
+       ((t)= (((v) >> 8) ^ (v)), \
+       (t)= (((t) >> 4) ^ (t)), \
+       (t) & (ETH_VLAN_HASH_NR-1))
+       
 typedef struct eth_port
 {
        int etp_flags;
        ether_addr_t etp_ethaddr;
        acc_t *etp_wr_pack, *etp_rd_pack;
+       struct eth_fd *etp_sendq_head; /* first fd waiting to write; eth_restart_write pops from here */
+       struct eth_fd *etp_sendq_tail; /* presumably the last fd of that queue -- confirm in eth.c */
        struct eth_fd *etp_type_any;
        struct eth_fd *etp_type[ETH_TYPE_HASH_NR];
        event_t etp_sendev;
 
+       /* VLAN support */
+       u16_t etp_vlan;                 /* VLAN number; non-zero is treated as "this is a VLAN port" */
+       struct eth_port *etp_vlan_port; /* for a VLAN port: the underlying device port */
+       struct eth_port *etp_vlan_tab[ETH_VLAN_HASH_NR];       /* for a device: VLAN ports, hashed with ETH_HASH_VLAN */
+       struct eth_port *etp_vlan_next; /* next VLAN port in the same hash bucket */
+
        osdep_eth_port_t etp_osdep;
 } eth_port_t;
 
 #define EPF_EMPTY       0x0
 #define EPF_ENABLED     0x1
-#define EPF_MORE2WRITE 0x10
 #define EPF_READ_IP    0x20
 #define EPF_READ_SP    0x40
 
-#if 0
-#define EPS_EMPTY      0x0
-#define EPS_LOC                0x1
-#define EPS_BROAD      0x2
-#define EPS_MULTI      0x4
-#define EPS_PROMISC    0x8
-#endif
-
 extern eth_port_t *eth_port_table;
 
+extern int no_ethWritePort;    /* debug, consistency check */
+
 void osdep_eth_init ARGS(( void ));
 int eth_get_stat ARGS(( eth_port_t *eth_port, eth_stat_t *eth_stat ));
 void eth_write_port ARGS(( eth_port_t *eth_port, acc_t *pack ));
@@ -44,9 +52,10 @@ void eth_arrive ARGS(( eth_port_t *port, acc_t *pack, size_t pack_size ));
 void eth_set_rec_conf ARGS(( eth_port_t *eth_port, u32_t flags ));
 void eth_restart_write ARGS(( eth_port_t *eth_port ));
 void eth_loop_ev ARGS(( event_t *ev, ev_arg_t ev_arg ));
+void eth_reg_vlan ARGS(( eth_port_t *eth_port, eth_port_t *vlan_port ));
 
 #endif /* ETH_INT_H */
 
 /*
- * $PchId: eth_int.h,v 1.6 1995/11/21 06:45:27 philip Exp $
+ * $PchId: eth_int.h,v 1.9 2001/04/23 08:04:06 philip Exp $
  */
index ae0b2faf4fae29f7153cef5d68d1786218f5ed59..e59d97706f062fe972104eb3c99d084df800032c 100644 (file)
@@ -1,7 +1,7 @@
 /*
 inet/generic/event.c
 
-Created:       April 1995 by Philip Homburg <philip@cs.vu.nl>
+Created:       April 1995 by Philip Homburg <philip@f-mnx.phicoh.com>
 
 Implementation of an event queue.
 
@@ -65,5 +65,5 @@ event_t *ev;
 
 
 /*
- * $PchId: event.c,v 1.4 1995/11/21 06:45:27 philip Exp $
+ * $PchId: event.c,v 1.6 2004/08/03 16:23:32 philip Exp $
  */
index 1c50c962f0a3b70126dc3888a2cace853693d340..568371268e4a1f4f1a19b3353a4a9bedd86034ad 100644 (file)
@@ -1,7 +1,7 @@
 /*
 inet/generic/event.h
 
-Created:       April 1995 by Philip Homburg <philip@cs.vu.nl>
+Created:       April 1995 by Philip Homburg <philip@f-mnx.phicoh.com>
 
 Header file for an event mechanism.
 
@@ -38,5 +38,5 @@ int ev_in_queue ARGS(( event_t *ev ));
 #endif /* INET__GENERIC__EVENT_H */
 
 /*
- * $PchId: event.h,v 1.4 1995/11/21 06:45:27 philip Exp $
+ * $PchId: event.h,v 1.5 2004/08/03 16:23:49 philip Exp $
  */
index b05b1aa9511441c3dcb631a6893e17cb063effa8..a24507b1695b275f900f6f941bec58f4cab9ff8d 100644 (file)
@@ -10,6 +10,7 @@ Copyright 1995 Philip Homburg
 #include "type.h"
 
 #include "assert.h"
+#include "clock.h"
 #include "icmp.h"
 #include "icmp_lib.h"
 #include "io.h"
@@ -25,9 +26,13 @@ typedef struct icmp_port
        int icp_state;
        int icp_ipport;
        int icp_ipfd;
+       unsigned icp_rate_count;
+       unsigned icp_rate_report;
+       time_t icp_rate_lasttime;
        acc_t *icp_head_queue;
        acc_t *icp_tail_queue;
        acc_t *icp_write_pack;
+       event_t icp_event;
 } icmp_port_t;
 
 #define ICPF_EMPTY     0x0
@@ -71,9 +76,11 @@ FORWARD acc_t *make_repl_ip ARGS(( ip_hdr_t *ip_hdr,
        int ip_len ));
 FORWARD void enqueue_pack ARGS(( icmp_port_t *icmp_port,
        acc_t *reply_ip_hdr ));
-FORWARD void icmp_write ARGS(( icmp_port_t *icmp_port ));
+FORWARD int icmp_rate_limit ARGS(( icmp_port_t *icmp_port,
+       acc_t *reply_ip_hdr ));
+FORWARD void icmp_write ARGS(( event_t *ev, ev_arg_t ev_arg ));
 FORWARD void icmp_buffree ARGS(( int priority ));
-FORWARD acc_t *icmp_err_pack ARGS(( acc_t *pack, icmp_hdr_t **icmp_hdr ));
+FORWARD acc_t *icmp_err_pack ARGS(( acc_t *pack, icmp_hdr_t **icmp_hdr_pp ));
 #ifdef BUF_CONSISTENCY_CHECK
 FORWARD void icmp_bufcheck ARGS(( void ));
 #endif
@@ -92,11 +99,13 @@ PUBLIC void icmp_init()
 
        for (i= 0, icmp_port= icmp_port_table; i<ip_conf_nr; i++, icmp_port++)
        {
-#if ZERO
                icmp_port->icp_flags= ICPF_EMPTY;
                icmp_port->icp_state= ICPS_BEGIN;
-#endif
                icmp_port->icp_ipport= i;
+               icmp_port->icp_rate_count= 0;
+               icmp_port->icp_rate_report= ICMP_MAX_RATE;
+               icmp_port->icp_rate_lasttime= 0;
+               ev_init(&icmp_port->icp_event);
        }
 
 #ifndef BUF_CONSISTENCY_CHECK
@@ -119,8 +128,9 @@ icmp_port_t *icmp_port;
        {
        case ICPS_BEGIN:
                icmp_port->icp_head_queue= 0;
-               icmp_port->icp_ipfd= ip_open (icmp_port->icp_ipport,
-                       icmp_port->icp_ipport, icmp_getdata, icmp_putdata, 0);
+               icmp_port->icp_ipfd= ip_open(icmp_port->icp_ipport,
+                       icmp_port->icp_ipport, icmp_getdata, icmp_putdata,
+                       0 /* no put_pkt */, 0 /* no select_res */);
                if (icmp_port->icp_ipfd<0)
                {
                        DBLOCK(1, printf("unable to open ip_port %d\n",
@@ -159,6 +169,7 @@ int for_ioctl;
        nwio_ipopt_t *ipopt;
        acc_t *data;
        int result;
+       ev_arg_t ev_arg;
 
        icmp_port= &icmp_port_table[port];
 
@@ -177,9 +188,10 @@ int for_ioctl;
                        }
                        if (icmp_port->icp_flags & ICPF_WRITE_SP)
                        {
-                               icmp_port->icp_flags &=
-                                       ~(ICPF_WRITE_IP|ICPF_WRITE_SP);
-                               icmp_write (icmp_port);
+                               icmp_port->icp_flags &= ~ICPF_WRITE_SP;
+                               ev_arg.ev_ptr= icmp_port;
+                               ev_enqueue(&icmp_port->icp_event, icmp_write,
+                                       ev_arg);
                        }
                        return NW_OK;
                }
@@ -202,9 +214,7 @@ int for_ioctl;
                        return NW_OK;
                }
 
-assert (count == sizeof (*ipopt));
                data= bf_memreq (sizeof (*ipopt));
-assert (data->acc_length == sizeof(*ipopt));
                ipopt= (nwio_ipopt_t *)ptr2acc_data(data);
                ipopt->nwio_flags= NWIO_COPY | NWIO_EN_LOC |
                        NWIO_EN_BROAD |
@@ -213,10 +223,10 @@ assert (data->acc_length == sizeof(*ipopt));
                ipopt->nwio_proto= IPPROTO_ICMP;
                return data;
        default:
-               DBLOCK(1, printf("unknown state %d\n",
-                       icmp_port->icp_state));
-               return 0;
+               break;
        }
+       DBLOCK(1, printf("unknown state %d\n", icmp_port->icp_state));
+       return NULL;
 }
 
 PRIVATE int icmp_putdata(port, offset, data, for_ioctl)
@@ -232,7 +242,6 @@ int for_ioctl;
 
        if (icmp_port->icp_flags & ICPF_READ_IP)
        {
-assert (!for_ioctl);
                if (!data)
                {
                        result= (int)offset;
@@ -266,10 +275,6 @@ icmp_port_t *icmp_port;
 {
        int result;
 
-assert (!(icmp_port->icp_flags & (ICPF_READ_IP|ICPF_READ_SP) || 
-       (icmp_port->icp_flags & (ICPF_READ_IP|ICPF_READ_SP)) ==
-       (ICPF_READ_IP|ICPF_READ_SP)));
-
        for (;;)
        {
                icmp_port->icp_flags |= ICPF_READ_IP;
@@ -289,12 +294,17 @@ int port_nr;
 acc_t *pack;
 int code;
 {
-       acc_t *icmp_acc;
        icmp_hdr_t *icmp_hdr;
        icmp_port_t *icmp_port;
 
-       assert(0 <= port_nr && port_nr < ip_conf_nr);
-       icmp_port= &icmp_port_table[port_nr];
+       if (port_nr >= 0 && port_nr < ip_conf_nr)
+               icmp_port= &icmp_port_table[port_nr];
+       else
+       {
+               printf("icmp_snd_time_exceeded: strange port %d\n", port_nr);
+               bf_afree(pack);
+               return;
+       }
        pack= icmp_err_pack(pack, &icmp_hdr);
        if (pack == NULL)
                return;
@@ -311,12 +321,17 @@ acc_t *pack;
 int code;
 ipaddr_t gw;
 {
-       acc_t *icmp_acc;
        icmp_hdr_t *icmp_hdr;
        icmp_port_t *icmp_port;
 
-       assert(0 <= port_nr && port_nr < ip_conf_nr);
-       icmp_port= &icmp_port_table[port_nr];
+       if (port_nr >= 0 && port_nr < ip_conf_nr)
+               icmp_port= &icmp_port_table[port_nr];
+       else
+       {
+               printf("icmp_snd_redirect: strange port %d\n", port_nr);
+               bf_afree(pack);
+               return;
+       }
        pack= icmp_err_pack(pack, &icmp_hdr);
        if (pack == NULL)
                return;
@@ -335,12 +350,17 @@ int port_nr;
 acc_t *pack;
 int code;
 {
-       acc_t *icmp_acc;
        icmp_hdr_t *icmp_hdr;
        icmp_port_t *icmp_port;
 
-       assert(0 <= port_nr && port_nr < ip_conf_nr);
-       icmp_port= &icmp_port_table[port_nr];
+       if (port_nr >= 0 && port_nr < ip_conf_nr)
+               icmp_port= &icmp_port_table[port_nr];
+       else
+       {
+               printf("icmp_snd_unreachable: strange port %d\n", port_nr);
+               bf_afree(pack);
+               return;
+       }
        pack= icmp_err_pack(pack, &icmp_hdr);
        if (pack == NULL)
                return;
@@ -351,6 +371,36 @@ int code;
        enqueue_pack(icmp_port, pack);
 }
 
+PUBLIC void icmp_snd_mtu(port_nr, pack, mtu)  /* Queue a destination-unreachable (ICMP_FRAGM_AND_DF) error carrying mtu */
+int port_nr;   /* index into icmp_port_table; out-of-range values are logged and pack is freed */
+acc_t *pack;   /* offending packet; passed on to icmp_err_pack */
+u16_t mtu;     /* host byte order; stored in the header with htons() */
+{
+       icmp_hdr_t *icmp_hdr;
+       icmp_port_t *icmp_port;
+
+       if (port_nr >= 0 && port_nr < ip_conf_nr)
+               icmp_port= &icmp_port_table[port_nr];
+       else
+       {
+               printf("icmp_snd_mtu: strange port %d\n", port_nr);
+               bf_afree(pack);
+               return;
+       }
+
+       pack= icmp_err_pack(pack, &icmp_hdr);
+       if (pack == NULL)
+               return; /* icmp_err_pack did not produce an error packet */
+       icmp_hdr->ih_type= ICMP_TYPE_DST_UNRCH;
+       icmp_hdr->ih_code= ICMP_FRAGM_AND_DF;
+       icmp_hdr->ih_hun.ihh_mtu.im_mtu= htons(mtu);
+       icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum,   /* presumably an incremental checksum update for the type/code bytes set above */
+               (u16_t *)&icmp_hdr->ih_type, 2);
+       icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum,   /* ... and likewise for the MTU field */
+               (u16_t *)&icmp_hdr->ih_hun.ihh_mtu.im_mtu, 2);
+       enqueue_pack(icmp_port, pack);
+}
+
 PRIVATE void process_data(icmp_port, data)
 icmp_port_t *icmp_port;
 acc_t *data;
@@ -378,16 +428,23 @@ acc_t *data;
 
        pack_len= bf_bufsize(data);
        pack_len -= ip_hdr_len;
-       if (pack_len < ICMP_MIN_HDR_LEN)
+       if (pack_len < ICMP_MIN_HDR_SIZE)
        {
-               DBLOCK(1, printf("got an incomplete icmp packet\n"));
+               if (pack_len == 0 && ip_hdr->ih_proto == 0)
+               {
+                       /* IP layer reports new ip address, which can be
+                        * ignored.
+                        */
+               }
+               else
+                       DBLOCK(1, printf("got an incomplete icmp packet\n"));
                bf_afree(data);
                return;
        }
 
        icmp_data= bf_cut(data, ip_hdr_len, pack_len);
 
-       icmp_data= bf_packIffLess (icmp_data, ICMP_MIN_HDR_LEN);
+       icmp_data= bf_packIffLess (icmp_data, ICMP_MIN_HDR_SIZE);
        icmp_hdr= (icmp_hdr_t *)ptr2acc_data(icmp_data);
 
        if ((u16_t)~icmp_pack_oneCsum(icmp_data))
@@ -450,9 +507,10 @@ ip_hdr_t *ip_hdr;
 icmp_hdr_t *icmp_hdr;
 {
        acc_t *repl_ip_hdr, *repl_icmp;
+       ipaddr_t tmpaddr, locaddr, netmask;
        icmp_hdr_t *repl_icmp_hdr;
        i32_t tmp_chksum;
-       u16_t u16;
+       ip_port_t *ip_port;
 
        if (icmp_hdr->ih_code != 0)
        {
@@ -463,16 +521,44 @@ icmp_hdr_t *icmp_hdr;
                bf_afree(icmp_data);
                return;
        }
-       if (icmp_len < ICMP_MIN_HDR_LEN + sizeof(icmp_id_seq_t))
+       if (icmp_len < ICMP_MIN_HDR_SIZE + sizeof(icmp_id_seq_t))
        {
                DBLOCK(1, printf("got an incomplete icmp echo request\n"));
                bf_afree(ip_data);
                bf_afree(icmp_data);
                return;
        }
+       tmpaddr= ntohl(ip_hdr->ih_dst);
+       if ((tmpaddr & 0xe0000000) == 0xe0000000 &&
+               tmpaddr != 0xffffffff)
+       {
+               /* Respond only to the all hosts multicast address until
+                * a decent listening service has been implemented
+                */
+               if (tmpaddr != 0xe0000001)
+               {
+                       bf_afree(ip_data);
+                       bf_afree(icmp_data);
+                       return;
+               }
+       }
+
+       /* Limit subnet broadcasts to the local net */
+       ip_port= &ip_port_table[icmp_port->icp_ipport];
+       locaddr= ip_port->ip_ipaddr;
+       netmask= ip_port->ip_subnetmask;
+       if (ip_hdr->ih_dst == (locaddr | ~netmask) &&
+               (ip_port->ip_flags & IPF_SUBNET_BCAST) &&
+               ((ip_hdr->ih_src ^ locaddr) & netmask) != 0)
+       {
+               /* Directed broadcast */
+               bf_afree(ip_data);
+               bf_afree(icmp_data);
+               return;
+       }
+
        repl_ip_hdr= make_repl_ip(ip_hdr, ip_len);
-       repl_icmp= bf_memreq (ICMP_MIN_HDR_LEN);
-assert (repl_icmp->acc_length == ICMP_MIN_HDR_LEN);
+       repl_icmp= bf_memreq (ICMP_MIN_HDR_SIZE);
        repl_icmp_hdr= (icmp_hdr_t *)ptr2acc_data(repl_icmp);
        repl_icmp_hdr->ih_type= ICMP_TYPE_ECHO_REPL;
        repl_icmp_hdr->ih_code= 0;
@@ -490,8 +576,8 @@ assert (repl_icmp->acc_length == ICMP_MIN_HDR_LEN);
        DBLOCK(2, printf("sending chksum 0x%x\n", repl_icmp_hdr->ih_chksum));
 
        repl_ip_hdr->acc_next= repl_icmp;
-       repl_icmp->acc_next= bf_cut (icmp_data, ICMP_MIN_HDR_LEN,
-               icmp_len - ICMP_MIN_HDR_LEN);
+       repl_icmp->acc_next= bf_cut (icmp_data, ICMP_MIN_HDR_SIZE,
+               icmp_len - ICMP_MIN_HDR_SIZE);
 
        bf_afree(ip_data);
        bf_afree(icmp_data);
@@ -508,8 +594,6 @@ acc_t *icmp_pack;
        int length;
        char byte_buf[2];
 
-       assert (icmp_pack);
-
        prev= 0;
 
        odd_byte= FALSE;
@@ -560,7 +644,6 @@ int ip_len;
        repl_hdr_len= IP_MIN_HDR_SIZE;
 
        repl= bf_memreq(repl_hdr_len);
-assert (repl->acc_length == repl_hdr_len);
 
        repl_ip_hdr= (ip_hdr_t *)ptr2acc_data(repl);
 
@@ -578,6 +661,25 @@ PRIVATE void enqueue_pack(icmp_port, reply_ip_hdr)
 icmp_port_t *icmp_port;
 acc_t *reply_ip_hdr;
 {
+       int r;
+       ev_arg_t ev_arg;
+
+       /* Check rate */
+       if (icmp_port->icp_rate_count >= ICMP_MAX_RATE)
+       {
+               /* Something is going wrong; check policy */
+               r= icmp_rate_limit(icmp_port, reply_ip_hdr);
+               if (r == -1)
+               {
+                       bf_afree(reply_ip_hdr);
+                       reply_ip_hdr= NULL;
+                       return;
+               }
+
+               /* OK, continue */
+       }
+       icmp_port->icp_rate_count++;
+
        reply_ip_hdr->acc_ext_link= 0;
 
        if (icmp_port->icp_head_queue)
@@ -593,25 +695,116 @@ acc_t *reply_ip_hdr;
        icmp_port->icp_tail_queue= reply_ip_hdr;
 
        if (!(icmp_port->icp_flags & ICPF_WRITE_IP))
-               icmp_write(icmp_port);
+       {
+               icmp_port->icp_flags |= ICPF_WRITE_IP;
+               ev_arg.ev_ptr= icmp_port;
+               ev_enqueue(&icmp_port->icp_event, icmp_write, ev_arg);
+       }
 }
 
-PRIVATE void icmp_write(icmp_port)
+PRIVATE int icmp_rate_limit(icmp_port, reply_ip_hdr)  /* Returns 0 to let the packet through, -1 to have the caller drop it */
 icmp_port_t *icmp_port;
+acc_t *reply_ip_hdr;   /* only inspected (through a bf_dupacc copy) for the report; never freed here */
+{
+       time_t t;
+       acc_t *pack;
+       ip_hdr_t *ip_hdr;
+       icmp_hdr_t *icmp_hdr;
+       int hdrlen, icmp_hdr_len, type;
+
+       /* Check the time first */
+       t= get_time();
+       if (t >= icmp_port->icp_rate_lasttime + ICMP_RATE_INTERVAL)
+       {
+               icmp_port->icp_rate_lasttime= t;        /* a new interval starts now */
+               icmp_port->icp_rate_count= 0;
+               return 0;
+       }
+
+       icmp_port->icp_rate_count++;    /* from here on every path returns -1 */
+
+       /* Adjust report limit if necessary */
+       if (icmp_port->icp_rate_count >
+               icmp_port->icp_rate_report+ICMP_RATE_WARN)
+       {
+               icmp_port->icp_rate_report *= 2;        /* the next batch of reports starts at twice the count */
+               return -1;
+       }
+
+       /* Do we need to report */
+       if (icmp_port->icp_rate_count < icmp_port->icp_rate_report)
+               return -1;      /* below the report threshold: drop silently */
+
+       pack= bf_dupacc(reply_ip_hdr);
+       pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE);
+       ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+       printf("icmp[%d]: dropping ICMP packet #%d to ",
+               icmp_port->icp_ipport, icmp_port->icp_rate_count);
+       writeIpAddr(ip_hdr->ih_dst);
+       hdrlen= (ip_hdr->ih_vers_ihl & IH_IHL_MASK)*4; /* IP header length in bytes */
+       pack= bf_packIffLess(pack, hdrlen+ICMP_MIN_HDR_SIZE);
+       ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);         /* refetch: pack was reassigned above */
+       icmp_hdr= (icmp_hdr_t *)(ptr2acc_data(pack)+hdrlen);
+       type= icmp_hdr->ih_type;
+       printf(" type %d, code %d\n", type, icmp_hdr->ih_code);
+       switch(type)
+       {
+       case ICMP_TYPE_DST_UNRCH:
+       case ICMP_TYPE_SRC_QUENCH:
+       case ICMP_TYPE_REDIRECT:
+       case ICMP_TYPE_TIME_EXCEEDED:
+       case ICMP_TYPE_PARAM_PROBLEM:
+               icmp_hdr_len= offsetof(struct icmp_hdr, ih_dun);        /* size of the ICMP header in front of the ih_dun data */
+               pack= bf_packIffLess(pack,
+                       hdrlen+icmp_hdr_len+IP_MIN_HDR_SIZE);
+               ip_hdr= (ip_hdr_t *)(ptr2acc_data(pack)+hdrlen+icmp_hdr_len);   /* IP header embedded in the ICMP data */
+               icmp_hdr= (icmp_hdr_t *)(ptr2acc_data(pack)+hdrlen);
+               printf("\tinfo %08x, original dst ",
+                       ntohs(icmp_hdr->ih_hun.ihh_unused));    /* NOTE(review): ntohs() printed with %08x -- confirm the width of ihh_unused */
+               writeIpAddr(ip_hdr->ih_dst);
+               printf(", proto %d, length %u\n",
+                       ip_hdr->ih_proto, ntohs(ip_hdr->ih_length));
+               break;
+       default:
+               break;
+       }
+       bf_afree(pack); pack= NULL;     /* release the copy made with bf_dupacc */
+
+       return -1;
+}
+
+PRIVATE void icmp_write(ev, ev_arg)
+event_t *ev;
+ev_arg_t ev_arg;
 {
        int result;
+       icmp_port_t *icmp_port;
+       acc_t *data;
+
+       icmp_port= ev_arg.ev_ptr;
+       assert(ev == &icmp_port->icp_event);
 
-assert (!(icmp_port->icp_flags & ICPF_WRITE_IP));
+       assert (icmp_port->icp_flags & ICPF_WRITE_IP);
+       assert (!(icmp_port->icp_flags & ICPF_WRITE_SP));
 
        while (icmp_port->icp_head_queue != NULL)
        {
-               assert(icmp_port->icp_write_pack == NULL);
-               icmp_port->icp_write_pack= icmp_port->icp_head_queue;
-               icmp_port->icp_head_queue= icmp_port->icp_head_queue->
-                       acc_ext_link;
+               data= icmp_port->icp_head_queue;
+               icmp_port->icp_head_queue= data->acc_ext_link;
 
-               icmp_port->icp_flags |= ICPF_WRITE_IP;
+               result= ip_send(icmp_port->icp_ipfd, data,
+                       bf_bufsize(data));
+               if (result != NW_WOULDBLOCK)
+               {
+                       if (result == NW_OK)
+                               continue;
+                       DBLOCK(1, printf("icmp_write: error %d\n", result););
+                       continue;
+               }
 
+               assert(icmp_port->icp_write_pack == NULL);
+               icmp_port->icp_write_pack= data;
+                       
                result= ip_write(icmp_port->icp_ipfd,
                        bf_bufsize(icmp_port->icp_write_pack));
                if (result == NW_SUSPEND)
@@ -619,8 +812,8 @@ assert (!(icmp_port->icp_flags & ICPF_WRITE_IP));
                        icmp_port->icp_flags |= ICPF_WRITE_SP;
                        return;
                }
-               icmp_port->icp_flags &= ~ICPF_WRITE_IP;
        }
+       icmp_port->icp_flags &= ~ICPF_WRITE_IP;
 }
 
 PRIVATE void icmp_buffree(priority)
@@ -679,6 +872,8 @@ icmp_hdr_t *icmp_hdr;
        ip_hdr_t *old_ip_hdr;
        int ip_port_nr;
        ipaddr_t dst, mask;
+       size_t old_pack_size;
+       u16_t new_mtu;
 
        if (icmp_len < 8 + IP_MIN_HDR_SIZE)
        {
@@ -715,6 +910,24 @@ icmp_hdr_t *icmp_hdr;
                 * It should be handed to the appropriate transport layer.
                 */
                break;
+       case ICMP_FRAGM_AND_DF:
+
+               DBLOCK(1, printf("icmp_dst_unreach: got mtu icmp from ");
+                       writeIpAddr(ip_hdr->ih_src);
+                       printf("; original destination: ");
+                       writeIpAddr(old_ip_hdr->ih_dst);
+                       printf("; protocol: %d\n",
+                       old_ip_hdr->ih_proto));
+               old_pack_size= ntohs(old_ip_hdr->ih_length);
+               if (!old_pack_size)
+                       break;
+               new_mtu= ntohs(icmp_hdr->ih_hun.ihh_mtu.im_mtu);
+               if (!new_mtu || new_mtu > old_pack_size)
+                       new_mtu= old_pack_size-1;
+               ipr_mtu(ip_port_nr, old_ip_hdr->ih_dst, new_mtu,
+                       IPR_MTU_TIMEOUT);
+               break;
+
        default:
                DBLOCK(1, printf("icmp_dst_unreach: got strange code %d from ",
                        icmp_hdr->ih_code);
@@ -785,9 +998,12 @@ icmp_hdr_t *icmp_hdr;
 {
        int entries;
        int entry_size;
+       u32_t addr;
+       i32_t pref;
        u16_t lifetime;
        int i;
        char *bufp;
+       ip_port_t *ip_port;
 
        if (icmp_len < 8)
        {
@@ -836,12 +1052,15 @@ icmp_hdr_t *icmp_hdr;
                        lifetime));
                return;
        }
+       ip_port= &ip_port_table[icmp_port->icp_ipport];
        for (i= 0, bufp= (char *)&icmp_hdr->ih_dun.uhd_data[0]; i< entries; i++,
                bufp += entry_size)
        {
+               addr= *(ipaddr_t *)bufp;
+               pref= ntohl(*(u32_t *)(bufp+4));
                ipr_add_oroute(icmp_port->icp_ipport, HTONL(0L), HTONL(0L), 
-                       *(ipaddr_t *)bufp, lifetime * HZ, 1, 0, 
-                       ntohl(*(i32_t *)(bufp+4)), NULL);
+                       addr, lifetime ? lifetime * HZ : 1,
+                       1, 0, 0, pref, NULL);
        }
 }
                
@@ -890,55 +1109,78 @@ icmp_hdr_t *icmp_hdr;
        bf_afree(old_ip_pack);
 }
 
-PRIVATE acc_t *icmp_err_pack(pack, icmp_hdr)
+PRIVATE acc_t *icmp_err_pack(pack, icmp_hdr_pp)
 acc_t *pack;
-icmp_hdr_t **icmp_hdr;
+icmp_hdr_t **icmp_hdr_pp;
 {
        ip_hdr_t *ip_hdr;
+       icmp_hdr_t *icmp_hdr_p;
        acc_t *ip_pack, *icmp_pack, *tmp_pack;
-       int ip_hdr_len, icmp_hdr_len;
-       size_t size;
+       int ip_hdr_len, icmp_hdr_len, ih_type;
+       size_t size, pack_len;
        ipaddr_t dest, netmask;
        nettype_t nettype;
 
        pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE);
        ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+       ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
+       pack_len= bf_bufsize(pack);
 
-       /* If the IP protocol is ICMP or the fragment offset is non-zero,
+       /* If the IP protocol is ICMP (except echo request/reply) or the
+        * fragment offset is non-zero,
         * drop the packet. Also check if the source address is valid.
         */
-       if (ip_hdr->ih_proto == IPPROTO_ICMP || 
-               (ntohs(ip_hdr->ih_flags_fragoff) & IH_FRAGOFF_MASK) != 0)
+       if ((ntohs(ip_hdr->ih_flags_fragoff) & IH_FRAGOFF_MASK) != 0)
        {
                bf_afree(pack);
                return NULL;
        }
+       if (ip_hdr->ih_proto == IPPROTO_ICMP)
+       {
+               if (ip_hdr_len>IP_MIN_HDR_SIZE)
+               {
+                       pack= bf_packIffLess(pack, ip_hdr_len);
+                       ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+               }
+
+               if (pack_len < ip_hdr_len+ICMP_MIN_HDR_SIZE)
+               {
+                       bf_afree(pack);
+                       return NULL;
+               }
+               icmp_pack= bf_cut(pack, ip_hdr_len, ICMP_MIN_HDR_SIZE);
+               icmp_pack= bf_packIffLess (icmp_pack, ICMP_MIN_HDR_SIZE);
+               icmp_hdr_p= (icmp_hdr_t *)ptr2acc_data(icmp_pack);
+               ih_type= icmp_hdr_p->ih_type;
+               bf_afree(icmp_pack); icmp_pack= NULL;
+
+               if (ih_type != ICMP_TYPE_ECHO_REQ &&
+                       ih_type != ICMP_TYPE_ECHO_REPL)
+               {
+                       bf_afree(pack);
+                       return NULL;
+               }
+       }
        dest= ip_hdr->ih_src;
        nettype= ip_nettype(dest);
        netmask= ip_netmask(nettype);
-       if ((nettype != IPNT_CLASS_A && nettype != IPNT_LOCAL &&
-               nettype != IPNT_CLASS_B && nettype != IPNT_CLASS_C) ||
-               (dest & ~netmask) == 0 || (dest & ~netmask) == ~netmask)
+       if (nettype != IPNT_CLASS_A && nettype != IPNT_LOCAL &&
+               nettype != IPNT_CLASS_B && nettype != IPNT_CLASS_C)
        {
-#if !CRAMPED
                printf("icmp_err_pack: invalid source address: ");
                writeIpAddr(dest);
                printf("\n");
-#endif
                bf_afree(pack);
                return NULL;
        }
 
        /* Take the IP header and the first 64 bits of user data. */
        size= ntohs(ip_hdr->ih_length);
-       ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
-       if (size < ip_hdr_len || bf_bufsize(pack) < size)
+       if (size < ip_hdr_len || pack_len < size)
        {
-#if !CRAMPED
                printf("icmp_err_pack: wrong packet size:\n");
                printf("\thdrlen= %d, ih_length= %d, bufsize= %d\n",
-                       ip_hdr_len, size, bf_bufsize(pack));
-#endif
+                       ip_hdr_len, size, pack_len);
                bf_afree(pack);
                return NULL;
        }
@@ -955,12 +1197,13 @@ icmp_hdr_t **icmp_hdr;
        pack= bf_append(icmp_pack, pack);
        size += icmp_hdr_len;
        pack= bf_packIffLess(pack, icmp_hdr_len);
-       *icmp_hdr= (icmp_hdr_t *)ptr2acc_data(pack);
-       (*icmp_hdr)->ih_type= 0;
-       (*icmp_hdr)->ih_code= 0;
-       (*icmp_hdr)->ih_chksum= 0;
-       (*icmp_hdr)->ih_hun.ihh_unused= 0;
-       (*icmp_hdr)->ih_chksum= ~icmp_pack_oneCsum(pack);
+       icmp_hdr_p= (icmp_hdr_t *)ptr2acc_data(pack);
+       icmp_hdr_p->ih_type= 0;
+       icmp_hdr_p->ih_code= 0;
+       icmp_hdr_p->ih_chksum= 0;
+       icmp_hdr_p->ih_hun.ihh_unused= 0;
+       icmp_hdr_p->ih_chksum= ~icmp_pack_oneCsum(pack);
+       *icmp_hdr_pp= icmp_hdr_p;
 
        /* Create an IP header */
        ip_hdr_len= IP_MIN_HDR_SIZE;
@@ -982,5 +1225,5 @@ icmp_hdr_t **icmp_hdr;
 }
 
 /*
- * $PchId: icmp.c,v 1.8 1996/12/17 07:53:34 philip Exp $
+ * $PchId: icmp.c,v 1.23 2005/06/28 14:16:56 philip Exp $
  */
index 7bd6b0b6b96add4b09a4d3e9d1865e9858bda74b..12fe3b954e63faf5d92bfbfe3af2d098d18d46cc 100644 (file)
@@ -8,7 +8,14 @@ Copyright 1995 Philip Homburg
 #define ICMP_H
 
 #define ICMP_MAX_DATAGRAM      8196
-#define ICMP_DEF_TTL           60
+#define ICMP_DEF_TTL           96
+
+/* Rate limit. The implementation is a bit sloppy and may send twice the
+ * number of packets. 
+ */
+#define ICMP_MAX_RATE          100     /* This many per interval */
+#define ICMP_RATE_INTERVAL     (1*HZ)  /* Interval in ticks */
+#define ICMP_RATE_WARN         10      /* Report this many dropped packets */
 
 /* Prototypes */
 
@@ -19,5 +26,5 @@ void icmp_init ARGS(( void ));
 #endif /* ICMP_H */
 
 /*
- * $PchId: icmp.h,v 1.4 1995/11/21 06:45:27 philip Exp $
+ * $PchId: icmp.h,v 1.7 2001/04/19 19:06:18 philip Exp $
  */
index 9174f4cb20faf140ccdb5fa9b8d6e3b36c8f6c8b..7501f1a7aecdadf00a81834b18245912fbbe9eb3 100644 (file)
@@ -16,9 +16,10 @@ void icmp_snd_time_exceeded ARGS(( int port_nr, acc_t *pack, int code ));
 void icmp_snd_unreachable ARGS(( int port_nr, acc_t *pack, int code ));
 void icmp_snd_redirect ARGS(( int port_nr, acc_t *pack, int code,
                                                        ipaddr_t gw ));
+void icmp_snd_mtu ARGS(( int port_nr, acc_t *pack, U16_t mtu ));
 
 #endif /* ICMP_LIB_H */
 
 /*
- * $PchId: icmp_lib.h,v 1.5 1996/12/17 07:54:09 philip Exp $
+ * $PchId: icmp_lib.h,v 1.6 2002/06/08 21:32:44 philip Exp $
  */
index df641b3ededdfdf38ef35fe12c67f40e564dcb52..13c252ca1b4aaca2b82177b5ac08e54d8edaea20 100644 (file)
@@ -30,5 +30,5 @@ ether_addr_t *addr;
 }
 
 /*
- * $PchId: io.c,v 1.5 1995/11/21 06:45:27 philip Exp $
+ * $PchId: io.c,v 1.6 1998/10/23 20:24:34 philip Exp $
  */
index f04bac6ec25f3e8de4b2ec2e83a6737f486af097..11771c987948c90f343bbce0e754041b2b292b3c 100644 (file)
@@ -25,6 +25,7 @@ THIS_FILE
 
 FORWARD void ip_close ARGS(( int fd ));
 FORWARD int ip_cancel ARGS(( int fd, int which_operation ));
+FORWARD int ip_select ARGS(( int fd, unsigned operations ));
 
 FORWARD void ip_buffree ARGS(( int priority ));
 #ifdef BUF_CONSISTENCY_CHECK
@@ -55,7 +56,6 @@ PUBLIC void ip_init()
        assert (BUF_S >= sizeof(nwio_ipopt_t));
        assert (BUF_S >= sizeof(nwio_route_t));
 
-#if ZERO
        for (i=0, ip_ass= ip_ass_table; i<IP_ASS_NR; i++, ip_ass++)
        {
                ip_ass->ia_frags= 0;
@@ -68,20 +68,18 @@ PUBLIC void ip_init()
                ip_fd->if_flags= IFF_EMPTY;
                ip_fd->if_rdbuf_head= 0;
        }
-#endif
 
        for (i=0, ip_port= ip_port_table, icp= ip_conf;
                i<ip_conf_nr; i++, ip_port++, icp++)
        {
                ip_port->ip_port= i;
-#if ZERO
                ip_port->ip_flags= IPF_EMPTY;
-#endif
                ip_port->ip_dev_main= (ip_dev_t)ip_bad_callback;
                ip_port->ip_dev_set_ipaddr= (ip_dev_t)ip_bad_callback;
                ip_port->ip_dev_send= (ip_dev_send_t)ip_bad_callback;
                ip_port->ip_dl_type= icp->ic_devtype;
-               ip_port->ip_mss= IP_DEF_MSS;
+               ip_port->ip_mtu= IP_DEF_MTU;
+               ip_port->ip_mtu_max= IP_MAX_PACKSIZE;
 
                switch(ip_port->ip_dl_type)
                {
@@ -92,7 +90,6 @@ PUBLIC void ip_init()
                                continue;
                        assert(result == NW_OK);
                        break;
-#if ENABLE_PSIP
                case IPDL_PSIP:
                        ip_port->ip_dl.dl_ps.ps_port= icp->ic_port;
                        result= ipps_init(ip_port);
@@ -100,24 +97,21 @@ PUBLIC void ip_init()
                                continue;
                        assert(result == NW_OK);
                        break;
-#endif
-#if !CRAMPED
                default:
                        ip_panic(( "unknown ip_dl_type %d", 
                                                        ip_port->ip_dl_type ));
-#endif
+                       break;
                }
-#if ZERO
                ip_port->ip_loopb_head= NULL;
                ip_port->ip_loopb_tail= NULL;
                ev_init(&ip_port->ip_loopb_event);
-#endif
+               ip_port->ip_routeq_head= NULL;
+               ip_port->ip_routeq_tail= NULL;
+               ev_init(&ip_port->ip_routeq_event);
                ip_port->ip_flags |= IPF_CONFIGURED;
-#if ZERO
                ip_port->ip_proto_any= NULL;
                for (j= 0; j<IP_PROTO_HASH_NR; j++)
                        ip_port->ip_proto[j]= NULL;
-#endif
        }
 
 #ifndef BUF_CONSISTENCY_CHECK
@@ -137,7 +131,7 @@ PUBLIC void ip_init()
 
                sr_add_minor(if2minor(ip_conf[i].ic_ifno, IP_DEV_OFF),
                        i, ip_open, ip_close, ip_read,
-                       ip_write, ip_ioctl, ip_cancel);
+                       ip_write, ip_ioctl, ip_cancel, ip_select);
 
                (*ip_port->ip_dev_main)(ip_port);
        }
@@ -156,8 +150,8 @@ int which_operation;
        switch (which_operation)
        {
        case SR_CANCEL_IOCTL:
-               assert (ip_fd->if_flags & IFF_GIPCONF_IP);
-               ip_fd->if_flags &= ~IFF_GIPCONF_IP;
+               assert (ip_fd->if_flags & IFF_IOCTL_IP);
+               ip_fd->if_flags &= ~IFF_IOCTL_IP;
                repl_res= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 
                        (size_t)EINTR, (size_t)0, TRUE);
                assert (!repl_res);
@@ -179,21 +173,29 @@ int which_operation;
                assert (!repl_res);
                break;
 #endif
-#if !CRAMPED
        default:
                ip_panic(( "unknown cancel request" ));
-#endif
+               break;
        }
        return NW_OK;
 }
 
+PRIVATE int ip_select(fd, operations)
+int fd;
+unsigned operations;
+{
+       printf("ip_select: not implemented\n");
+       return 0;
+}
 
-PUBLIC int ip_open (port, srfd, get_userdata, put_userdata, put_pkt)
+PUBLIC int ip_open (port, srfd, get_userdata, put_userdata, put_pkt,
+       select_res)
 int port;
 int srfd;
 get_userdata_t get_userdata;
 put_userdata_t put_userdata;
 put_pkt_t put_pkt;
+select_res_t select_res;
 {
        int i;
        ip_fd_t *ip_fd;
@@ -228,6 +230,7 @@ put_pkt_t put_pkt;
        ip_fd->if_get_userdata= get_userdata;
        ip_fd->if_put_userdata= put_userdata;
        ip_fd->if_put_pkt= put_pkt;
+
        return i;
 }
 
@@ -295,7 +298,7 @@ int priority;
                        if (priority == IP_PRI_PORTBUFS)
                        {
                                next_pack= ip_port->ip_dl.dl_ps.ps_send_head;
-                               while(next_pack != NULL)
+                               while (next_pack != NULL)
                                {
                                        pack= next_pack;
                                        next_pack= pack->acc_ext_link;
@@ -317,7 +320,7 @@ int priority;
                        {
                                if (ev_in_queue(&ip_port->ip_loopb_event))
                                {
-#if !CRAMPED
+#if DEBUG
                                        printf(
 "not freeing ip_loopb_head, ip_loopb_event enqueued\n");
 #endif
@@ -329,6 +332,30 @@ int priority;
                                }
                        }
                        ip_port->ip_loopb_head= next_pack;
+
+                       next_pack= ip_port->ip_routeq_head;
+                       while(next_pack && next_pack->acc_ext_link)
+                       {
+                               pack= next_pack;
+                               next_pack= pack->acc_ext_link;
+                               bf_afree(pack);
+                       }
+                       if (next_pack)
+                       {
+                               if (ev_in_queue(&ip_port->ip_routeq_event))
+                               {
+#if DEBUG
+                                       printf(
+"not freeing ip_loopb_head, ip_routeq_event enqueued\n");
+#endif
+                               }
+                               else
+                               {
+                                       bf_afree(next_pack);
+                                       next_pack= NULL;
+                               }
+                       }
+                       ip_port->ip_routeq_head= next_pack;
                }
        }
        if (priority == IP_PRI_FDBUFS_EXTRA)
@@ -410,6 +437,11 @@ PRIVATE void ip_bufcheck()
                {
                        bf_check_acc(pack);
                }
+               for (pack= ip_port->ip_routeq_head; pack;
+                       pack= pack->acc_ext_link)
+               {
+                       bf_check_acc(pack);
+               }
        }
        for (i= 0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
        {
@@ -430,11 +462,9 @@ PRIVATE void ip_bufcheck()
 PRIVATE void ip_bad_callback(ip_port)
 struct ip_port *ip_port;
 {
-#if !CRAMPED
        ip_panic(( "no callback filled in for port %d", ip_port->ip_port ));
-#endif
 }
 
 /*
- * $PchId: ip.c,v 1.7 1996/12/17 07:54:47 philip Exp $
+ * $PchId: ip.c,v 1.19 2005/06/28 14:17:40 philip Exp $
  */
index 927eced83ac8153b44bbd2fb473bfc720b38d056..8d30f8723a720f0193eee88b0682f067f0cf0418 100644 (file)
@@ -15,7 +15,7 @@ void ip_prep ARGS(( void ));
 void ip_init ARGS(( void ));
 int  ip_open ARGS(( int port, int srfd,
        get_userdata_t get_userdata, put_userdata_t put_userdata,
-       put_pkt_t put_pkt ));
+       put_pkt_t put_pkt, select_res_t select_res ));
 int ip_ioctl ARGS(( int fd, ioreq_t req ));
 int ip_read ARGS(( int fd, size_t count ));
 int ip_write ARGS(( int fd, size_t count ));
@@ -24,5 +24,5 @@ int ip_send ARGS(( int fd, struct acc *data, size_t data_len ));
 #endif /* INET_IP_H */
 
 /*
- * $PchId: ip.h,v 1.6 1996/05/07 20:49:28 philip Exp $
+ * $PchId: ip.h,v 1.8 2005/06/28 14:17:57 philip Exp $
  */
index d05d77758585468b359194099778f5310b35c210..b94633f148ba499817c07bf6c9cd81fc0a006450 100644 (file)
@@ -16,6 +16,8 @@ Copyright 1995 Philip Homburg
 #include "clock.h"
 #include "eth.h"
 #include "event.h"
+#include "icmp_lib.h"
+#include "io.h"
 #include "ip.h"
 #include "ip_int.h"
 
@@ -27,8 +29,14 @@ typedef struct xmit_hdr
        ipaddr_t xh_ipaddr;
 } xmit_hdr_t;
 
-PRIVATE ether_addr_t broadcast_ethaddr= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-PRIVATE ipaddr_t broadcast_ipaddr= 0xFFFFFFFFL;
+PRIVATE ether_addr_t broadcast_ethaddr=
+{
+       { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+};
+PRIVATE ether_addr_t ipmulticast_ethaddr=
+{
+       { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }
+};
 
 FORWARD void do_eth_read ARGS(( ip_port_t *port ));
 FORWARD acc_t *get_eth_data ARGS(( int fd, size_t offset,
@@ -39,7 +47,7 @@ FORWARD void ipeth_main ARGS(( ip_port_t *port ));
 FORWARD void ipeth_set_ipaddr ARGS(( ip_port_t *port ));
 FORWARD void ipeth_restart_send ARGS(( ip_port_t *ip_port ));
 FORWARD int ipeth_send ARGS(( struct ip_port *ip_port, ipaddr_t dest, 
-       acc_t *pack, int broadcast ));
+       acc_t *pack, int type ));
 FORWARD void ipeth_arp_reply ARGS(( int ip_port_nr, ipaddr_t ipaddr,
        ether_addr_t *dst_ether_ptr ));
 FORWARD int ipeth_update_ttl ARGS(( time_t enq_time, time_t now,
@@ -56,7 +64,8 @@ ip_port_t *ip_port;
 
        ip_port->ip_dl.dl_eth.de_fd= eth_open(ip_port->
                ip_dl.dl_eth.de_port, ip_port->ip_port,
-               get_eth_data, put_eth_data, ip_eth_arrived);
+               get_eth_data, put_eth_data, ip_eth_arrived,
+               0 /* no select_res */);
        if (ip_port->ip_dl.dl_eth.de_fd < 0)
        {
                DBLOCK(1, printf("ip.c: unable to open eth port\n"));
@@ -71,15 +80,15 @@ ip_port_t *ip_port;
        ip_port->ip_dev_main= ipeth_main;
        ip_port->ip_dev_set_ipaddr= ipeth_set_ipaddr;
        ip_port->ip_dev_send= ipeth_send;
-       ip_port->ip_mss= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE;
+       ip_port->ip_mtu= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE;
+       ip_port->ip_mtu_max= ip_port->ip_mtu;
        return 0;
 }
 
 PRIVATE void ipeth_main(ip_port)
 ip_port_t *ip_port;
 {
-       int result, i;
-       ip_fd_t *ip_fd;
+       int result;
 
        switch (ip_port->ip_dl.dl_eth.de_state)
        {
@@ -91,8 +100,8 @@ ip_port_t *ip_port;
                        ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
                if (result<0)
                {
-                       DBLOCK(1, printf("eth_ioctl(..,%lx)=%d\n",
-                               NWIOSETHOPT, result));
+                       DBLOCK(1, printf("eth_ioctl(..,0x%lx)=%d\n",
+                               (unsigned long)NWIOSETHOPT, result));
                        return;
                }
                if (ip_port->ip_dl.dl_eth.de_state != IES_SETPROTO)
@@ -104,10 +113,8 @@ ip_port_t *ip_port;
                        ipeth_arp_reply);
                if (result != NW_OK)
                {
-#if !CRAMPED
                        printf("ipeth_main: arp_set_cb failed: %d\n",
                                result);
-#endif
                        return;
                }
 
@@ -122,28 +129,11 @@ ip_port_t *ip_port;
                /* fall through */
        case IES_GETIPADDR:
                ip_port->ip_dl.dl_eth.de_state= IES_MAIN;
-               for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
-               {
-                       if (!(ip_fd->if_flags & IFF_INUSE))
-                       {
-                               continue;
-                       }
-                       if (ip_fd->if_port != ip_port)
-                       {
-                               continue;
-                       }
-                       if (ip_fd->if_flags & IFF_GIPCONF_IP)
-                       {
-                               ip_ioctl (i, NWIOGIPCONF);
-                       }
-               }
                do_eth_read(ip_port);
                return;
-#if !CRAMPED
        default:
                ip_panic(( "unknown state: %d",
                        ip_port->ip_dl.dl_eth.de_state));
-#endif
        }
 }
 
@@ -208,11 +198,9 @@ int for_ioctl;
                assert (data);
                return data;
        default:
-#if !CRAMPED
                printf(
                "get_eth_data(%d, 0x%d, 0x%d) called but ip_state=0x%x\n",
                        fd, offset, count, ip_port->ip_dl.dl_eth.de_state);
-#endif
                break;
        }
        return 0;
@@ -225,7 +213,6 @@ acc_t *data;
 int for_ioctl;
 {
        ip_port_t *ip_port;
-       acc_t *pack;
        int result;
 
        ip_port= &ip_port_table[port];
@@ -261,11 +248,9 @@ int for_ioctl;
                ip_eth_arrived(port, data, bf_bufsize(data));
                return NW_OK;
        }
-#if !CRAMPED
        printf("ip_port->ip_dl.dl_eth.de_state= 0x%x",
                ip_port->ip_dl.dl_eth.de_state);
        ip_panic (( "strange status" ));
-#endif
 }
 
 PRIVATE void ipeth_set_ipaddr(ip_port)
@@ -276,19 +261,20 @@ ip_port_t *ip_port;
                ipeth_main(ip_port);
 }
 
-PRIVATE int ipeth_send(ip_port, dest, pack, broadcast)
+PRIVATE int ipeth_send(ip_port, dest, pack, type)
 struct ip_port *ip_port;
 ipaddr_t dest;
 acc_t *pack;
-int broadcast;
+int type;
 {
-       int r;
+       int i, r;
        acc_t *eth_pack, *tail;
        size_t pack_size;
        eth_hdr_t *eth_hdr;
        xmit_hdr_t *xmit_hdr;
-       ipaddr_t hostpart;
+       ipaddr_t hostpart, tmpaddr;
        time_t t;
+       u32_t *p;
 
        /* Start optimistic: the arp will succeed without blocking and the
         * ethernet packet can be sent without blocking also. Start with
@@ -301,26 +287,41 @@ int broadcast;
        if (pack_size<ETH_MIN_PACK_SIZE)
        {
                tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
+
+               /* Overwrite padding with a fixed pattern (not zeros) */
+               for (i= (ETH_MIN_PACK_SIZE-pack_size)/sizeof(*p),
+                       p= (u32_t *)ptr2acc_data(tail);
+                       i >= 0; i--, p++)
+               {
+                       *p= 0xdeadbeef;
+               }
+
                eth_pack= bf_append(eth_pack, tail);
        }
        eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
 
        /* Lookup the ethernet address */
-       if (broadcast)
-               eth_hdr->eh_dst= broadcast_ethaddr;
+       if (type != IP_LT_NORMAL)
+       {
+               if (type == IP_LT_BROADCAST)
+                       eth_hdr->eh_dst= broadcast_ethaddr;
+               else
+               {
+                       tmpaddr= ntohl(dest);
+                       eth_hdr->eh_dst= ipmulticast_ethaddr;
+                       eth_hdr->eh_dst.ea_addr[5]= tmpaddr & 0xff;
+                       eth_hdr->eh_dst.ea_addr[4]= (tmpaddr >> 8) & 0xff;
+                       eth_hdr->eh_dst.ea_addr[3]= (tmpaddr >> 16) & 0x7f;
+               }
+       }
        else
        {
-               if ((dest & ip_port->ip_subnetmask) != 
-                       (ip_port->ip_ipaddr & ip_port->ip_subnetmask))
+               if ((dest ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask)
                {
-#if !CRAMPED
                        ip_panic(( "invalid destination" ));
-#endif
                }
 
                hostpart= (dest & ~ip_port->ip_subnetmask);
-
-               assert(hostpart != 0);
                assert(dest != ip_port->ip_ipaddr);
 
                r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
@@ -354,12 +355,12 @@ int broadcast;
        }
 
        /* If we have no write in progress, we can try to send the ethernet
-        * packet using eth_send. If the IP packet is larger than mss,
-        * unqueue the packet and let ipeth_restart_send deal with it. 
+        * packet using eth_send. If the IP packet is larger than mtu,
+        * enqueue the packet and let ipeth_restart_send deal with it. 
         */
        pack_size= bf_bufsize(eth_pack);
        if (ip_port->ip_dl.dl_eth.de_frame == NULL && pack_size <=
-               ip_port->ip_mss + sizeof(*eth_hdr))
+               ip_port->ip_mtu + sizeof(*eth_hdr))
        {
                r= eth_send(ip_port->ip_dl.dl_eth.de_fd,
                        eth_pack, pack_size);
@@ -383,7 +384,7 @@ int broadcast;
        }
 
        /* Enqueue the packet, and store the current time, in the
-        * room for the ethernet source address.
+        * space for the ethernet source address.
         */
        t= get_time();
        assert(sizeof(t) <= sizeof(eth_hdr->eh_src));
@@ -406,10 +407,11 @@ PRIVATE void ipeth_restart_send(ip_port)
 ip_port_t *ip_port;
 {
        time_t now, enq_time;
-       int r;
+       int i, r;
        acc_t *eth_pack, *ip_pack, *next_eth_pack, *next_part, *tail;
        size_t pack_size;
        eth_hdr_t *eth_hdr, *next_eth_hdr;
+       u32_t *p;
 
        now= get_time();
 
@@ -422,19 +424,22 @@ ip_port_t *ip_port;
 
                pack_size= bf_bufsize(eth_pack);
 
-               if (pack_size > ip_port->ip_mss+sizeof(*eth_hdr))
+               if (pack_size > ip_port->ip_mtu+sizeof(*eth_hdr))
                {
                        /* Split the IP packet */
-                       ip_pack= eth_pack->acc_next;
-                       next_part= ip_pack;
+                       assert(eth_pack->acc_linkC == 1);
+                       ip_pack= eth_pack->acc_next; eth_pack->acc_next= NULL;
+                       next_part= ip_pack; ip_pack= NULL;
                        ip_pack= ip_split_pack(ip_port, &next_part, 
-                                                       ip_port->ip_mss);
+                                                       ip_port->ip_mtu);
                        if (ip_pack == NULL)
                        {
                                bf_afree(eth_pack);
                                continue;
                        }
 
+                       eth_pack->acc_next= ip_pack; ip_pack= NULL;
+
                        /* Allocate new ethernet header */
                        next_eth_pack= bf_memreq(sizeof(*next_eth_hdr));
                        next_eth_hdr= (eth_hdr_t *)ptr2acc_data(next_eth_pack);
@@ -445,11 +450,12 @@ ip_port_t *ip_port;
                        if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
                                ip_port->ip_dl.dl_eth.de_q_head= next_eth_pack;
                        else
+                       {
                                ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link= 
                                                                next_eth_pack;
+                       }
                        ip_port->ip_dl.dl_eth.de_q_tail= next_eth_pack;
 
-                       eth_pack->acc_next= ip_pack;
                        pack_size= bf_bufsize(eth_pack);
                }
 
@@ -459,8 +465,10 @@ ip_port_t *ip_port;
                        r= ipeth_update_ttl(enq_time, now, eth_pack);
                        if (r == ETIMEDOUT)
                        {       
-                               ip_warning(( "should send ICMP ttl exceded" ));
-                               bf_afree(eth_pack);
+                               ip_pack= bf_delhead(eth_pack, sizeof(*eth_hdr));
+                               eth_pack= NULL;
+                               icmp_snd_time_exceeded(ip_port->ip_port,
+                                       ip_pack, ICMP_TTL_EXC);
                                continue;
                        }
                        assert(r == NW_OK);
@@ -469,7 +477,17 @@ ip_port_t *ip_port;
                if (pack_size<ETH_MIN_PACK_SIZE)
                {
                        tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
+
+                       /* Overwrite padding with a fixed pattern (not zeros) */
+                       for (i= (ETH_MIN_PACK_SIZE-pack_size)/sizeof(*p),
+                               p= (u32_t *)ptr2acc_data(tail);
+                               i >= 0; i--, p++)
+                       {
+                               *p= 0xdeadbeef;
+                       }
+
                        eth_pack= bf_append(eth_pack, tail);
+                       pack_size= ETH_MIN_PACK_SIZE;
                }
 
                assert(ip_port->ip_dl.dl_eth.de_frame == NULL);
@@ -700,5 +718,5 @@ size_t pack_size;
 }
 
 /*
- * $PchId: ip_eth.c,v 1.9 1996/12/17 07:55:21 philip Exp $
+ * $PchId: ip_eth.c,v 1.25 2005/06/28 14:18:10 philip Exp $
  */
index d09932df3f393c6522baae3fe7ac5de37c2bf16f..a9bbfa7039b9993073395eb68788a0d0c4f0b031 100644 (file)
@@ -13,11 +13,15 @@ Copyright 1995 Philip Homburg
 #define IP_42BSD_BCAST         1       /* hostnumber 0 is also network
                                           broadcast */
 
+#define IP_LT_NORMAL           0       /* Normal */
+#define IP_LT_BROADCAST                1       /* Broadcast */
+#define IP_LT_MULTICAST                2       /* Multicast */
+
 struct ip_port;
 struct ip_fd;
 typedef void (*ip_dev_t) ARGS(( struct ip_port *ip_port ));
 typedef int (*ip_dev_send_t) ARGS(( struct ip_port *ip_port, ipaddr_t dest, 
-                                               acc_t *pack, int broadcast ));
+                                               acc_t *pack, int type ));
 
 #define IP_PROTO_HASH_NR       32
 
@@ -47,16 +51,20 @@ typedef struct ip_port
                } dl_ps;
        } ip_dl;
        ipaddr_t ip_ipaddr;
-       ipaddr_t ip_netmask;
        ipaddr_t ip_subnetmask;
+       ipaddr_t ip_classfulmask;
        u16_t ip_frame_id;
-       u16_t ip_mss;
+       u16_t ip_mtu;
+       u16_t ip_mtu_max;               /* Max MTU for this kind of network */
        ip_dev_t ip_dev_main;
        ip_dev_t ip_dev_set_ipaddr;
        ip_dev_send_t ip_dev_send;
        acc_t *ip_loopb_head;
        acc_t *ip_loopb_tail;
        event_t ip_loopb_event;
+       acc_t *ip_routeq_head;
+       acc_t *ip_routeq_tail;
+       event_t ip_routeq_event;
        struct ip_fd *ip_proto_any;
        struct ip_fd *ip_proto[IP_PROTO_HASH_NR];
 } ip_port_t;
@@ -73,10 +81,11 @@ typedef struct ip_port
 #define IEF_READ_SP    0x20
 #define IEF_WRITE_SP   0x80
 
-#define IPF_EMPTY      0x0
-#define IPF_CONFIGURED 0x1
-#define IPF_IPADDRSET  0x2
-#define IPF_NETMASKSET 0x4
+#define IPF_EMPTY              0x0
+#define IPF_CONFIGURED         0x1
+#define IPF_IPADDRSET          0x2
+#define IPF_NETMASKSET         0x4
+#define IPF_SUBNET_BCAST       0x8     /* Subnet supports subnet broadcasts */
 
 #define IPDL_ETH       NETTYPE_ETH
 #define IPDL_PSIP      NETTYPE_PSIP
@@ -105,14 +114,15 @@ typedef struct ip_fd
        put_pkt_t if_put_pkt;
        time_t if_exp_time;
        size_t if_rd_count;
+       ioreq_t if_ioctl;
 } ip_fd_t;
 
-#define IFF_EMPTY      0x0
-#define IFF_INUSE      0x1
-#define IFF_OPTSET     0x2
-#define IFF_BUSY       0xC
-#      define IFF_READ_IP      0x4
-#      define IFF_GIPCONF_IP   0x8
+#define IFF_EMPTY      0x00
+#define IFF_INUSE      0x01
+#define IFF_OPTSET     0x02
+#define IFF_BUSY       0x1C
+#      define IFF_READ_IP      0x04
+#      define IFF_IOCTL_IP     0x08
 
 typedef enum nettype
 {
@@ -127,12 +137,15 @@ typedef enum nettype
        IPNT_BROADCAST          /* 255.255.255.255 */
 } nettype_t;
 
+struct nwio_ipconf;
+
 /* ip_eth.c */
 int ipeth_init ARGS(( ip_port_t *ip_port ));
 
 /* ip_ioctl.c */
 void ip_hash_proto ARGS(( ip_fd_t *ip_fd ));
 void ip_unhash_proto ARGS(( ip_fd_t *ip_fd ));
+int ip_setconf ARGS(( int ip_port, struct nwio_ipconf *ipconfp ));
 
 /* ip_lib.c */
 ipaddr_t ip_get_netmask ARGS(( ipaddr_t hostaddr ));
@@ -146,18 +159,19 @@ char *ip_nettoa ARGS(( nettype_t nettype ));
 /* ip_ps.c */
 int ipps_init ARGS(( ip_port_t *ip_port ));
 void ipps_get ARGS(( int ip_port_nr ));
-void ipps_put ARGS(( int ip_port_nr, acc_t *pack ));
+void ipps_put ARGS(( int ip_port_nr, ipaddr_t nexthop, acc_t *pack ));
 
 /* ip_read.c */
 void ip_port_arrive ARGS(( ip_port_t *port, acc_t *pack, ip_hdr_t *ip_hdr ));
 void ip_arrived ARGS(( ip_port_t *port, acc_t *pack ));
 void ip_arrived_broadcast ARGS(( ip_port_t *port, acc_t *pack ));
 void ip_process_loopb ARGS(( event_t *ev, ev_arg_t arg ));
+void ip_packet2user ARGS(( ip_fd_t *ip_fd, acc_t *pack, time_t exp_time,
+       size_t data_len ));
 
 /* ip_write.c */
 void dll_eth_write_frame ARGS(( ip_port_t *port ));
-acc_t *ip_split_pack ARGS(( ip_port_t *ip_port, acc_t **ref_last, 
-                                                       int first_size ));
+acc_t *ip_split_pack ARGS(( ip_port_t *ip_port, acc_t **ref_last, int mtu ));
 void ip_hdr_chksum ARGS(( ip_hdr_t *ip_hdr, int ip_hdr_len ));
 
 
@@ -171,5 +185,5 @@ extern ip_ass_t ip_ass_table[IP_ASS_NR];
 #endif /* INET_IP_INT_H */
 
 /*
- * $PchId: ip_int.h,v 1.6 1996/12/17 07:59:36 philip Exp $
+ * $PchId: ip_int.h,v 1.19 2004/08/03 16:24:23 philip Exp $
  */
index a65ede67a9439ec7de89ccec2ad44bd743e05293..bbfaf247a217658deaa4a4d6247c2c4bf9dc7d93 100644 (file)
@@ -22,6 +22,7 @@ THIS_FILE
 FORWARD int ip_checkopt ARGS(( ip_fd_t *ip_fd ));
 FORWARD void reply_thr_get ARGS(( ip_fd_t *ip_fd, size_t
        reply, int for_ioctl ));
+FORWARD void report_addr ARGS(( ip_port_t *ip_port ));
 
 PUBLIC int ip_ioctl (fd, req)
 int fd;
@@ -31,6 +32,7 @@ ioreq_t req;
        ip_port_t *ip_port;
        nwio_ipopt_t *ipopt;
        nwio_ipopt_t oldopt, newopt;
+       nwio_ipconf2_t *ipconf2;
        nwio_ipconf_t *ipconf;
        nwio_route_t *route_ent;
        acc_t *data;
@@ -38,8 +40,8 @@ ioreq_t req;
        unsigned int new_en_flags, new_di_flags,
                old_en_flags, old_di_flags;
        unsigned long new_flags;
-       int old_ip_flags;
-       int ent_no;
+       int ent_no, r;
+       nwio_ipconf_t ipconf_var;
 
        assert (fd>=0 && fd<=IP_FD_NR);
        ip_fd= &ip_fd_table[fd];
@@ -49,6 +51,16 @@ ioreq_t req;
        switch (req)
        {
        case NWIOSIPOPT:
+               ip_port= ip_fd->if_port;
+
+               if (!(ip_port->ip_flags & IPF_IPADDRSET))
+               {
+                       ip_fd->if_ioctl= NWIOSIPOPT;
+                       ip_fd->if_flags |= IFF_IOCTL_IP;
+                       return NW_SUSPEND;
+               }
+               ip_fd->if_flags &= ~IFF_IOCTL_IP;
+
                data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0,
                        sizeof(nwio_ipopt_t), TRUE);
 
@@ -167,70 +179,87 @@ ioreq_t req;
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, 
                                                        (acc_t *)0, TRUE);
 
+       case NWIOSIPCONF2:
        case NWIOSIPCONF:
                ip_port= ip_fd->if_port;
 
-               data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, 
-                                               sizeof(nwio_ipconf_t), TRUE);
-
-               data= bf_packIffLess (data, sizeof(nwio_ipconf_t));
-               assert (data->acc_length == sizeof(nwio_ipconf_t));
-
-               old_ip_flags= ip_port->ip_flags;
-
-               ipconf= (nwio_ipconf_t *)ptr2acc_data(data);
-
-               if (ipconf->nwic_flags & ~NWIC_FLAGS)
+               if (req == NWIOSIPCONF2)
                {
-                       bf_afree(data);
-                       return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, 
-                                               EBADMODE, (acc_t *)0, TRUE);
+                       data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, 
+                               sizeof(*ipconf2), TRUE);
+                       data= bf_packIffLess (data, sizeof(*ipconf2));
+                       assert (data->acc_length == sizeof(*ipconf2));
+
+                       ipconf2= (nwio_ipconf2_t *)ptr2acc_data(data);
+
+                       ipconf= &ipconf_var;
+                       ipconf->nwic_flags= ipconf2->nwic_flags;
+                       ipconf->nwic_ipaddr= ipconf2->nwic_ipaddr;
+                       ipconf->nwic_netmask= ipconf2->nwic_netmask;
+                       ipconf->nwic_flags &= ~NWIC_MTU_SET;
                }
-
-               if (ipconf->nwic_flags & NWIC_IPADDR_SET)
+               else
                {
-                       ip_port->ip_ipaddr= ipconf->nwic_ipaddr;
-                       ip_port->ip_flags |= IPF_IPADDRSET;
-                       ip_port->ip_netmask=
-                               ip_netmask(ip_nettype(ipconf->nwic_ipaddr));
-                       if (!(ip_port->ip_flags & IPF_NETMASKSET)) {
-                               ip_port->ip_subnetmask= ip_port->ip_netmask;
-                       }
-                       (*ip_port->ip_dev_set_ipaddr)(ip_port);
+                       data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, 
+                               sizeof(*ipconf), TRUE);
+                       data= bf_packIffLess (data, sizeof(*ipconf));
+                       assert (data->acc_length == sizeof(*ipconf));
+
+                       ipconf= (nwio_ipconf_t *)ptr2acc_data(data);
                }
-               if (ipconf->nwic_flags & NWIC_NETMASK_SET)
+               r= ip_setconf(ip_port-ip_port_table, ipconf);
+               bf_afree(data);
+               return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, r, 
+                                                       (acc_t *)0, TRUE);
+
+       case NWIOGIPCONF2:
+               ip_port= ip_fd->if_port;
+
+               if (!(ip_port->ip_flags & IPF_IPADDRSET))
                {
-                       ip_port->ip_subnetmask= ipconf->nwic_netmask;
-                       ip_port->ip_flags |= IPF_NETMASKSET;
+                       ip_fd->if_ioctl= NWIOGIPCONF2;
+                       ip_fd->if_flags |= IFF_IOCTL_IP;
+                       return NW_SUSPEND;
                }
+               ip_fd->if_flags &= ~IFF_IOCTL_IP;
+               data= bf_memreq(sizeof(nwio_ipconf_t));
+               ipconf2= (nwio_ipconf2_t *)ptr2acc_data(data);
+               ipconf2->nwic_flags= NWIC_IPADDR_SET;
+               ipconf2->nwic_ipaddr= ip_port->ip_ipaddr;
+               ipconf2->nwic_netmask= ip_port->ip_subnetmask;
+               if (ip_port->ip_flags & IPF_NETMASKSET)
+                       ipconf2->nwic_flags |= NWIC_NETMASK_SET;
 
-               bf_afree(data);
-               return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, NW_OK, 
+               result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, 0, data, 
+                                                                       TRUE);
+               return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, 
                                                        (acc_t *)0, TRUE);
-
+       
        case NWIOGIPCONF:
                ip_port= ip_fd->if_port;
 
                if (!(ip_port->ip_flags & IPF_IPADDRSET))
                {
-                       ip_fd->if_flags |= IFF_GIPCONF_IP;
+                       ip_fd->if_ioctl= NWIOGIPCONF;
+                       ip_fd->if_flags |= IFF_IOCTL_IP;
                        return NW_SUSPEND;
                }
-               ip_fd->if_flags &= ~IFF_GIPCONF_IP;
-               data= bf_memreq(sizeof(nwio_ipconf_t));
+               ip_fd->if_flags &= ~IFF_IOCTL_IP;
+               data= bf_memreq(sizeof(*ipconf));
                ipconf= (nwio_ipconf_t *)ptr2acc_data(data);
                ipconf->nwic_flags= NWIC_IPADDR_SET;
                ipconf->nwic_ipaddr= ip_port->ip_ipaddr;
                ipconf->nwic_netmask= ip_port->ip_subnetmask;
                if (ip_port->ip_flags & IPF_NETMASKSET)
                        ipconf->nwic_flags |= NWIC_NETMASK_SET;
+               ipconf->nwic_mtu= ip_port->ip_mtu;
 
                result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, 0, data, 
                                                                        TRUE);
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, 
                                                        (acc_t *)0, TRUE);
-       
-       case NWIOGIPIROUTE:
+
+       case NWIOGIPOROUTE:
                data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd,
                        0, sizeof(nwio_route_t), TRUE);
                if (data == NULL)
@@ -246,7 +275,7 @@ ioreq_t req;
 
                data= bf_memreq(sizeof(nwio_route_t));
                route_ent= (nwio_route_t *)ptr2acc_data(data);
-               result= ipr_get_iroute(ent_no, route_ent);
+               result= ipr_get_oroute(ent_no, route_ent);
                if (result < 0)
                        bf_afree(data);
                else
@@ -258,7 +287,7 @@ ioreq_t req;
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                        result, (acc_t *)0, TRUE);
 
-       case NWIOSIPIROUTE:
+       case NWIOSIPOROUTE:
                data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd,
                        0, sizeof(nwio_route_t), TRUE);
                if (data == NULL)
@@ -266,21 +295,47 @@ ioreq_t req;
                        return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                                EFAULT, NULL, TRUE);
                }
+               if (!(ip_fd->if_port->ip_flags & IPF_IPADDRSET))
+               {
+                       /* Interface is down, no changes allowed */
+                       return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
+                               EINVAL, NULL, TRUE);
+               }
 
                data= bf_packIffLess (data, sizeof(nwio_route_t) );
                route_ent= (nwio_route_t *)ptr2acc_data(data);
-               result= ipr_add_iroute(ip_fd->if_port->ip_port, 
+               result= ipr_add_oroute(ip_fd->if_port-ip_port_table, 
+                       route_ent->nwr_dest, route_ent->nwr_netmask, 
+                       route_ent->nwr_gateway, (time_t)0, 
+                       route_ent->nwr_dist, route_ent->nwr_mtu,
+                       !!(route_ent->nwr_flags & NWRF_STATIC), 
+                       route_ent->nwr_pref, NULL);
+               bf_afree(data);
+
+               return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
+                       result, (acc_t *)0, TRUE);
+
+       case NWIODIPOROUTE:
+               data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd,
+                       0, sizeof(nwio_route_t), TRUE);
+               if (data == NULL)
+               {
+                       return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
+                               EFAULT, NULL, TRUE);
+               }
+
+               data= bf_packIffLess (data, sizeof(nwio_route_t) );
+               route_ent= (nwio_route_t *)ptr2acc_data(data);
+               result= ipr_del_oroute(ip_fd->if_port-ip_port_table, 
                        route_ent->nwr_dest, route_ent->nwr_netmask, 
                        route_ent->nwr_gateway,
-                       (route_ent->nwr_flags & NWRF_UNREACHABLE) ? 
-                               IRTD_UNREACHABLE : route_ent->nwr_dist,
-                       !!(route_ent->nwr_flags & NWRF_STATIC), NULL);
+                       !!(route_ent->nwr_flags & NWRF_STATIC));
                bf_afree(data);
 
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                        result, (acc_t *)0, TRUE);
 
-       case NWIOGIPOROUTE:
+       case NWIOGIPIROUTE:
                data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd,
                        0, sizeof(nwio_route_t), TRUE);
                if (data == NULL)
@@ -296,7 +351,7 @@ ioreq_t req;
 
                data= bf_memreq(sizeof(nwio_route_t));
                route_ent= (nwio_route_t *)ptr2acc_data(data);
-               result= ipr_get_oroute(ent_no, route_ent);
+               result= ipr_get_iroute(ent_no, route_ent);
                if (result < 0)
                        bf_afree(data);
                else
@@ -308,7 +363,7 @@ ioreq_t req;
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                        result, (acc_t *)0, TRUE);
 
-       case NWIODIPIROUTE:
+       case NWIOSIPIROUTE:
                data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd,
                        0, sizeof(nwio_route_t), TRUE);
                if (data == NULL)
@@ -316,21 +371,28 @@ ioreq_t req;
                        return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                                EFAULT, NULL, TRUE);
                }
+               if (!(ip_fd->if_port->ip_flags & IPF_IPADDRSET))
+               {
+                       /* Interface is down, no changes allowed */
+                       return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
+                               EINVAL, NULL, TRUE);
+               }
 
                data= bf_packIffLess (data, sizeof(nwio_route_t) );
                route_ent= (nwio_route_t *)ptr2acc_data(data);
-               result= ipr_del_iroute(ip_fd->if_port->ip_port
+               result= ipr_add_iroute(ip_fd->if_port-ip_port_table
                        route_ent->nwr_dest, route_ent->nwr_netmask, 
                        route_ent->nwr_gateway,
                        (route_ent->nwr_flags & NWRF_UNREACHABLE) ? 
                                IRTD_UNREACHABLE : route_ent->nwr_dist,
-                       !!(route_ent->nwr_flags & NWRF_STATIC));
+                       route_ent->nwr_mtu,
+                       !!(route_ent->nwr_flags & NWRF_STATIC), NULL);
                bf_afree(data);
 
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                        result, (acc_t *)0, TRUE);
 
-       case NWIOSIPOROUTE:
+       case NWIODIPIROUTE:
                data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd,
                        0, sizeof(nwio_route_t), TRUE);
                if (data == NULL)
@@ -341,22 +403,41 @@ ioreq_t req;
 
                data= bf_packIffLess (data, sizeof(nwio_route_t) );
                route_ent= (nwio_route_t *)ptr2acc_data(data);
-               result= ipr_add_oroute(ip_fd->if_port->ip_port
+               result= ipr_del_iroute(ip_fd->if_port-ip_port_table
                        route_ent->nwr_dest, route_ent->nwr_netmask, 
-                       route_ent->nwr_gateway, (time_t)0, 
-                       route_ent->nwr_dist,
-                       !!(route_ent->nwr_flags & NWRF_STATIC), 
-                       route_ent->nwr_pref, NULL);
+                       route_ent->nwr_gateway,
+                       !!(route_ent->nwr_flags & NWRF_STATIC));
                bf_afree(data);
 
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                        result, (acc_t *)0, TRUE);
 
+               /* The following ARP ioctls are only valid if the
+                * underlying device is an ethernet.
+                */
+       case NWIOARPGIP:
+       case NWIOARPGNEXT:
+       case NWIOARPSIP:
+       case NWIOARPDIP:
+               ip_port= ip_fd->if_port;
+
+               if (ip_port->ip_dl_type != IPDL_ETH)
+               {
+                       return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, 
+                               EBADIOCTL, (acc_t *)0, TRUE);
+               }
+               result= arp_ioctl(ip_port->ip_dl.dl_eth.de_port,
+                       ip_fd->if_srfd, req, ip_fd->if_get_userdata,
+                       ip_fd->if_put_userdata);
+               assert (result != SUSPEND);
+               return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result,
+                       (acc_t *)0, TRUE);
+
        default:
                break;
        }
-       DBLOCK(1, printf("replying EBADIOCTL\n"));
-       return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, EBADIOCTL,
+       DBLOCK(1, printf("replying EBADIOCTL: 0x%x\n", req));
+       return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, EBADIOCTL,
                (acc_t *)0, TRUE);
 }
 
@@ -410,6 +491,84 @@ ip_fd_t *ip_fd;
                *ip_fd_p= curr->if_proto_next;
 }
 
+PUBLIC int ip_setconf(ip_port_nr, ipconf)
+int ip_port_nr;
+nwio_ipconf_t *ipconf;
+{
+       int i, old_ip_flags, do_report;
+       ip_port_t *ip_port;
+       ip_fd_t *ip_fd;
+       ipaddr_t ipaddr;
+       u32_t mtu;
+
+       ip_port= &ip_port_table[ip_port_nr];
+
+       old_ip_flags= ip_port->ip_flags;
+
+       if (ipconf->nwic_flags & ~NWIC_FLAGS)
+               return EBADMODE;
+
+       do_report= 0;
+       if (ipconf->nwic_flags & NWIC_MTU_SET)
+       {
+               mtu= ipconf->nwic_mtu;
+               if (mtu < IP_MIN_MTU || mtu > ip_port->ip_mtu_max)
+                       return EINVAL;
+               ip_port->ip_mtu= mtu;
+               do_report= 1;
+       }
+
+       if (ipconf->nwic_flags & NWIC_NETMASK_SET)
+       {
+               ip_port->ip_subnetmask= ipconf->nwic_netmask;
+               ip_port->ip_flags |= IPF_NETMASKSET|IPF_SUBNET_BCAST;
+               if (ntohl(ip_port->ip_subnetmask) >= 0xfffffffe)
+                       ip_port->ip_flags &= ~IPF_SUBNET_BCAST;
+               do_report= 1;
+       }
+       if (ipconf->nwic_flags & NWIC_IPADDR_SET)
+       {
+               ipaddr= ipconf->nwic_ipaddr;
+               ip_port->ip_ipaddr= ipaddr;
+               ip_port->ip_flags |= IPF_IPADDRSET;
+               ip_port->ip_classfulmask=
+                       ip_netmask(ip_nettype(ipaddr));
+               if (!(ip_port->ip_flags & IPF_NETMASKSET))
+               {
+                   ip_port->ip_subnetmask= ip_port->ip_classfulmask;
+               }
+               if (ipaddr == HTONL(0x00000000))
+               {
+                       /* Special case. Use 0.0.0.0 to shut down interface. */
+                       ip_port->ip_flags &= ~(IPF_IPADDRSET|IPF_NETMASKSET);
+                       ip_port->ip_subnetmask= HTONL(0x00000000);
+               }
+               (*ip_port->ip_dev_set_ipaddr)(ip_port);
+
+               /* revive calls waiting for an IP address */
+               for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
+               {
+                       if (!(ip_fd->if_flags & IFF_INUSE))
+                               continue;
+                       if (ip_fd->if_port != ip_port)
+                               continue;
+                       if (ip_fd->if_flags & IFF_IOCTL_IP)
+                               ip_ioctl (i, ip_fd->if_ioctl);
+               }
+               
+               do_report= 1;
+       }
+
+       ipr_chk_itab(ip_port-ip_port_table, ip_port->ip_ipaddr,
+               ip_port->ip_subnetmask);
+       ipr_chk_otab(ip_port-ip_port_table, ip_port->ip_ipaddr,
+               ip_port->ip_subnetmask);
+       if (do_report)
+               report_addr(ip_port);
+
+       return 0;
+}
+
 PRIVATE int ip_checkopt (ip_fd)
 ip_fd_t *ip_fd;
 {
@@ -417,7 +576,6 @@ ip_fd_t *ip_fd;
 
        unsigned long flags;
        unsigned int en_di_flags;
-       ip_port_t *port;
        acc_t *pack;
        int result;
 
@@ -467,6 +625,47 @@ int for_ioctl;
        assert (!result);
 }
 
+PRIVATE void report_addr(ip_port)
+ip_port_t *ip_port;
+{
+       int i, hdr_len;
+       ip_fd_t *ip_fd;
+       acc_t *pack;
+       ip_hdr_t *ip_hdr;
+
+       pack= bf_memreq(IP_MIN_HDR_SIZE);
+       ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+
+       hdr_len= IP_MIN_HDR_SIZE;
+       ip_hdr->ih_vers_ihl= (IP_VERSION << 4) | (hdr_len/4);
+       ip_hdr->ih_tos= 0;
+       ip_hdr->ih_length= htons(ip_port->ip_mtu);
+       ip_hdr->ih_id= 0;
+       ip_hdr->ih_flags_fragoff= 0;
+       ip_hdr->ih_ttl= 0;
+       ip_hdr->ih_proto= 0;
+       ip_hdr->ih_src= ip_port->ip_ipaddr;
+       ip_hdr->ih_dst= ip_port->ip_subnetmask;
+       ip_hdr_chksum(ip_hdr, hdr_len);
+
+       for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
+       {
+               if (!(ip_fd->if_flags & IFF_INUSE))
+               {
+                       continue;
+               }
+               if (ip_fd->if_port != ip_port)
+               {
+                       continue;
+               }
+
+               /* Deliver packet to user */
+               pack->acc_linkC++;
+               ip_packet2user(ip_fd, pack, 255, IP_MIN_HDR_SIZE);
+       }
+       bf_afree(pack); pack= NULL;
+}
+
 /*
- * $PchId: ip_ioctl.c,v 1.8 1996/12/17 07:56:18 philip Exp $
+ * $PchId: ip_ioctl.c,v 1.22 2004/08/03 11:10:08 philip Exp $
  */
index 151b08bb4a1a7eae75895ed2ffce4835be4ecc9c..a9f6114c8ca435003571c245559298b493f8acc3 100644 (file)
@@ -29,21 +29,21 @@ int optlen;
                strict_source_present= FALSE, record_route_present= FALSE,
                timestamp_present= FALSE;
 
-assert (!(optlen & 3));
+       assert (!(optlen & 3));
        i= 0;
        while (i<optlen)
        {
-               DBLOCK(1, printf("*opt= %d\n", *opt));
+               DBLOCK(2, printf("*opt= %d\n", *opt));
 
                switch (*opt)
                {
-               case 0x0:               /* End of Option list */
+               case IP_OPT_EOL:        /* End of Option list */
                        return NW_OK;
-               case 0x1:               /* No Operation */
+               case IP_OPT_NOP:        /* No Operation */
                        i++;
                        opt++;
                        break;
-               case 0x82:              /* Security */
+               case IP_OPT_SEC:        /* Security */
                        if (security_present)
                                return EINVAL;
                        security_present= TRUE;
@@ -52,10 +52,10 @@ assert (!(optlen & 3));
                        i += opt[1];
                        opt += opt[1];
                        break;
-               case 0x83:              /* Lose Source and Record Route */
+               case IP_OPT_LSRR:       /* Loose Source and Record Route */
                        if (lose_source_present)
                        {
-                               DBLOCK(1, printf("snd lose soruce route\n"));
+                               DBLOCK(1, printf("2nd lose soruce route\n"));
                                return EINVAL;
                        }
                        lose_source_present= TRUE;
@@ -68,7 +68,7 @@ assert (!(optlen & 3));
                        i += opt[1];
                        opt += opt[1];
                        break;
-               case 0x89:              /* Strict Source and Record Route */
+               case IP_OPT_SSRR:       /* Strict Source and Record Route */
                        if (strict_source_present)
                                return EINVAL;
                        strict_source_present= TRUE;
@@ -77,7 +77,7 @@ assert (!(optlen & 3));
                        i += opt[1];
                        opt += opt[1];
                        break;
-               case 0x7:               /* Record Route */
+               case IP_OPT_RR:         /* Record Route */
                        if (record_route_present)
                                return EINVAL;
                        record_route_present= TRUE;
@@ -86,7 +86,7 @@ assert (!(optlen & 3));
                        i += opt[1];
                        opt += opt[1];
                        break;
-               case 0x88:
+               case IP_OPT_TS:         /* Timestamp */
                        if (timestamp_present)
                                return EINVAL;
                        timestamp_present= TRUE;
@@ -104,6 +104,12 @@ assert (!(optlen & 3));
                        i += opt[1];
                        opt += opt[1];
                        break;
+               case IP_OPT_RTRALT:
+                       if (opt[1] != 4)
+                               return EINVAL;
+                       i += opt[1];
+                       opt += opt[1];
+                       break;
                default:
                        return EINVAL;
                }
@@ -227,5 +233,5 @@ nettype_t nettype;
 #endif
 
 /*
- * $PchId: ip_lib.c,v 1.6 1996/12/17 07:59:36 philip Exp $
+ * $PchId: ip_lib.c,v 1.10 2002/06/08 21:35:52 philip Exp $
  */
index 853e39b3187606ecc219139048fc9f7e9630877d..9e8f9065821f40db8051449824eed54ff9c4d41f 100644 (file)
@@ -22,7 +22,7 @@ THIS_FILE
 FORWARD void ipps_main ARGS(( ip_port_t *ip_port ));
 FORWARD void ipps_set_ipaddr ARGS(( ip_port_t *ip_port ));
 FORWARD int ipps_send ARGS(( struct ip_port *ip_port, ipaddr_t dest, 
-                                               acc_t *pack, int broadcast ));
+                                       acc_t *pack, int type ));
 
 PUBLIC int ipps_init(ip_port)
 ip_port_t *ip_port;
@@ -32,10 +32,8 @@ ip_port_t *ip_port;
        result= psip_enable(ip_port->ip_dl.dl_ps.ps_port, ip_port->ip_port);
        if (result == -1)
                return -1;
-#if ZERO
        ip_port->ip_dl.dl_ps.ps_send_head= NULL;
        ip_port->ip_dl.dl_ps.ps_send_tail= NULL;
-#endif
        ip_port->ip_dev_main= ipps_main;
        ip_port->ip_dev_set_ipaddr= ipps_set_ipaddr;
        ip_port->ip_dev_send= ipps_send;
@@ -46,7 +44,8 @@ PUBLIC void ipps_get(ip_port_nr)
 int ip_port_nr;
 {
        int result;
-       acc_t *pack;
+       ipaddr_t dest;
+       acc_t *acc, *pack, *next_part;
        ip_port_t *ip_port;
 
        assert(ip_port_nr >= 0 && ip_port_nr < ip_conf_nr);
@@ -57,12 +56,54 @@ int ip_port_nr;
        {
                pack= ip_port->ip_dl.dl_ps.ps_send_head;
                ip_port->ip_dl.dl_ps.ps_send_head= pack->acc_ext_link;
-               result= psip_send(ip_port->ip_dl.dl_ps.ps_port, pack);
+
+               /* Extract nexthop address */
+               pack= bf_packIffLess(pack, sizeof(dest));
+               dest= *(ipaddr_t *)ptr2acc_data(pack);
+               pack= bf_delhead(pack, sizeof(dest));
+
+               if (bf_bufsize(pack) > ip_port->ip_mtu)
+               {
+                       next_part= pack;
+                       pack= ip_split_pack(ip_port, &next_part, 
+                               ip_port->ip_mtu);
+                       if (pack == NULL)
+                               continue;
+
+                       /* Prepend nexthop address */
+                       acc= bf_memreq(sizeof(dest));
+                       *(ipaddr_t *)(ptr2acc_data(acc))= dest;
+                       acc->acc_next= next_part;
+                       next_part= acc; acc= NULL;
+
+                       assert(next_part->acc_linkC == 1);
+                       next_part->acc_ext_link= NULL;
+                       if (ip_port->ip_dl.dl_ps.ps_send_head)
+                       {
+                               ip_port->ip_dl.dl_ps.ps_send_tail->
+                                       acc_ext_link= next_part;
+                       }
+                       else
+                       {
+                               ip_port->ip_dl.dl_ps.ps_send_head=
+                                       next_part;
+                       }
+                       ip_port->ip_dl.dl_ps.ps_send_tail= next_part;
+               }
+
+               result= psip_send(ip_port->ip_dl.dl_ps.ps_port, dest, pack);
                if (result != NW_SUSPEND)
                {
                        assert(result == NW_OK);
                        continue;
                }
+
+               /* Prepend nexthop address */
+               acc= bf_memreq(sizeof(dest));
+               *(ipaddr_t *)(ptr2acc_data(acc))= dest;
+               acc->acc_next= pack;
+               pack= acc; acc= NULL;
+
                pack->acc_ext_link= ip_port->ip_dl.dl_ps.ps_send_head;
                ip_port->ip_dl.dl_ps.ps_send_head= pack;
                if (pack->acc_ext_link == NULL)
@@ -71,8 +112,9 @@ int ip_port_nr;
        }
 }
 
-PUBLIC void ipps_put(ip_port_nr, pack)
+PUBLIC void ipps_put(ip_port_nr, nexthop, pack)
 int ip_port_nr;
+ipaddr_t nexthop;
 acc_t *pack;
 {
        ip_port_t *ip_port;
@@ -80,7 +122,10 @@ acc_t *pack;
        assert(ip_port_nr >= 0 && ip_port_nr < ip_conf_nr);
        ip_port= &ip_port_table[ip_port_nr];
        assert(ip_port->ip_dl_type == IPDL_PSIP);
-       ip_arrived(ip_port, pack);
+       if (nexthop == HTONL(0xffffffff))
+               ip_arrived_broadcast(ip_port, pack);
+       else
+               ip_arrived(ip_port, pack);
 }
 
 PRIVATE void ipps_main(ip_port)
@@ -92,57 +137,139 @@ ip_port_t *ip_port;
 PRIVATE void ipps_set_ipaddr(ip_port)
 ip_port_t *ip_port;
 {
-       int i;
-       ip_fd_t *ip_fd;
-
-       /* revive calls waiting for an ip addresses */
-       for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
-       {
-               if (!(ip_fd->if_flags & IFF_INUSE))
-               {
-                       continue;
-               }
-               if (ip_fd->if_port != ip_port)
-               {
-                       continue;
-               }
-               if (ip_fd->if_flags & IFF_GIPCONF_IP)
-               {
-                       ip_ioctl (i, NWIOGIPCONF);
-               }
-       }
 }
 
-PRIVATE int ipps_send(ip_port, dest, pack, broadcast)
+PRIVATE int ipps_send(ip_port, dest, pack, type)
 struct ip_port *ip_port;
 ipaddr_t dest;
 acc_t *pack;
-int broadcast;
+int type;
 {
        int result;
+       acc_t *acc, *next_part;
 
-       if (broadcast)
+       if (type != IP_LT_NORMAL)
+       {
                ip_arrived_broadcast(ip_port, bf_dupacc(pack));
 
-       if (ip_port->ip_dl.dl_ps.ps_send_head == NULL)
+               /* Map all broadcasts to the on-link broadcast address.
+                * This saves the application from having to to find out
+                * if the destination is a subnet broadcast.
+                */
+               dest= HTONL(0xffffffff);
+       }
+
+       /* Note that allocating a packet may trigger a cleanup action,
+        * which may cause the send queue to become empty.
+        */
+       while (ip_port->ip_dl.dl_ps.ps_send_head != NULL)
        {
-               result= psip_send(ip_port->ip_dl.dl_ps.ps_port, pack);
-               if (result != NW_SUSPEND)
+               acc= bf_memreq(sizeof(dest));
+
+               if (ip_port->ip_dl.dl_ps.ps_send_head == NULL)
                {
-                       assert(result == NW_OK);
-                       return result;
+                       bf_afree(acc); acc= NULL;
+                       continue;
                }
-               assert (ip_port->ip_dl.dl_ps.ps_send_head == NULL);
-               ip_port->ip_dl.dl_ps.ps_send_head= pack;
-       }
-       else
+
+               /* Prepend nexthop address */
+               *(ipaddr_t *)(ptr2acc_data(acc))= dest;
+               acc->acc_next= pack;
+               pack= acc; acc= NULL;
+
+               assert(pack->acc_linkC == 1);
+               pack->acc_ext_link= NULL;
+
                ip_port->ip_dl.dl_ps.ps_send_tail->acc_ext_link= pack;
-       ip_port->ip_dl.dl_ps.ps_send_tail= pack;
-       pack->acc_ext_link= NULL;
+               ip_port->ip_dl.dl_ps.ps_send_tail= pack;
+
+               return NW_OK;
+       }
+
+       while (pack)
+       {
+               if (bf_bufsize(pack) > ip_port->ip_mtu)
+               {
+                       next_part= pack;
+                       pack= ip_split_pack(ip_port, &next_part, 
+                               ip_port->ip_mtu);
+                       if (pack == NULL)
+                       {
+                               return NW_OK;
+                       }
+
+                       /* Prepend nexthop address */
+                       acc= bf_memreq(sizeof(dest));
+                       *(ipaddr_t *)(ptr2acc_data(acc))= dest;
+                       acc->acc_next= next_part;
+                       next_part= acc; acc= NULL;
+
+                       assert(next_part->acc_linkC == 1);
+                       next_part->acc_ext_link= NULL;
+                       ip_port->ip_dl.dl_ps.ps_send_head= next_part;
+                       ip_port->ip_dl.dl_ps.ps_send_tail= next_part;
+               }
+               result= psip_send(ip_port->ip_dl.dl_ps.ps_port, dest, pack);
+               if (result == NW_SUSPEND)
+               {
+                       /* Prepend nexthop address */
+                       acc= bf_memreq(sizeof(dest));
+                       *(ipaddr_t *)(ptr2acc_data(acc))= dest;
+                       acc->acc_next= pack;
+                       pack= acc; acc= NULL;
+
+                       assert(pack->acc_linkC == 1);
+                       pack->acc_ext_link= ip_port->ip_dl.dl_ps.ps_send_head;
+                       ip_port->ip_dl.dl_ps.ps_send_head= pack;
+                       if (!pack->acc_ext_link)
+                               ip_port->ip_dl.dl_ps.ps_send_tail= pack;
+                       break;
+               }
+               assert(result == NW_OK);
+               pack= ip_port->ip_dl.dl_ps.ps_send_head;
+               if (!pack)
+                       break;
+               ip_port->ip_dl.dl_ps.ps_send_head= pack->acc_ext_link;
+
+               /* Extract nexthop address */
+               pack= bf_packIffLess(pack, sizeof(dest));
+               dest= *(ipaddr_t *)ptr2acc_data(pack);
+               pack= bf_delhead(pack, sizeof(dest));
+       }
 
        return NW_OK;
 }
 
+#if 0
+int ipps_check(ip_port_t *ip_port)
+{
+       int n, bad;
+       acc_t *prev, *curr;
+
+       for (n= 0, prev= NULL, curr= ip_port->ip_dl.dl_ps.ps_send_head_;
+               curr; prev= curr, curr= curr->acc_ext_link)
+       {
+               n++;
+       }
+       bad= 0;
+       if (prev != NULL && prev != ip_port->ip_dl.dl_ps.ps_send_tail_)
+       {
+               printf("ipps_check, ip[%d]: wrong tail: got %p, expected %p\n",
+                       ip_port-ip_port_table,
+                       ip_port->ip_dl.dl_ps.ps_send_tail_, prev);
+               bad++;
+       }
+       if (n != ip_port->ip_dl.dl_ps.ps_send_nr)
+       {
+               printf("ipps_check, ip[%d]: wrong count: got %d, expected %d\n",
+                       ip_port-ip_port_table,
+                       ip_port->ip_dl.dl_ps.ps_send_nr, n);
+               bad++;
+       }
+       return bad == 0;
+}
+#endif
+
 /*
- * $PchId: ip_ps.c,v 1.5 1995/11/21 06:45:27 philip Exp $
+ * $PchId: ip_ps.c,v 1.15 2003/01/21 15:57:52 philip Exp $
  */
index 7fc80b7c90fe63c550bf7075aad7318198252ff8..318beefb59d4b6a290cdfe59dcb9c1487c074ffd 100644 (file)
@@ -25,9 +25,8 @@ FORWARD acc_t *merge_frags ARGS(( acc_t *first, acc_t *second ));
 FORWARD int ip_frag_chk ARGS(( acc_t *pack ));
 FORWARD acc_t *reassemble ARGS(( ip_port_t *ip_port, acc_t *pack, 
        ip_hdr_t *ip_hdr ));
+FORWARD void route_packets ARGS(( event_t *ev, ev_arg_t ev_arg ));
 FORWARD int broadcast_dst ARGS(( ip_port_t *ip_port, ipaddr_t dest ));
-FORWARD void packet2user ARGS(( ip_fd_t *ip_fd, acc_t *pack,
-       time_t exp_time ));
 
 PUBLIC int ip_read (fd, count)
 int fd;
@@ -38,8 +37,10 @@ size_t count;
 
        ip_fd= &ip_fd_table[fd];
        if (!(ip_fd->if_flags & IFF_OPTSET))
+       {
                return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, EBADMODE,
                        (acc_t *)0, FALSE);
+       }
 
        ip_fd->if_rd_count= count;
 
@@ -50,7 +51,8 @@ size_t count;
                {
                        pack= ip_fd->if_rdbuf_head;
                        ip_fd->if_rdbuf_head= pack->acc_ext_link;
-                       packet2user (ip_fd, pack, ip_fd->if_exp_time);
+                       ip_packet2user (ip_fd, pack, ip_fd->if_exp_time,
+                               bf_bufsize(pack));
                        assert(!(ip_fd->if_flags & IFF_READ_IP));
                        return NW_OK;
                }
@@ -150,8 +152,17 @@ ip_hdr_t *pack_hdr;
                }
                if ((ass_ent->ia_min_ttl) * HZ + first_time <
                        get_time())
+               {
+                       if (broadcast_dst(ip_port, pack_hdr->ih_dst))
+                       {
+                               DBLOCK(1, printf(
+       "ip_read'reassemble: reassembly timeout for broadcast packet\n"););
+                               bf_afree(pack); pack= NULL;
+                               return NULL;
+                       }
                        icmp_snd_time_exceeded(ip_port->ip_port, pack,
                                ICMP_FRAG_REASSEM);
+               }
                else
                        return pack;
        }
@@ -268,9 +279,9 @@ ipaddr_t dst;
 
        if (new_ass_ent->ia_frags)
        {
-               DBLOCK(1, printf("old frags id= %u, proto= %u, src= ",
+               DBLOCK(2, printf("old frags id= %u, proto= %u, src= ",
                        ntohs(new_ass_ent->ia_id),
-                       ntohs(new_ass_ent->ia_proto));
+                       new_ass_ent->ia_proto);
                        writeIpAddr(new_ass_ent->ia_srcaddr); printf(" dst= ");
                        writeIpAddr(new_ass_ent->ia_dstaddr); printf(": ");
                        ip_print_frags(new_ass_ent->ia_frags); printf("\n"));
@@ -283,8 +294,17 @@ ipaddr_t dst;
                }
                curr_acc= new_ass_ent->ia_frags;
                new_ass_ent->ia_frags= 0;
-               icmp_snd_time_exceeded(ip_port->ip_port, curr_acc,
-                       ICMP_FRAG_REASSEM);
+               if (broadcast_dst(ip_port, new_ass_ent->ia_dstaddr))
+               {
+                       DBLOCK(1, printf(
+       "ip_read'find_ass_ent: reassembly timeout for broadcast packet\n"));
+                       bf_afree(curr_acc); curr_acc= NULL;
+               }
+               else
+               {
+                       icmp_snd_time_exceeded(ip_port->ip_port,
+                               curr_acc, ICMP_FRAG_REASSEM);
+               }
        }
        new_ass_ent->ia_min_ttl= IP_MAX_TTL;
        new_ass_ent->ia_port= ip_port;
@@ -348,15 +368,16 @@ acc_t *pack;
        return TRUE;
 }
 
-PRIVATE void packet2user (ip_fd, pack, exp_time)
+PUBLIC void ip_packet2user (ip_fd, pack, exp_time, data_len)
 ip_fd_t *ip_fd;
 acc_t *pack;
 time_t exp_time;
+size_t data_len;
 {
        acc_t *tmp_pack;
        ip_hdr_t *ip_hdr;
        int result, ip_hdr_len;
-       size_t size, transf_size;
+       size_t transf_size;
 
        assert (ip_fd->if_flags & IFF_INUSE);
        if (!(ip_fd->if_flags & IFF_READ_IP))
@@ -380,22 +401,19 @@ time_t exp_time;
                return;
        }
 
-       size= bf_bufsize (pack);
+       assert (pack->acc_length >= IP_MIN_HDR_SIZE);
+       ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+
        if (ip_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY)
        {
-
-               pack= bf_packIffLess (pack, IP_MIN_HDR_SIZE);
-               assert (pack->acc_length >= IP_MIN_HDR_SIZE);
-
-               ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
                ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;
 
-               assert (size >= ip_hdr_len);
-               size -= ip_hdr_len;
+               assert (data_len > ip_hdr_len);
+               data_len -= ip_hdr_len;
                pack= bf_delhead(pack, ip_hdr_len);
        }
 
-       if (size>ip_fd->if_rd_count)
+       if (data_len > ip_fd->if_rd_count)
        {
                tmp_pack= bf_cut (pack, 0, ip_fd->if_rd_count);
                bf_afree(pack);
@@ -403,7 +421,7 @@ time_t exp_time;
                transf_size= ip_fd->if_rd_count;
        }
        else
-               transf_size= size;
+               transf_size= data_len;
 
        if (ip_fd->if_put_pkt)
        {
@@ -414,16 +432,17 @@ time_t exp_time;
        result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
                (size_t)0, pack, FALSE);
        if (result >= 0)
-               if (size > transf_size)
+       {
+               if (data_len > transf_size)
                        result= EPACKSIZE;
                else
                        result= transf_size;
+       }
 
        ip_fd->if_flags &= ~IFF_READ_IP;
        result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result,
                        (acc_t *)0, FALSE);
        assert (result >= 0);
-       return;
 }
 
 PUBLIC void ip_port_arrive (ip_port, pack, ip_hdr)
@@ -432,9 +451,8 @@ acc_t *pack;
 ip_hdr_t *ip_hdr;
 {
        ip_fd_t *ip_fd, *first_fd, *share_fd;
-       ip_hdr_t *hdr;
-       int port_nr;
        unsigned long ip_pack_stat;
+       unsigned size;
        int i;
        int hash, proto;
        time_t exp_time;
@@ -452,6 +470,14 @@ ip_hdr_t *ip_hdr;
                assert (!(ntohs(ip_hdr->ih_flags_fragoff) &
                        (IH_FRAGOFF_MASK|IH_MORE_FRAGS)));
        }
+       size= ntohs(ip_hdr->ih_length);
+       if (size > bf_bufsize(pack))
+       {
+               /* Should discard packet */
+               assert(0);
+               bf_afree(pack); pack= NULL;
+               return;
+       }
 
        exp_time= get_time() + (ip_hdr->ih_ttl+1) * HZ;
 
@@ -499,13 +525,13 @@ ip_hdr_t *ip_hdr;
                                continue;
                        }
                        pack->acc_linkC++;
-                       packet2user(ip_fd, pack, exp_time);
+                       ip_packet2user(ip_fd, pack, exp_time, size);
 
                }
                if (share_fd)
                {
                        pack->acc_linkC++;
-                       packet2user(share_fd, pack, exp_time);
+                       ip_packet2user(share_fd, pack, exp_time, size);
                }
        }
        if (first_fd)
@@ -515,10 +541,10 @@ ip_hdr_t *ip_hdr;
                        !(first_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY))
                {
                        (*first_fd->if_put_pkt)(first_fd->if_srfd, pack,
-                               ntohs(ip_hdr->ih_length));
+                               size);
                }
                else
-                       packet2user(first_fd, pack, exp_time);
+                       ip_packet2user(first_fd, pack, exp_time, size);
        }
        else
        {
@@ -541,15 +567,12 @@ PUBLIC void ip_arrived(ip_port, pack)
 ip_port_t *ip_port;
 acc_t *pack;
 {
-       ip_port_t *next_port;
        ip_hdr_t *ip_hdr;
-       iroute_t *iroute;
        ipaddr_t dest;
-       nettype_t nettype;
-       int ip_frag_len, ip_hdr_len;
+       int ip_frag_len, ip_hdr_len, highbyte;
        size_t pack_size;
-       acc_t *tmp_pack;
-       int broadcast;
+       acc_t *tmp_pack, *hdr_pack;
+       ev_arg_t ev_arg;
 
        pack_size= bf_bufsize(pack);
 
@@ -567,16 +590,24 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE);
        ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
        if (ip_hdr_len>IP_MIN_HDR_SIZE)
        {
-               pack= bf_align(pack, IP_MIN_HDR_SIZE, 4);
                pack= bf_packIffLess(pack, ip_hdr_len);
                ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
        }
        ip_frag_len= ntohs(ip_hdr->ih_length);
-       if (ip_frag_len<pack_size)
+       if (ip_frag_len != pack_size)
        {
+               if (pack_size < ip_frag_len)
+               {
+                       /* Sent ICMP? */
+                       DBLOCK(1, printf("wrong acc_length\n"));
+                       bf_afree(pack);
+                       return;
+               }
+               assert(ip_frag_len<pack_size);
                tmp_pack= pack;
                pack= bf_cut(tmp_pack, 0, ip_frag_len);
                bf_afree(tmp_pack);
+               pack_size= ip_frag_len;
        }
 
        if (!ip_frag_chk(pack))
@@ -605,122 +636,60 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE);
                return;
        }
 
+       if (pack->acc_linkC != 1 || pack->acc_buffer->buf_linkC != 1)
+       {
+               /* Get a private copy of the IP header */
+               hdr_pack= bf_memreq(ip_hdr_len);
+               memcpy(ptr2acc_data(hdr_pack), ip_hdr, ip_hdr_len);
+               pack= bf_delhead(pack, ip_hdr_len);
+               hdr_pack->acc_next= pack;
+               pack= hdr_pack; hdr_pack= NULL;
+               ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+       }
+       assert(pack->acc_linkC == 1);
+       assert(pack->acc_buffer->buf_linkC == 1);
+
        /* Try to decrement the ttl field with one. */
        if (ip_hdr->ih_ttl < 2)
        {
-               icmp_snd_time_exceeded(ip_port->ip_port, pack, ICMP_TTL_EXC);
+               icmp_snd_time_exceeded(ip_port->ip_port, pack,
+                       ICMP_TTL_EXC);
                return;
        }
        ip_hdr->ih_ttl--;
        ip_hdr_chksum(ip_hdr, ip_hdr_len);
 
        /* Avoid routing to bad destinations. */
-       nettype= ip_nettype(dest);
-       if (nettype != IPNT_CLASS_A && nettype != IPNT_CLASS_B && nettype !=
-               IPNT_CLASS_C)
+       highbyte= ntohl(dest) >> 24;
+       if (highbyte == 0 || highbyte == 127 ||
+               (highbyte == 169 && (((ntohl(dest) >> 16) & 0xff) == 254)) ||
+               highbyte >= 0xe0)
        {
                /* Bogus destination address */
-               if (nettype == IPNT_CLASS_D || nettype == IPNT_CLASS_E)
-                       bf_afree(pack);
-               else
-               {
-                       icmp_snd_unreachable(ip_port->ip_port, pack,
-                               ICMP_HOST_UNRCH);
-               }
-               return;
-       }
-       iroute= iroute_frag(ip_port->ip_port, dest);
-       if (iroute == NULL || iroute->irt_dist == IRTD_UNREACHABLE)
-       {
-               /* Also unreachable */
-               /* Finding out if we send a network unreachable is too much
-                * trouble.
-                */
-               icmp_snd_unreachable(ip_port->ip_port, pack,
-                       ICMP_HOST_UNRCH);
-               return;
-       }
-       next_port= &ip_port_table[iroute->irt_port];
-       if (next_port != ip_port)
-       {
-               if (iroute->irt_gateway != 0)
-               {
-                       /* Just send the packet to the next gateway */
-                       next_port->ip_dev_send(next_port, iroute->irt_gateway,
-                               pack, /* no bradcast */ 0);
-                       return;
-               }
-               /* The packet is for the attached network. Special addresses
-                * are the ip address of the interface and net.0 if
-                * no IP_42BSD_BCAST.
-                */
-               if (dest == next_port->ip_ipaddr)
-               {
-                       ip_port_arrive (next_port, pack, ip_hdr);
-                       return;
-               }
-               if (dest == iroute->irt_dest)
-               {
-#if IP_42BSD_BCAST
-                       broadcast= 1;
-#else
-                       /* Bogus destination address */
-                       icmp_snd_dstunrch(pack);
-                       return;
-#endif
-               }
-               else if (dest == (iroute->irt_dest | ~iroute->irt_subnetmask))
-                       broadcast= 1;
-               else
-                       broadcast= 0;
-
-               /* Just send the packet to it's destination */
-               next_port->ip_dev_send(next_port, dest, pack, broadcast);
+               bf_afree(pack);
                return;
        }
 
-       /* Now we know that the packet should be route over the same network
-        * as it came from. If there is a next hop gateway, we can send
-        * the packet to that gateway and send a redirect ICMP to the sender
-        * if the sender is on the attached network. If there is no gateway
-        * complain.
-        */
-       if (iroute->irt_gateway == 0)
+       /* Further processing from an event handler */
+       if (pack->acc_linkC != 1)
        {
-#if !CRAMPED
-               printf("packet should not be here, src=");
-               writeIpAddr(ip_hdr->ih_src);
-               printf(" dst=");
-               writeIpAddr(ip_hdr->ih_dst);
-               printf("\n");
-#endif
+               tmp_pack= bf_dupacc(pack);
                bf_afree(pack);
-               return;
-       }
-       if (((ip_hdr->ih_src ^ ip_port->ip_ipaddr) &
-               ip_port->ip_subnetmask) == 0)
-       {
-               /* Finding out if we can send a network redirect instead of
-                * a host redirect is too much trouble.
-                */
-               pack->acc_linkC++;
-               icmp_snd_redirect(ip_port->ip_port, pack,
-                       ICMP_REDIRECT_HOST, iroute->irt_gateway);
+               pack= tmp_pack;
+               tmp_pack= NULL;
        }
-       else
+       pack->acc_ext_link= NULL;
+       if (ip_port->ip_routeq_head)
        {
-#if !CRAMPED
-               printf("packet is wrongly routed, src=");
-               writeIpAddr(ip_hdr->ih_src);
-               printf(" dst=");
-               writeIpAddr(ip_hdr->ih_dst);
-               printf("\n");
-#endif
-               bf_afree(pack);
+               ip_port->ip_routeq_tail->acc_ext_link= pack;
+               ip_port->ip_routeq_tail= pack;
                return;
        }
-       ip_port->ip_dev_send(ip_port, iroute->irt_gateway, pack,
-               /* no broadcast */ 0);
+
+       ip_port->ip_routeq_head= pack;
+       ip_port->ip_routeq_tail= pack;
+       ev_arg.ev_ptr= ip_port;
+       ev_enqueue(&ip_port->ip_routeq_event, route_packets, ev_arg);
 }
 
 PUBLIC void ip_arrived_broadcast(ip_port, pack)
@@ -773,19 +742,13 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE);
 
        if (!broadcast_dst(ip_port, ip_hdr->ih_dst))
        {
-#if !CRAMPED
-               /* this message isn't very useful, but is quite annoying on
-                * the console
-                */
-               /*
-               printf("ip[%d]: broadcast packet for ip-nonbroadcast addr, src=",
+               printf(
+               "ip[%d]: broadcast packet for ip-nonbroadcast addr, src=",
                        ip_port->ip_port);
                writeIpAddr(ip_hdr->ih_src);
                printf(" dst=");
                writeIpAddr(ip_hdr->ih_dst);
                printf("\n");
-               */
-#endif
                bf_afree(pack);
                return;
        }
@@ -793,10 +756,220 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE);
        ip_port_arrive (ip_port, pack, ip_hdr);
 }
 
+PRIVATE void route_packets(ev, ev_arg)
+event_t *ev;
+ev_arg_t ev_arg;
+{
+       ip_port_t *ip_port;
+       ipaddr_t dest;
+       acc_t *pack;
+       iroute_t *iroute;
+       ip_port_t *next_port;
+       int r, type;
+       ip_hdr_t *ip_hdr;
+       size_t req_mtu;
+
+       ip_port= ev_arg.ev_ptr;
+       assert(&ip_port->ip_routeq_event == ev);
+
+       while (pack= ip_port->ip_routeq_head, pack != NULL)
+       {
+               ip_port->ip_routeq_head= pack->acc_ext_link;
+
+               ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
+               dest= ip_hdr->ih_dst;
+
+               iroute= iroute_frag(ip_port->ip_port, dest);
+               if (iroute == NULL || iroute->irt_dist == IRTD_UNREACHABLE)
+               {
+                       /* Also unreachable */
+                       /* Finding out if we send a network unreachable is too
+                        * much trouble.
+                        */
+                       if (iroute == NULL)
+                       {
+                               printf("ip[%d]: no route to ",
+                                       ip_port-ip_port_table);
+                               writeIpAddr(dest);
+                               printf("\n");
+                       }
+                       icmp_snd_unreachable(ip_port->ip_port, pack,
+                               ICMP_HOST_UNRCH);
+                       continue;
+               }
+               next_port= &ip_port_table[iroute->irt_port];
+
+               if (ip_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG))
+               {
+                       req_mtu= bf_bufsize(pack);
+                       if (req_mtu > next_port->ip_mtu ||
+                               (iroute->irt_mtu && req_mtu>iroute->irt_mtu))
+                       {
+                               icmp_snd_mtu(ip_port->ip_port, pack,
+                                       next_port->ip_mtu);
+                               continue;
+                       }
+               }
+
+               if (next_port != ip_port)
+               {
+                       if (iroute->irt_gateway != 0)
+                       {
+                               /* Just send the packet to the next gateway */
+                               pack->acc_linkC++; /* Extra ref for ICMP */
+                               r= next_port->ip_dev_send(next_port,
+                                       iroute->irt_gateway,
+                                       pack, IP_LT_NORMAL);
+                               if (r == EDSTNOTRCH)
+                               {
+                                       printf("ip[%d]: gw ",
+                                               ip_port-ip_port_table);
+                                       writeIpAddr(iroute->irt_gateway);
+                                       printf(" on ip[%d] is down for dest ",
+                                               next_port-ip_port_table);
+                                       writeIpAddr(dest);
+                                       printf("\n");
+                                       icmp_snd_unreachable(next_port-
+                                               ip_port_table, pack,
+                                               ICMP_HOST_UNRCH);
+                                       pack= NULL;
+                               }
+                               else
+                               {
+                                       assert(r == 0);
+                                       bf_afree(pack); pack= NULL;
+                               }
+                               continue;
+                       }
+                       /* The packet is for the attached network. Special
+                        * addresses are the ip address of the interface and
+                        * net.0 if no IP_42BSD_BCAST.
+                        */
+                       if (dest == next_port->ip_ipaddr)
+                       {
+                               ip_port_arrive (next_port, pack, ip_hdr);
+                               continue;
+                       }
+                       if (dest == iroute->irt_dest)
+                       {
+                               /* Never forward obsolete directed broadcasts */
+#if IP_42BSD_BCAST && 0
+                               type= IP_LT_BROADCAST;
+#else
+                               /* Bogus destination address */
+                               DBLOCK(1, printf(
+                       "ip[%d]: dropping old-fashioned directed broadcast ",
+                                               ip_port-ip_port_table);
+                                       writeIpAddr(dest);
+                                       printf("\n"););
+                               icmp_snd_unreachable(next_port-ip_port_table,
+                                       pack, ICMP_HOST_UNRCH);
+                               continue;
+#endif
+                       }
+                       else if (dest == (iroute->irt_dest |
+                               ~iroute->irt_subnetmask))
+                       {
+                               if (!ip_forward_directed_bcast)
+                               {
+                                       /* Do not forward directed broadcasts */
+                                       DBLOCK(1, printf(
+                                       "ip[%d]: dropping directed broadcast ",
+                                                       ip_port-ip_port_table);
+                                               writeIpAddr(dest);
+                                               printf("\n"););
+                                       icmp_snd_unreachable(next_port-
+                                               ip_port_table, pack,
+                                               ICMP_HOST_UNRCH);
+                                       continue;
+                               }
+                               else
+                                       type= IP_LT_BROADCAST;
+                       }
+                       else
+                               type= IP_LT_NORMAL;
+
+                       /* Just send the packet to it's destination */
+                       pack->acc_linkC++; /* Extra ref for ICMP */
+                       r= next_port->ip_dev_send(next_port, dest, pack, type);
+                       if (r == EDSTNOTRCH)
+                       {
+                               DBLOCK(1, printf("ip[%d]: next hop ",
+                                       ip_port-ip_port_table);
+                                       writeIpAddr(dest);
+                                       printf(" on ip[%d] is down\n",
+                                       next_port-ip_port_table););
+                               icmp_snd_unreachable(next_port-ip_port_table,
+                                       pack, ICMP_HOST_UNRCH);
+                               pack= NULL;
+                       }
+                       else
+                       {
+                               assert(r == 0 || (printf("r = %d\n", r), 0));
+                               bf_afree(pack); pack= NULL;
+                       }
+                       continue;
+               }
+
+               /* Now we know that the packet should be routed over the same
+                * network as it came from. If there is a next hop gateway,
+                * we can send the packet to that gateway and send a redirect
+                * ICMP to the sender if the sender is on the attached
+                * network. If there is no gateway complain.
+                */
+               if (iroute->irt_gateway == 0)
+               {
+                       printf("ip_arrived: packet should not be here, src=");
+                       writeIpAddr(ip_hdr->ih_src);
+                       printf(" dst=");
+                       writeIpAddr(ip_hdr->ih_dst);
+                       printf("\n");
+                       bf_afree(pack);
+                       continue;
+               }
+               if (((ip_hdr->ih_src ^ ip_port->ip_ipaddr) &
+                       ip_port->ip_subnetmask) == 0)
+               {
+                       /* Finding out if we can send a network redirect
+                        * instead of a host redirect is too much trouble.
+                        */
+                       pack->acc_linkC++;
+                       icmp_snd_redirect(ip_port->ip_port, pack,
+                               ICMP_REDIRECT_HOST, iroute->irt_gateway);
+               }
+               else
+               {
+                       printf("ip_arrived: packet is wrongly routed, src=");
+                       writeIpAddr(ip_hdr->ih_src);
+                       printf(" dst=");
+                       writeIpAddr(ip_hdr->ih_dst);
+                       printf("\n");
+                       printf("in port %d, output %d, dest net ",
+                               ip_port->ip_port, 
+                               iroute->irt_port);
+                       writeIpAddr(iroute->irt_dest);
+                       printf("/");
+                       writeIpAddr(iroute->irt_subnetmask);
+                       printf(" next hop ");
+                       writeIpAddr(iroute->irt_gateway);
+                       printf("\n");
+                       bf_afree(pack);
+                       continue;
+               }
+               /* No code for unreachable ICMPs here. The sender should
+                * process the ICMP redirect and figure it out.
+                */
+               ip_port->ip_dev_send(ip_port, iroute->irt_gateway, pack,
+                       IP_LT_NORMAL);
+       }
+}
+
 PRIVATE int broadcast_dst(ip_port, dest)
 ip_port_t *ip_port;
 ipaddr_t dest;
 {
+       ipaddr_t my_ipaddr, netmask, classmask;
+
        /* Treat class D (multicast) address as broadcasts. */
        if ((dest & HTONL(0xF0000000)) == HTONL(0xE0000000))
        {
@@ -808,39 +981,45 @@ ipaddr_t dest;
        {
                return 1;
        }
-
-       if (((ip_port->ip_ipaddr ^ dest) & ip_port->ip_netmask) != 0)
-       {
-               /* Two possibilities, 0 (iff IP_42BSD_BCAST) and -1 */
-               if (dest == HTONL((ipaddr_t)-1))
-                       return 1;
+       /* Two possibilities, 0 (iff IP_42BSD_BCAST) and -1 */
+       if (dest == HTONL((ipaddr_t)-1))
+               return 1;
 #if IP_42BSD_BCAST
-               if (dest == HTONL((ipaddr_t)0))
-                       return 1;
+       if (dest == HTONL((ipaddr_t)0))
+               return 1;
 #endif
-               return 0;
-       }
-       if (((ip_port->ip_ipaddr ^ dest) & ip_port->ip_subnetmask) != 0)
+       netmask= ip_port->ip_subnetmask;
+       my_ipaddr= ip_port->ip_ipaddr;
+
+       if (((my_ipaddr ^ dest) & netmask) != 0)
        {
-               /* Two possibilities, netwerk.0 (iff IP_42BSD_BCAST) and
-                * netwerk.-1
-                */
-               if ((dest & ~ip_port->ip_netmask) == ~ip_port->ip_netmask)
+               classmask= ip_port->ip_classfulmask;
+
+               /* Not a subnet broadcast, maybe a classful broadcast */
+               if (((my_ipaddr ^ dest) & classmask) != 0)
+               {
+                       return 0;
+               }
+               /* Two possibilities, net.0 (iff IP_42BSD_BCAST) and net.-1 */
+               if ((dest & ~classmask) == ~classmask)
+               {
                        return 1;
+               }
 #if IP_42BSD_BCAST
-               if ((dest & ~ip_port->ip_netmask) == 0)
+               if ((dest & ~classmask) == 0)
                        return 1;
 #endif
                return 0;
        }
 
-       /* Two possibilities, netwerk.subnet.0 (iff IP_42BSD_BCAST) and
-        * netwerk.subnet.-1
-        */
-       if ((dest & ~ip_port->ip_subnetmask) == ~ip_port->ip_subnetmask)
+       if (!(ip_port->ip_flags & IPF_SUBNET_BCAST))
+               return 0;       /* No subnet broadcasts on this network */
+
+       /* Two possibilities, subnet.0 (iff IP_42BSD_BCAST) and subnet.-1 */
+       if ((dest & ~netmask) == ~netmask)
                return 1;
 #if IP_42BSD_BCAST
-       if ((dest & ~ip_port->ip_subnetmask) == 0)
+       if ((dest & ~netmask) == 0)
                return 1;
 #endif
        return 0;
@@ -856,7 +1035,7 @@ ev_arg_t arg;
        ip_port= arg.ev_ptr;
        assert(ev == &ip_port->ip_loopb_event);
 
-       while(pack= ip_port->ip_loopb_head)
+       while(pack= ip_port->ip_loopb_head, pack != NULL)
        {
                ip_port->ip_loopb_head= pack->acc_ext_link;
                ip_arrived(ip_port, pack);
@@ -864,5 +1043,5 @@ ev_arg_t arg;
 }
 
 /*
- * $PchId: ip_read.c,v 1.9 1997/01/31 08:51:39 philip Exp $
+ * $PchId: ip_read.c,v 1.33 2005/06/28 14:18:50 philip Exp $
  */
index 8840e57d7beded331bb8a6417bec13ec633d6339..5438837ab9241f70416edcc7dfbd5cfa1c76acd1 100644 (file)
@@ -59,11 +59,12 @@ size_t data_len;
        ip_port_t *ip_port;
        ip_fd_t *ip_fd;
        ip_hdr_t *ip_hdr, *tmp_hdr;
-       ipaddr_t dstaddr, netmask, nexthop, hostrep_dst;
+       ipaddr_t dstaddr, nexthop, hostrep_dst, my_ipaddr, netmask;
        u8_t *addrInBytes;
        acc_t *tmp_pack, *tmp_pack1;
        int hdr_len, hdr_opt_len, r;
-       int broadcast, ttl;
+       int type, ttl;
+       size_t req_mtu;
        ev_arg_t arg;
 
        ip_fd= &ip_fd_table[fd];
@@ -75,9 +76,16 @@ size_t data_len;
                return EBADMODE;
        }
 
-       data_len= bf_bufsize(data);
+       if (!(ip_fd->if_port->ip_flags & IPF_IPADDRSET))
+       {
+               /* Interface is down. What kind of error do we want? For
+                * the moment, we return OK.
+                */
+               bf_afree(data);
+               return NW_OK;
+       }
 
-       assert(ip_fd->if_port->ip_flags & IPF_IPADDRSET);
+       data_len= bf_bufsize(data);
 
        if (ip_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY)
        {
@@ -179,6 +187,9 @@ size_t data_len;
        if (ip_fd->if_ipopt.nwio_flags & NWIO_REMSPEC)
                ip_hdr->ih_dst= ip_fd->if_ipopt.nwio_rem;
 
+       netmask= ip_port->ip_subnetmask;
+       my_ipaddr= ip_port->ip_ipaddr;
+
        dstaddr= ip_hdr->ih_dst;
        hostrep_dst= ntohl(dstaddr);
        r= 0;
@@ -188,10 +199,13 @@ size_t data_len;
                ;       /* OK, Multicast */
        else if ((hostrep_dst & 0xf0000000l) == 0xf0000000l)
                r= EBADDEST;    /* Bad class */
-       else if ((dstaddr ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask)
+       else if ((dstaddr ^ my_ipaddr) & netmask)
                ;       /* OK, remote destination */
-       else if (!(dstaddr & ~ip_port->ip_subnetmask))
+       else if (!(dstaddr & ~netmask) &&
+               (ip_port->ip_flags & IPF_SUBNET_BCAST))
+       {
                r= EBADDEST;    /* Zero host part */
+       }
        if (r<0)
        {
                DIFBLOCK(1, r == EBADDEST,
@@ -207,6 +221,20 @@ size_t data_len;
        assert (data->acc_length >= IP_MIN_HDR_SIZE);
        ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
 
+       if (ip_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG))
+       {
+               req_mtu= bf_bufsize(data);
+               if (req_mtu > ip_port->ip_mtu)
+               {
+                       DBLOCK(1, printf(
+                       "packet is larger than link MTU and DF is set\n"));
+                       bf_afree(data);
+                       return EPACKSIZE;
+               }
+       }
+       else
+               req_mtu= 0;
+
        addrInBytes= (u8_t *)&dstaddr;
 
        if ((addrInBytes[0] & 0xff) == 0x7f)    /* local loopback */
@@ -231,15 +259,24 @@ size_t data_len;
                return NW_OK;
        }
 
-       if (dstaddr == (ipaddr_t)-1)
+       if ((dstaddr & HTONL(0xe0000000)) == HTONL(0xe0000000))
        {
-               r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data,
-                       /* broadcast */ 1);
-               return r;
+               if (dstaddr == (ipaddr_t)-1)
+               {
+                       r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data,
+                               IP_LT_BROADCAST);
+                       return r;
+               }
+               if (ip_nettype(dstaddr) == IPNT_CLASS_D)
+               {
+                       /* Multicast, what about multicast routing? */
+                       r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data,
+                               IP_LT_MULTICAST);
+                       return r;
+               }
        }
-       netmask= ip_get_netmask(dstaddr);
 
-       if (dstaddr == ip_port->ip_ipaddr)
+       if (dstaddr == my_ipaddr)
        {
                assert (data->acc_linkC == 1);
 
@@ -258,17 +295,18 @@ size_t data_len;
                return NW_OK;
        }
 
-       if (((dstaddr ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) == 0)
+       if (((dstaddr ^ my_ipaddr) & netmask) == 0)
        {
-               broadcast= (dstaddr == (ip_port->ip_ipaddr |
-                       ~ip_port->ip_subnetmask));
+               type= ((dstaddr == (my_ipaddr | ~netmask) &&
+                       (ip_port->ip_flags & IPF_SUBNET_BCAST)) ?
+                       IP_LT_BROADCAST : IP_LT_NORMAL);
 
-               r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data,
-                                                               broadcast);
+               r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data, type);
                return r;
        }
 
-       r= oroute_frag (ip_port - ip_port_table, dstaddr, ttl, &nexthop);
+       r= oroute_frag (ip_port - ip_port_table, dstaddr, ttl, req_mtu, 
+               &nexthop);
 
        if (r == NW_OK)
        {
@@ -289,7 +327,7 @@ size_t data_len;
                else
                {
                        r= (*ip_port->ip_dev_send)(ip_port,
-                               nexthop, data, /* no broadcast */ 0);
+                               nexthop, data, IP_LT_NORMAL);
                }
        }
        else
@@ -308,18 +346,18 @@ int ip_hdr_len;
        ip_hdr->ih_hdr_chk= ~oneC_sum (0, (u16_t *)ip_hdr, ip_hdr_len);
 }
 
-PUBLIC acc_t *ip_split_pack (ip_port, ref_last, first_size)
+PUBLIC acc_t *ip_split_pack (ip_port, ref_last, mtu)
 ip_port_t *ip_port;
 acc_t **ref_last;
-int first_size;
+int mtu;
 {
        int pack_siz;
        ip_hdr_t *first_hdr, *second_hdr;
        int first_hdr_len, second_hdr_len;
        int first_data_len, second_data_len;
-       int new_first_data_len;
+       int data_len, max_data_len, nfrags, new_first_data_len;
        int first_opt_size, second_opt_size;
-       acc_t *first_pack, *second_pack, *tmp_pack, *tmp_pack1;
+       acc_t *first_pack, *second_pack, *tmp_pack;
        u8_t *first_optptr, *second_optptr;
        int i, optlen;
 
@@ -327,26 +365,53 @@ int first_size;
        *ref_last= 0;
        second_pack= 0;
 
+       first_pack= bf_align(first_pack, IP_MIN_HDR_SIZE, 4);
        first_pack= bf_packIffLess(first_pack, IP_MIN_HDR_SIZE);
        assert (first_pack->acc_length >= IP_MIN_HDR_SIZE);
 
        first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack);
        first_hdr_len= (first_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;
+       if (first_hdr_len>IP_MIN_HDR_SIZE)
+       {
+               first_pack= bf_packIffLess(first_pack, first_hdr_len);
+               first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack);
+       }
 
        pack_siz= bf_bufsize(first_pack);
-       assert(pack_siz > first_size);
+       assert(pack_siz > mtu);
 
-       if (first_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG))
+       assert (!(first_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)));
+
+       if (first_pack->acc_linkC != 1 ||
+               first_pack->acc_buffer->buf_linkC != 1)
+       {
+               /* Get a private copy of the IP header */
+               tmp_pack= bf_memreq(first_hdr_len);
+               memcpy(ptr2acc_data(tmp_pack), first_hdr, first_hdr_len);
+               first_pack= bf_delhead(first_pack, first_hdr_len);
+               tmp_pack->acc_next= first_pack;
+               first_pack= tmp_pack; tmp_pack= NULL;
+               first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack);
+       }
+
+       data_len= ntohs(first_hdr->ih_length) - first_hdr_len;
+
+       /* Try to split the packet evenly. */
+       assert(mtu > first_hdr_len);
+       max_data_len= mtu-first_hdr_len;
+       nfrags= (data_len/max_data_len)+1;
+       new_first_data_len= data_len/nfrags;
+       if (new_first_data_len < 8)
        {
-               icmp_snd_unreachable(ip_port->ip_port, first_pack,
-                       ICMP_FRAGM_AND_DF);
-               return NULL;
+               /* Special case for extremely small MTUs */
+               new_first_data_len= 8;
        }
+       new_first_data_len &= ~7; /* data goes in 8 byte chuncks */
 
-       first_data_len= ntohs(first_hdr->ih_length) - first_hdr_len;
-       new_first_data_len= (first_size- first_hdr_len) & ~7;
-               /* data goes in 8 byte chuncks */
-       second_data_len= first_data_len-new_first_data_len;
+       assert(new_first_data_len >= 8);
+       assert(new_first_data_len+first_hdr_len <= mtu);
+
+       second_data_len= data_len-new_first_data_len;
        second_pack= bf_cut(first_pack, first_hdr_len+
                new_first_data_len, second_data_len);
        tmp_pack= first_pack;
@@ -406,7 +471,7 @@ int first_size;
        }
        second_hdr_len= IP_MIN_HDR_SIZE + second_opt_size;
 
-       second_hdr->ih_vers_ihl= second_hdr->ih_vers_ihl & 0xf0
+       second_hdr->ih_vers_ihl= (second_hdr->ih_vers_ihl & 0xf0)
                + (second_hdr_len/4);
        second_hdr->ih_length= htons(second_data_len+
                second_hdr_len);
@@ -421,7 +486,7 @@ int first_size;
        assert (!(second_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)));
 
        ip_hdr_chksum(first_hdr, first_hdr_len);
-       if (second_data_len+second_hdr_len <= first_size)
+       if (second_data_len+second_hdr_len <= mtu)
        {
                /* second_pack will not be split any further, so we have to
                 * calculate the header checksum.
@@ -430,6 +495,7 @@ int first_size;
        }
 
        *ref_last= second_pack;
+
        return first_pack;
 }
 
@@ -440,12 +506,10 @@ int error;
        if ((*ip_fd->if_get_userdata)(ip_fd->if_srfd, (size_t)error,
                (size_t)0, FALSE))
        {
-#if !CRAMPED
                ip_panic(( "can't error_reply" ));
-#endif
        }
 }
 
 /*
- * $PchId: ip_write.c,v 1.7.1.1.1.1 2001/01/22 19:59:07 philip Exp $
+ * $PchId: ip_write.c,v 1.22 2004/08/03 11:11:04 philip Exp $
  */
index cb2996b9b10fae625b1cd2ffe12de5eabc93e344..f9137caef381cea5b0d04aa3adcfb311ad4aca6d 100644 (file)
@@ -17,7 +17,7 @@ Copyright 1995 Philip Homburg
 
 THIS_FILE
 
-#define OROUTE_NR              32
+#define OROUTE_NR              128
 #define OROUTE_STATIC_NR       16
 #define OROUTE_HASH_ASS_NR      4
 #define OROUTE_HASH_NR         32
@@ -40,7 +40,7 @@ PRIVATE oroute_t *oroute_head;
 PRIVATE int static_oroute_nr;
 PRIVATE oroute_hash_t oroute_hash_table[OROUTE_HASH_NR][OROUTE_HASH_ASS_NR];
 
-#define IROUTE_NR              (sizeof(int) == 2 ? 64 : 512)
+#define IROUTE_NR              512
 #define IROUTE_HASH_ASS_NR      4
 #define IROUTE_HASH_NR         32
 #define IROUTE_HASH_MASK       (IROUTE_HASH_NR-1)
@@ -64,8 +64,8 @@ FORWARD oroute_t *oroute_find_ent ARGS(( int port_nr, ipaddr_t dest ));
 FORWARD void oroute_del ARGS(( oroute_t *oroute ));
 FORWARD oroute_t *sort_dists ARGS(( oroute_t *oroute ));
 FORWARD oroute_t *sort_gws ARGS(( oroute_t *oroute ));
-FORWARD        oroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask ));
-FORWARD        iroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask ));
+FORWARD        void oroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask ));
+FORWARD        void iroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask ));
 
 PUBLIC void ipr_init()
 {
@@ -73,17 +73,13 @@ PUBLIC void ipr_init()
        oroute_t *oroute;
        iroute_t *iroute;
 
-#if ZERO
        for (i= 0, oroute= oroute_table; i<OROUTE_NR; i++, oroute++)
                oroute->ort_flags= ORTF_EMPTY;
        static_oroute_nr= 0;
-#endif
        assert(OROUTE_HASH_ASS_NR == 4);
 
-#if ZERO
        for (i= 0, iroute= iroute_table; i<IROUTE_NR; i++, iroute++)
                iroute->irt_flags= IRTF_EMPTY;
-#endif
        assert(IROUTE_HASH_ASS_NR == 4);
 }
 
@@ -92,14 +88,12 @@ PUBLIC iroute_t *iroute_frag(port_nr, dest)
 int port_nr;
 ipaddr_t dest;
 {
-       int hash, i, r_hash_ind;
+       int hash, i;
        iroute_hash_t *iroute_hash;
        iroute_hash_t tmp_hash;
        iroute_t *iroute, *bestroute;
-       time_t currtim;
        unsigned long hash_tmp;
-
-       currtim= get_time();
+       u32_t tmp_mask;
 
        hash= hash_iroute(port_nr, dest, hash_tmp);
        iroute_hash= &iroute_hash_table[hash][0];
@@ -150,11 +144,12 @@ ipaddr_t dest;
                /* More specific netmasks are better */
                if (iroute->irt_subnetmask != bestroute->irt_subnetmask)
                {
-                       if (ntohl(iroute->irt_subnetmask) > 
-                               ntohl(bestroute->irt_subnetmask))
-                       {
+                       /* Using two ntohl macros in one expression
+                        * is not allowed (tmp_l is modified twice)
+                        */
+                       tmp_mask= ntohl(iroute->irt_subnetmask);
+                       if (tmp_mask > ntohl(bestroute->irt_subnetmask))
                                bestroute= iroute;
-                       }
                        continue;
                }
                        
@@ -189,10 +184,11 @@ ipaddr_t dest;
        return bestroute;
 }
 
-PUBLIC int oroute_frag(port_nr, dest, ttl, nexthop)
+PUBLIC int oroute_frag(port_nr, dest, ttl, msgsize, nexthop)
 int port_nr;
 ipaddr_t dest;
 int ttl;
+size_t msgsize;
 ipaddr_t *nexthop;
 {
        oroute_t *oroute;
@@ -200,6 +196,11 @@ ipaddr_t *nexthop;
        oroute= oroute_find_ent(port_nr, dest);
        if (!oroute || oroute->ort_dist > ttl)
                return EDSTNOTRCH;
+       if (msgsize && oroute->ort_mtu && 
+               oroute->ort_mtu < msgsize)
+       {
+               return EPACKSIZE;
+       }
 
        *nexthop= oroute->ort_gateway;
        return NW_OK;
@@ -207,13 +208,14 @@ ipaddr_t *nexthop;
 
 
 PUBLIC int ipr_add_oroute(port_nr, dest, subnetmask, gateway, 
-       timeout, dist, static_route, preference, oroute_p)
+       timeout, dist, mtu, static_route, preference, oroute_p)
 int port_nr;
 ipaddr_t dest;
 ipaddr_t subnetmask;
 ipaddr_t gateway;
 time_t timeout;
 int dist;
+int mtu;
 int static_route;
 i32_t preference;
 oroute_t **oroute_p;
@@ -222,24 +224,30 @@ oroute_t **oroute_p;
        ip_port_t *ip_port;
        oroute_t *oroute, *oldest_route, *prev, *nw_route, *gw_route, 
                *prev_route;
-       time_t currtim;
+       time_t currtim, exp_tim, exp_tim_orig;
 
        oldest_route= 0;
        currtim= get_time();
+       if (timeout)
+               exp_tim= timeout+currtim;
+       else
+               exp_tim= 0;
 
        DBLOCK(0x10, 
-               printf("adding oroute to "); writeIpAddr(dest);
+               printf("ip[%d]: adding oroute to ", port_nr);
+               writeIpAddr(dest);
                printf("["); writeIpAddr(subnetmask); printf("] through ");
                writeIpAddr(gateway);
-               printf(" timeout: %lds, distance %d\n",
-                       (long)timeout/HZ, dist));
+               printf(" timeout: %lds, distance %d, pref %ld, mtu %d\n",
+                       (long)timeout/HZ, dist, (long)preference, mtu));
 
        ip_port= &ip_port_table[port_nr];
 
        /* Validate gateway */
        if (((gateway ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) != 0)
        {
-               DBLOCK(2, printf("ipr_add_oroute: invalid gateway: "); writeIpAddr(gateway); printf("\n"));
+               DBLOCK(1, printf("ip[%d]: (ipr_add_oroute) invalid gateway: ",
+                       port_nr); writeIpAddr(gateway); printf("\n"));
                return EINVAL;
        }
 
@@ -273,24 +281,27 @@ oroute_t **oroute_p;
                                continue;
                        if (oroute->ort_dist > dist)
                                continue;
-                       if (oroute->ort_dist == dist && 
-                               oroute->ort_pref == preference)
-                       {
-                               if (timeout)
-                                       oroute->ort_exp_tim= currtim + timeout;
-                               else
-                                       oroute->ort_exp_tim= 0;
-                               oroute->ort_timestamp= currtim;
-                               assert(oroute->ort_port == port_nr);
-                               if (oroute_p != NULL)
-                                       *oroute_p= oroute;
-                               return NW_OK;
-                       }
                        break;
                }
                if (oroute)
                {
                        assert(oroute->ort_port == port_nr);
+                       if (dest != 0)
+                       {
+                               /* The new expire should not be later
+                                * than the old expire time. Except for
+                                * default routes, where the expire time
+                                * is simply set to the new value.
+                                */
+                               exp_tim_orig= oroute->ort_exp_tim;
+                               if (!exp_tim)
+                                       exp_tim= exp_tim_orig;
+                               else if (exp_tim_orig &&
+                                       exp_tim > exp_tim_orig)
+                               {
+                                       exp_tim= exp_tim_orig;
+                               }
+                       }
                        oroute_del(oroute);
                        oroute->ort_flags= 0;
                        oldest_route= oroute;
@@ -341,12 +352,10 @@ oroute_t **oroute_p;
        oldest_route->ort_dest= dest;
        oldest_route->ort_gateway= gateway;
        oldest_route->ort_subnetmask= subnetmask;
-       if (timeout)
-               oldest_route->ort_exp_tim= currtim + timeout;
-       else
-               oldest_route->ort_exp_tim= 0;
+       oldest_route->ort_exp_tim= exp_tim;
        oldest_route->ort_timestamp= currtim;
        oldest_route->ort_dist= dist;
+       oldest_route->ort_mtu= mtu;
        oldest_route->ort_port= port_nr;
        oldest_route->ort_flags= ORTF_INUSE;
        oldest_route->ort_pref= preference;
@@ -357,12 +366,12 @@ oroute_t **oroute_p;
         * and insert the entry during the reconstruction.
         */
        for (prev= 0, nw_route= oroute_head; nw_route; 
-                               prev= nw_route, nw_route= nw_route->ort_nextnw)
+               prev= nw_route, nw_route= nw_route->ort_nextnw)
        {
                if (nw_route->ort_port != port_nr)
                        continue;
                if (nw_route->ort_dest == dest &&
-                                       nw_route->ort_subnetmask == subnetmask)
+                       nw_route->ort_subnetmask == subnetmask)
                {
                        if (prev)
                                prev->ort_nextnw= nw_route->ort_nextnw;
@@ -373,7 +382,7 @@ oroute_t **oroute_p;
        }
        prev_route= nw_route;
        for(prev= NULL, gw_route= nw_route; gw_route; 
-                               prev= gw_route, gw_route= gw_route->ort_nextgw)
+               prev= gw_route, gw_route= gw_route->ort_nextgw)
        {
                if (gw_route->ort_gateway == gateway)
                {
@@ -399,6 +408,92 @@ oroute_t **oroute_p;
        return NW_OK;
 }
 
+PUBLIC int ipr_del_oroute(port_nr, dest, subnetmask, gateway, static_route)
+int port_nr;
+ipaddr_t dest;
+ipaddr_t subnetmask;
+ipaddr_t gateway;
+int static_route;      /* compared with a 0/1 value below, so pass 0 or 1 */
+{      /* Delete one output route: ESRCH if no entry matches, else NW_OK. */
+       int i;
+       oroute_t *oroute;
+       /* Find the in-use entry that matches every argument. */
+       for(i= 0, oroute= oroute_table; i<OROUTE_NR; i++, oroute++)
+       {
+               if ((oroute->ort_flags & ORTF_INUSE) == 0)
+                       continue;
+               if (oroute->ort_port != port_nr ||
+                       oroute->ort_dest != dest ||
+                       oroute->ort_subnetmask != subnetmask ||
+                       oroute->ort_gateway != gateway)
+               {
+                       continue;
+               }
+               if (!!(oroute->ort_flags & ORTF_STATIC) != static_route)
+                       continue;
+               break;
+       }
+       /* i == OROUTE_NR means the scan ended without a match. */
+       if (i == OROUTE_NR)
+               return ESRCH;
+       /* Keep the static route counter in step with the table. */
+       if (static_route)
+               static_oroute_nr--;
+       /* Hand the entry to oroute_del(), then clear its in-use flag. */
+       oroute_del(oroute);
+       oroute->ort_flags &= ~ORTF_INUSE;
+       return NW_OK;
+}
+
+
+
+PUBLIC void ipr_chk_otab(port_nr, addr, mask)
+int port_nr;
+ipaddr_t addr;
+ipaddr_t mask;
+{      /* Delete output routes of port_nr whose gateway is outside addr/mask. */
+       int i;
+       oroute_t *oroute;
+       /* Debug trace of the arguments. */
+       DBLOCK(1,
+               printf("ip[%d] (ipr_chk_otab): addr ", port_nr);
+               writeIpAddr(addr);
+               printf(" mask ");
+               writeIpAddr(mask);
+               printf("\n");
+       );
+
+       if (addr == 0)
+       {
+               /* Special hack to flush entries for an interface that
+                * goes down.
+                */
+               addr= mask= HTONL(0xffffffff);
+       }
+       /* Keep routes of other ports and routes whose gateway fits addr/mask. */
+       for(i= 0, oroute= oroute_table; i<OROUTE_NR; i++, oroute++)
+       {
+               if ((oroute->ort_flags & ORTF_INUSE) == 0)
+                       continue;
+               if (oroute->ort_port != port_nr ||
+                       ((oroute->ort_gateway ^ addr) & mask) == 0)
+               {
+                       continue;
+               }
+               DBLOCK(1, printf("ip[%d] (ipr_chk_otab): deleting route to ",
+                               port_nr);
+                       writeIpAddr(oroute->ort_dest);
+                       printf(" gw ");
+                       writeIpAddr(oroute->ort_gateway);
+                       printf("\n"));
+               /* Static routes are counted in static_oroute_nr. */
+               if (oroute->ort_flags & ORTF_STATIC)
+                       static_oroute_nr--;
+               oroute_del(oroute);
+               oroute->ort_flags &= ~ORTF_INUSE;
+       }
+}
+
 
 PUBLIC void ipr_gateway_down(port_nr, gateway, timeout)
 int port_nr;
@@ -421,7 +516,8 @@ time_t timeout;
                        continue;
                result= ipr_add_oroute(port_nr, route_ind->ort_dest, 
                        route_ind->ort_subnetmask, gateway, 
-                       timeout, ORTD_UNREACHABLE, FALSE, 0, NULL);
+                       timeout, ORTD_UNREACHABLE, route_ind->ort_mtu,
+                       FALSE, 0, NULL);
                assert(result == NW_OK);
        }
 }
@@ -440,13 +536,14 @@ time_t timeout;
 
        if (!oroute)
        {
-               DBLOCK(1, printf("got a dest unreachable for ");
+               DBLOCK(1, printf("ip[%d]: got a dest unreachable for ",
+                       port_nr);
                        writeIpAddr(dest); printf("but no route present\n"));
 
                return;
        }
        result= ipr_add_oroute(port_nr, dest, netmask, oroute->ort_gateway, 
-               timeout, ORTD_UNREACHABLE, FALSE, 0, NULL);
+               timeout, ORTD_UNREACHABLE, oroute->ort_mtu, FALSE, 0, NULL);
        assert(result == NW_OK);
 }
 
@@ -461,29 +558,41 @@ ipaddr_t new_gateway;
 time_t timeout;
 {
        oroute_t *oroute;
+       ip_port_t *ip_port;
        int result;
 
+       ip_port= &ip_port_table[port_nr];
        oroute= oroute_find_ent(port_nr, dest);
 
        if (!oroute)
        {
-               DBLOCK(1, printf("got a redirect for ");
+               DBLOCK(1, printf("ip[%d]: got a redirect for ", port_nr);
                        writeIpAddr(dest); printf("but no route present\n"));
                return;
        }
        if (oroute->ort_gateway != old_gateway)
        {
-               DBLOCK(1, printf("got a redirect from ");
+               DBLOCK(1, printf("ip[%d]: got a redirect from ", port_nr);
                        writeIpAddr(old_gateway); printf(" for ");
                        writeIpAddr(dest); printf(" but curr gateway is ");
                        writeIpAddr(oroute->ort_gateway); printf("\n"));
                return;
        }
+       if ((new_gateway ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask)
+       {
+               DBLOCK(1, printf("ip[%d]: redirect from ", port_nr);
+                       writeIpAddr(old_gateway); printf(" for ");
+                       writeIpAddr(dest); printf(" but new gateway ");
+                       writeIpAddr(new_gateway);
+                       printf(" is not on local subnet\n"));
+               return;
+       }
        if (oroute->ort_flags & ORTF_STATIC)
        {
                if (oroute->ort_dest == dest)
                {
-                       DBLOCK(1, printf("got a redirect for ");
+                       DBLOCK(1, printf("ip[%d]: got a redirect for ",
+                               port_nr);
                                writeIpAddr(dest);
                                printf("but route is fixed\n"));
                        return;
@@ -493,11 +602,11 @@ time_t timeout;
        {
                result= ipr_add_oroute(port_nr, dest, netmask, 
                        oroute->ort_gateway, HZ, ORTD_UNREACHABLE, 
-                       FALSE, 0, NULL);
+                       oroute->ort_mtu, FALSE, 0, NULL);
                assert(result == NW_OK);
        }
        result= ipr_add_oroute(port_nr, dest, netmask, new_gateway,
-               timeout, 1, FALSE, 0, NULL);
+               timeout, 1, oroute->ort_mtu, FALSE, 0, NULL);
        assert(result == NW_OK);
 }
 
@@ -516,18 +625,20 @@ time_t timeout;
 
        if (!oroute)
        {
-               DBLOCK(1, printf("got a ttl exceeded for ");
+               DBLOCK(1, printf("ip[%d]: got a ttl exceeded for ",
+                       port_nr);
                        writeIpAddr(dest); printf("but no route present\n"));
                return;
        }
 
        new_dist= oroute->ort_dist * 2;
-       if (new_dist>IP_MAX_TTL)
+       if (new_dist > IP_DEF_TTL)
        {
                new_dist= oroute->ort_dist+1;
-               if (new_dist>IP_MAX_TTL)
+               if (new_dist >= IP_DEF_TTL)
                {
-                       DBLOCK(1, printf("got a ttl exceeded for ");
+                       DBLOCK(1, printf("ip[%d]: got a ttl exceeded for ",
+                               port_nr);
                                writeIpAddr(dest);
                                printf(" but dist is %d\n",
                                oroute->ort_dist));
@@ -536,7 +647,37 @@ time_t timeout;
        }
 
        result= ipr_add_oroute(port_nr, dest, netmask, oroute->ort_gateway, 
-               timeout, new_dist, FALSE, 0, NULL);
+               timeout, new_dist, oroute->ort_mtu, FALSE, 0, NULL);
+       assert(result == NW_OK);
+}
+
+PUBLIC void ipr_mtu(port_nr, dest, mtu, timeout)
+int port_nr;
+ipaddr_t dest;
+u16_t mtu;
+time_t timeout;
+{      /* Record a smaller mtu for dest by adding a route that carries it. */
+       oroute_t *oroute;
+       int result;
+       /* Look up the route currently found for dest on this port. */
+       oroute= oroute_find_ent(port_nr, dest);
+
+       if (!oroute)
+       {
+               DBLOCK(1, printf("ip[%d]: got a mtu exceeded for ",
+                       port_nr);
+                       writeIpAddr(dest); printf("but no route present\n"));
+               return;
+       }
+       /* Ignore values below IP_MIN_MTU and values that are not lower. */
+       if (mtu <  IP_MIN_MTU)
+               return;
+       if (oroute->ort_mtu && mtu >= oroute->ort_mtu)
+               return;         /* Only decrease mtu */
+       /* Same gateway and distance as found; all-ones netmask; new mtu. */
+       result= ipr_add_oroute(port_nr, dest, HTONL(0xffffffff),
+               oroute->ort_gateway, timeout, oroute->ort_dist, mtu,
+               FALSE, 0, NULL);
        assert(result == NW_OK);
 }
 
@@ -572,6 +713,7 @@ nwio_route_t *route_ent;
                        route_ent->nwr_flags |= NWRF_STATIC;
        }
        route_ent->nwr_pref= oroute->ort_pref;
+       route_ent->nwr_mtu= oroute->ort_mtu;
        route_ent->nwr_ifaddr= ip_get_ifaddr(oroute->ort_port);
        return NW_OK;
 }
@@ -581,12 +723,13 @@ PRIVATE oroute_t *oroute_find_ent(port_nr, dest)
 int port_nr;
 ipaddr_t dest;
 {
-       int hash, i, r_hash_ind;
+       int hash;
        oroute_hash_t *oroute_hash;
        oroute_hash_t tmp_hash;
        oroute_t *oroute, *bestroute;
        time_t currtim;
        unsigned long hash_tmp;
+       u32_t tmp_mask;
 
        currtim= get_time();
 
@@ -645,8 +788,11 @@ ipaddr_t dest;
                        continue;
                }
                assert(oroute->ort_dest != bestroute->ort_dest);
-               if (ntohl(oroute->ort_subnetmask) > 
-                       ntohl(bestroute->ort_subnetmask))
+               /* Using two ntohl macros in one expression
+                * is not allowed (tmp_l is modified twice)
+                */
+               tmp_mask= ntohl(oroute->ort_subnetmask);
+               if (tmp_mask > ntohl(bestroute->ort_subnetmask))
                {
                        bestroute= oroute;
                        continue;
@@ -670,6 +816,19 @@ oroute_t *oroute;
 {
        oroute_t *prev, *nw_route, *gw_route, *dist_route, *prev_route;
 
+       DBLOCK(0x10, 
+               printf("ip[%d]: deleting oroute to ", oroute->ort_port);
+               writeIpAddr(oroute->ort_dest);
+               printf("["); writeIpAddr(oroute->ort_subnetmask);
+               printf("] through ");
+               writeIpAddr(oroute->ort_gateway);
+               printf(
+       " timestamp %lds, timeout: %lds, distance %d pref %ld mtu %ld ",
+                       (long)oroute->ort_timestamp/HZ,
+                       (long)oroute->ort_exp_tim/HZ, oroute->ort_dist,
+                       (long)oroute->ort_pref, (long)oroute->ort_mtu);
+               printf("flags 0x%x\n", oroute->ort_flags));
+
        for (prev= NULL, nw_route= oroute_head; nw_route; 
                                prev= nw_route, nw_route= nw_route->ort_nextnw)
        {
@@ -735,6 +894,8 @@ oroute_t *oroute;
        int best_dist, best_pref;
 
        best= NULL;
+       best_dist= best_pref= 0;
+       best_prev= NULL;
        for (prev= NULL, r= oroute; r; prev= r, r= r->ort_nextdist)
        {
                if (best == NULL)
@@ -777,6 +938,8 @@ oroute_t *oroute;
        int best_dist, best_pref;
 
        best= NULL;
+       best_dist= best_pref= 0;
+       best_prev= NULL;
        for (prev= NULL, r= oroute; r; prev= r, r= r->ort_nextgw)
        {
                if (best == NULL)
@@ -812,7 +975,7 @@ oroute_t *oroute;
 }
 
 
-PRIVATE        oroute_uncache_nw(dest, netmask)
+PRIVATE        void oroute_uncache_nw(dest, netmask)
 ipaddr_t dest;
 ipaddr_t netmask;
 {
@@ -849,6 +1012,7 @@ nwio_route_t *route_ent;
 
        iroute= &iroute_table[ent_no];
 
+       route_ent->nwr_ent_no= ent_no;
        route_ent->nwr_ent_count= IROUTE_NR;
        route_ent->nwr_dest= iroute->irt_dest;
        route_ent->nwr_netmask= iroute->irt_subnetmask;
@@ -864,23 +1028,38 @@ nwio_route_t *route_ent;
                        route_ent->nwr_flags |= NWRF_UNREACHABLE;
        }
        route_ent->nwr_pref= 0;
+       route_ent->nwr_mtu= iroute->irt_mtu;
        route_ent->nwr_ifaddr= ip_get_ifaddr(iroute->irt_port);
        return NW_OK;
 }
 
 
 PUBLIC int ipr_add_iroute(port_nr, dest, subnetmask, gateway, 
-       dist, static_route, iroute_p)
+       dist, mtu, static_route, iroute_p)
 int port_nr;
 ipaddr_t dest;
 ipaddr_t subnetmask;
 ipaddr_t gateway;
 int dist;
+int mtu;
 int static_route;
 iroute_t **iroute_p;
 {
        int i;
        iroute_t *iroute, *unused_route;
+       ip_port_t *ip_port;
+
+       ip_port= &ip_port_table[port_nr];
+
+       /* Check gateway */
+       if (((gateway ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) != 0 &&
+               gateway != 0)
+       {
+               DBLOCK(1, printf("ip[%d] (ipr_add_iroute): invalid gateway: ",
+                       port_nr);
+                       writeIpAddr(gateway); printf("\n"));
+               return EINVAL;
+       }
 
        unused_route= NULL;
        if (static_route)
@@ -932,6 +1111,7 @@ iroute_t **iroute_p;
        iroute->irt_subnetmask= subnetmask;
        iroute->irt_gateway= gateway;
        iroute->irt_dist= dist;
+       iroute->irt_mtu= mtu;
        iroute->irt_flags= IRTF_INUSE;
        if (static_route)
                iroute->irt_flags |= IRTF_STATIC;
@@ -943,13 +1123,11 @@ iroute_t **iroute_p;
 }
 
 
-PUBLIC int ipr_del_iroute(port_nr, dest, subnetmask, gateway, 
-       dist, static_route)
+PUBLIC int ipr_del_iroute(port_nr, dest, subnetmask, gateway, static_route)
 int port_nr;
 ipaddr_t dest;
 ipaddr_t subnetmask;
 ipaddr_t gateway;
-int dist;
 int static_route;
 {
        int i;
@@ -983,7 +1161,63 @@ int static_route;
 }
 
 
-PRIVATE        iroute_uncache_nw(dest, netmask)
+PUBLIC void ipr_chk_itab(port_nr, addr, mask)
+int port_nr;
+ipaddr_t addr;
+ipaddr_t mask;
+{      /* Delete input routes of port_nr whose gateway is outside addr/mask. */
+       int i;
+       iroute_t *iroute;
+       /* Debug trace of the arguments. */
+       DBLOCK(1,
+               printf("ip[%d] (ipr_chk_itab): addr ", port_nr);
+               writeIpAddr(addr);
+               printf(" mask ");
+               writeIpAddr(mask);
+               printf("\n");
+       );
+
+       if (addr == 0)
+       {
+               /* Special hack to flush entries for an interface that
+                * goes down.
+                */
+               addr= mask= HTONL(0xffffffff);
+       }
+       /* Examine every in-use input route that belongs to this port. */
+       for(i= 0, iroute= iroute_table; i<IROUTE_NR; i++, iroute++)
+       {
+               if ((iroute->irt_flags & IRTF_INUSE) == 0)
+                       continue;
+               if (iroute->irt_port != port_nr)
+                       continue;
+               if (iroute->irt_gateway == 0)
+               {
+                       /* Special case: attached network. */
+                       if (iroute->irt_subnetmask == mask &&
+                               iroute->irt_dest == (addr & mask))
+                       {
+                               /* Nothing changed. */
+                               continue;
+                       }
+               }       /* otherwise falls through to the gateway test */
+               if (((iroute->irt_gateway ^ addr) & mask) == 0)
+                       continue;
+               /* Not kept by the tests above: log the route and remove it. */
+               DBLOCK(1, printf("ip[%d] (ipr_chk_itab): deleting route to ",
+                               port_nr);
+                   writeIpAddr(iroute->irt_dest);
+                   printf(" gw ");
+                   writeIpAddr(iroute->irt_gateway);
+                   printf("\n"));
+               /* Call iroute_uncache_nw() first, then clear the in-use flag. */
+               iroute_uncache_nw(iroute->irt_dest, iroute->irt_subnetmask);
+               iroute->irt_flags &= ~IRTF_INUSE;
+       }
+}
+
+
+PRIVATE        void iroute_uncache_nw(dest, netmask)
 ipaddr_t dest;
 ipaddr_t netmask;
 {
@@ -1008,9 +1242,5 @@ ipaddr_t netmask;
 
 
 /*
- * Debugging, management
- */
-
-/*
- * $PchId: ipr.c,v 1.9 1996/07/31 17:26:33 philip Exp $
+ * $PchId: ipr.c,v 1.23 2003/01/22 11:49:58 philip Exp $
  */
index 552515d761a261b073bd9529285b4818aea7cdda..bece6b056f2a7c6f66246e203001c569f7147a1a 100644 (file)
@@ -14,6 +14,7 @@ typedef struct oroute
        ipaddr_t ort_subnetmask;
        int ort_dist;
        i32_t ort_pref;
+       u32_t ort_mtu;
        ipaddr_t ort_gateway;
        time_t ort_exp_tim;
        time_t ort_timestamp;
@@ -36,6 +37,7 @@ typedef struct iroute
        ipaddr_t irt_gateway;
        ipaddr_t irt_subnetmask;
        int irt_dist;
+       u32_t irt_mtu;
        int irt_port;
        int irt_flags;
 } iroute_t;
@@ -50,22 +52,28 @@ typedef struct iroute
 #define IPR_TTL_TIMEOUT                (60L * HZ)
 #define IPR_REDIRECT_TIMEOUT   (20 * 60L * HZ)
 #define IPR_GW_DOWN_TIMEOUT    (60L * HZ)
+#define IPR_MTU_TIMEOUT                (10*60L * HZ)   /* RFC-1191 */
 
 /* Prototypes */
 
 iroute_t *iroute_frag ARGS(( int port_nr, ipaddr_t dest ));
-int oroute_frag ARGS(( int port_nr, ipaddr_t dest, int ttl, 
+int oroute_frag ARGS(( int port_nr, ipaddr_t dest, int ttl, size_t msgsize,
                                                        ipaddr_t *nexthop ));
 void ipr_init ARGS(( void ));
 int ipr_get_iroute ARGS(( int ent_no, nwio_route_t *route_ent ));
 int ipr_add_iroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, 
-       ipaddr_t gateway, int dist, int static_route, iroute_t **route_p ));
+       ipaddr_t gateway, int dist, int mtu, int static_route,
+       iroute_t **route_p ));
 int ipr_del_iroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, 
-       ipaddr_t gateway, int dist, int static_route ));
+       ipaddr_t gateway, int static_route ));
+void ipr_chk_itab ARGS(( int port_nr, ipaddr_t addr, ipaddr_t mask ));
 int ipr_get_oroute ARGS(( int ent_no, nwio_route_t *route_ent ));
 int ipr_add_oroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, 
-       ipaddr_t gateway, time_t timeout, int dist, int static_route,
+       ipaddr_t gateway, time_t timeout, int dist, int mtu, int static_route,
        i32_t preference, oroute_t **route_p ));
+int ipr_del_oroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, 
+       ipaddr_t gateway, int static_route ));
+void ipr_chk_otab ARGS(( int port_nr, ipaddr_t addr, ipaddr_t mask ));
 void ipr_gateway_down ARGS(( int port_nr, ipaddr_t gateway, time_t timeout ));
 void ipr_redirect ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask,
        ipaddr_t old_gateway, ipaddr_t new_gateway, time_t timeout ));
@@ -73,9 +81,10 @@ void ipr_destunrch ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask,
        time_t timeout ));
 void ipr_ttl_exc ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask,
        time_t timeout ));
+void ipr_mtu ARGS(( int port_nr, ipaddr_t dest, U16_t mtu, time_t timeout ));
 
 #endif /* IPR_H */
 
 /*
- * $PchId: ipr.h,v 1.4 1995/11/21 06:45:27 philip Exp $
+ * $PchId: ipr.h,v 1.8 2002/06/09 07:48:11 philip Exp $
  */
index 7fdeb443b200a3aff4566722c178aaad86281eb1..5860564427f383417acf15d744bfd53381fdbf4f 100644 (file)
@@ -17,8 +17,6 @@ Copyright 1995 Philip Homburg
 #include "psip.h"
 #include "sr.h"
 
-#if ENABLE_PSIP
-
 THIS_FILE
 
 typedef struct psip_port
@@ -55,21 +53,24 @@ typedef struct psip_fd
 #define PFF_INUSE      1
 #define PFF_READ_IP    2
 #define PFF_PROMISC    4
+#define PFF_NEXTHOP    8
 
 PRIVATE psip_port_t *psip_port_table;
 PRIVATE psip_fd_t psip_fd_table[PSIP_FD_NR];
 
 FORWARD int psip_open ARGS(( int port, int srfd,
        get_userdata_t get_userdata, put_userdata_t put_userdata,
-       put_pkt_t pkt_pkt ));
+       put_pkt_t pkt_pkt, select_res_t select_res ));
 FORWARD int psip_ioctl ARGS(( int fd, ioreq_t req ));
 FORWARD int psip_read ARGS(( int fd, size_t count ));
 FORWARD int psip_write ARGS(( int fd, size_t count ));
+FORWARD int psip_select ARGS(( int port_nr, unsigned operations ));
 FORWARD void psip_close ARGS(( int fd ));
 FORWARD int psip_cancel ARGS(( int fd, int which_operation ));
 FORWARD void promisc_restart_read ARGS(( psip_port_t *psip_port ));
 FORWARD int psip_setopt ARGS(( psip_fd_t *psip_fd, nwio_psipopt_t *newoptp ));
 FORWARD void psip_buffree ARGS(( int priority ));
+FORWARD void check_promisc ARGS(( psip_port_t *psip_port ));
 #ifdef BUF_CONSISTENCY_CHECK
 FORWARD void psip_bufcheck ARGS(( void ));
 #endif
@@ -89,22 +90,18 @@ PUBLIC void psip_init()
        psip_port_t *psip_port;
        psip_fd_t *psip_fd;
 
-#if ZERO
        for (i=0, psip_port= psip_port_table; i<psip_conf_nr; i++, psip_port++)
                psip_port->pp_flags= PPF_EMPTY;
 
        for (i=0, psip_fd= psip_fd_table; i<PSIP_FD_NR; i++, psip_fd++)
                psip_fd->pf_flags= PFF_EMPTY;
-#endif
 
        for (i=0, psip_port= psip_port_table; i<psip_conf_nr; i++, psip_port++)
        {
                psip_port->pp_flags |= PPF_CONFIGURED;
-#if ZERO
                psip_port->pp_opencnt= 0;
                psip_port->pp_rd_head= NULL;
                psip_port->pp_promisc_head= NULL;
-#endif
        }
 
 #ifndef BUF_CONSISTENCY_CHECK
@@ -120,30 +117,34 @@ int ip_port_nr;
 {
        psip_port_t *psip_port;
 
-       assert(port_nr >= 0 && port_nr < psip_conf_nr);
+       assert(port_nr >= 0);
+       if (port_nr >= psip_conf_nr)
+               return -1;
 
        psip_port= &psip_port_table[port_nr];
-       assert(psip_port->pp_flags & PPF_CONFIGURED);
+       if (!(psip_port->pp_flags &PPF_CONFIGURED))
+               return -1;
 
        psip_port->pp_ipdev= ip_port_nr;
        psip_port->pp_flags |= PPF_ENABLED;
 
        sr_add_minor(if2minor(psip_conf[port_nr].pc_ifno, PSIP_DEV_OFF),
                port_nr, psip_open, psip_close, psip_read,
-               psip_write, psip_ioctl, psip_cancel);
+               psip_write, psip_ioctl, psip_cancel, psip_select);
 
        return NW_OK;
 }
 
-PUBLIC int psip_send(port_nr, pack)
+PUBLIC int psip_send(port_nr, dest, pack)
 int port_nr;
+ipaddr_t dest;
 acc_t *pack;
 {
        psip_port_t *psip_port;
        psip_fd_t *psip_fd, *mark_fd;
        int i, result, result1;
-       size_t buf_size;
-       acc_t *hdr_pack;
+       size_t buf_size, extrasize;
+       acc_t *hdr_pack, *acc;
        psip_io_hdr_t *hdr;
 
        assert(port_nr >= 0 && port_nr < psip_conf_nr);
@@ -182,8 +183,13 @@ acc_t *pack;
                assert(psip_fd->pf_flags & PFF_READ_IP);
                psip_fd->pf_flags &= ~PFF_READ_IP;
 
+               if (psip_fd->pf_flags & PFF_NEXTHOP)
+                       extrasize= sizeof(dest);
+               else
+                       extrasize= 0;
+
                buf_size= bf_bufsize(pack);
-               if (buf_size <= psip_fd->pf_rd_count)
+               if (buf_size+extrasize <= psip_fd->pf_rd_count)
                {
                        if (psip_port->pp_flags & PPF_PROMISC)
                        {
@@ -192,6 +198,7 @@ acc_t *pack;
                                hdr= (psip_io_hdr_t *)ptr2acc_data(hdr_pack);
                                memset(hdr, '\0', sizeof(*hdr));
                                hdr->pih_flags |= PF_LOC2REM;
+                               hdr->pih_nexthop= dest;
 
                                pack->acc_linkC++;
                                hdr_pack->acc_next= pack;
@@ -212,6 +219,17 @@ acc_t *pack;
                                            promisc_restart_read(psip_port);
                                }
                        }
+
+                       if (extrasize)
+                       {
+                               /* Prepend nexthop address */
+                               acc= bf_memreq(sizeof(dest));
+                               *(ipaddr_t *)(ptr2acc_data(acc))= dest;
+                               acc->acc_next= pack;
+                               pack= acc; acc= NULL;
+                               buf_size += extrasize;
+                       }
+
                        result= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd, 
                                (size_t)0, pack, FALSE);
                        if (result == NW_OK)
@@ -230,12 +248,14 @@ acc_t *pack;
        return NW_SUSPEND;
 }
 
-PRIVATE int psip_open(port, srfd, get_userdata, put_userdata, put_pkt)
+PRIVATE int psip_open(port, srfd, get_userdata, put_userdata, put_pkt,
+       select_res)
 int port;
 int srfd;
 get_userdata_t get_userdata;
 put_userdata_t put_userdata;
 put_pkt_t put_pkt;
+select_res_t select_res;
 {
        psip_port_t *psip_port;
        psip_fd_t *psip_fd;
@@ -272,6 +292,7 @@ ioreq_t req;
        int result;
        psip_fd_t *psip_fd;
        acc_t *data;
+       nwio_ipconf_t *ipconfp;
        nwio_psipopt_t *psip_opt, *newoptp;
 
        assert(fd >= 0 && fd < PSIP_FD_NR);
@@ -279,6 +300,22 @@ ioreq_t req;
 
        switch(req)
        {
+       case NWIOSIPCONF:
+               data= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, 0, 
+                       sizeof(*ipconfp), TRUE);
+               if (!data)
+               {
+                       result= EFAULT;
+                       break;
+               }
+               data= bf_packIffLess(data, sizeof(*ipconfp));
+               assert (data->acc_length == sizeof(*ipconfp));
+
+               ipconfp= (nwio_ipconf_t *)ptr2acc_data(data);
+               result= ip_setconf(psip_fd->pf_port->pp_ipdev, ipconfp);
+               bf_afree(data);
+               reply_thr_get(psip_fd, result, TRUE);
+               break;
        case NWIOSPSIPOPT:
                data= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, 0, 
                        sizeof(*psip_opt), TRUE);
@@ -303,7 +340,15 @@ ioreq_t req;
                        else
                        {
                                psip_fd->pf_flags &= ~PFF_PROMISC;
-                               /* XXX check port flags */
+                               check_promisc(psip_fd->pf_port);
+                       }
+                       if (psip_fd->pf_psipopt.nwpo_flags & NWPO_EN_NEXTHOP)
+                       {
+                               psip_fd->pf_flags |= PFF_NEXTHOP;
+                       }
+                       else
+                       {
+                               psip_fd->pf_flags &= ~PFF_NEXTHOP;
                        }
                }
                reply_thr_get(psip_fd, result, TRUE);
@@ -331,11 +376,36 @@ size_t count;
 {
        psip_port_t *psip_port;
        psip_fd_t *psip_fd;
+       acc_t *pack;
+       size_t buf_size;
+       int result, result1;
 
        assert(fd >= 0 && fd < PSIP_FD_NR);
        psip_fd= &psip_fd_table[fd];
        psip_port= psip_fd->pf_port;
 
+       if ((psip_fd->pf_flags & PFF_PROMISC) && psip_port->pp_promisc_head)
+       {
+               /* Deliver a queued packet. */
+               pack= psip_port->pp_promisc_head;
+               buf_size= bf_bufsize(pack);
+               if (buf_size <= count)
+               {
+                       psip_port->pp_promisc_head= pack->acc_ext_link;
+                       result= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd, 
+                               (size_t)0, pack, FALSE);
+                       if (result == NW_OK)
+                               result= buf_size;
+               }
+               else
+                       result= EPACKSIZE;
+
+               result1= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd,
+                               (size_t)result, NULL, FALSE);
+               assert(result1 == NW_OK);
+               return NW_OK;
+       }
+
        psip_fd->pf_rd_count= count;
        if (psip_port->pp_rd_head == NULL)
                psip_port->pp_rd_head= psip_fd;
@@ -345,9 +415,7 @@ size_t count;
        psip_port->pp_rd_tail= psip_fd;
 
        psip_fd->pf_flags |= PFF_READ_IP;
-       if (psip_fd->pf_flags & PFF_PROMISC)
-               promisc_restart_read(psip_port);
-       else
+       if (!(psip_fd->pf_flags & PFF_PROMISC))
                ipps_get(psip_port->pp_ipdev);
        if (psip_fd->pf_flags & PFF_READ_IP)
                return NW_SUSPEND;
@@ -362,6 +430,8 @@ size_t count;
        psip_fd_t *psip_fd;
        acc_t *pack, *hdr_pack;
        psip_io_hdr_t *hdr;
+       size_t pack_len;
+       ipaddr_t nexthop;
 
        assert(fd >= 0 && fd < PSIP_FD_NR);
        psip_fd= &psip_fd_table[fd];
@@ -376,6 +446,33 @@ size_t count;
                assert(pack == NULL);
                return NW_OK;
        }
+
+       if (psip_fd->pf_flags & PFF_NEXTHOP)
+       {
+               pack_len= bf_bufsize(pack);
+               if (pack_len <= sizeof(nexthop))
+               {
+                       /* Something strange */
+                       bf_afree(pack); pack= NULL;
+                       pack= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd,
+                               (size_t)EPACKSIZE, (size_t)0, FALSE);
+                       assert(pack == NULL);
+                       return NW_OK;
+               }
+               pack= bf_packIffLess(pack, sizeof(nexthop));
+               nexthop= *(ipaddr_t *)ptr2acc_data(pack);
+               pack= bf_delhead(pack, sizeof(nexthop));
+
+               /* Map multicast to broadcast */
+               if ((nexthop & HTONL(0xE0000000)) == HTONL(0xE0000000))
+                       nexthop= HTONL(0xffffffff);
+       }
+       else
+       {
+               /* Assume point to point */
+               nexthop= HTONL(0x00000000);
+       }
+
        if (psip_port->pp_flags & PPF_PROMISC)
        {
                /* Deal with promiscuous mode. */
@@ -383,6 +480,7 @@ size_t count;
                hdr= (psip_io_hdr_t *)ptr2acc_data(hdr_pack);
                memset(hdr, '\0', sizeof(*hdr));
                hdr->pih_flags |= PF_REM2LOC;
+               hdr->pih_nexthop= nexthop;
 
                pack->acc_linkC++;
                hdr_pack->acc_next= pack;
@@ -391,69 +489,55 @@ size_t count;
                {
                        /* Append at the end. */
                        psip_port->pp_promisc_tail->acc_ext_link= hdr_pack;
+                       psip_port->pp_promisc_tail= hdr_pack;
                }
                else
                {
                        /* First packet. */
                        psip_port->pp_promisc_head= hdr_pack;
+                       psip_port->pp_promisc_tail= hdr_pack;
                        if (psip_port->pp_rd_head)
                                promisc_restart_read(psip_port);
                }
        }
-       ipps_put(psip_port->pp_ipdev, pack);
+       ipps_put(psip_port->pp_ipdev, nexthop, pack);
        pack= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, (size_t)count,
                (size_t)0, FALSE);
        assert(pack == NULL);
        return NW_OK;
 }
 
+PRIVATE int psip_select(fd, operations) /* Placeholder select handler passed to sr_add_minor for psip. */
+int fd;                                 /* not used by this stub */
+unsigned operations;                    /* not used by this stub */
+{
+       printf("psip_select: not implemented\n");       /* the only effect: log the call */
+       return 0;       /* always 0; presumably "no operation ready" -- confirm with sr */
+}
+
 PRIVATE void psip_close(fd)
 int fd;
 {
        psip_port_t *psip_port;
        psip_fd_t *psip_fd;
-       acc_t *acc, *acc_next;
-       int i;
 
        assert(fd >= 0 && fd < PSIP_FD_NR);
        psip_fd= &psip_fd_table[fd];
        psip_port= psip_fd->pf_port;
 
+       if (psip_fd->pf_flags & PFF_PROMISC)
+       {
+               /* Check if the port should still be in promiscuous mode.
+                */
+               psip_fd->pf_flags &= ~PFF_PROMISC;
+               check_promisc(psip_fd->pf_port);
+       }
+
        assert(psip_port->pp_opencnt >0);
        psip_port->pp_opencnt--;
        psip_fd->pf_flags= PFF_EMPTY;
        ipps_get(psip_port->pp_ipdev);
 
-       /* Check if the port should still be in promiscuous mode. */
-       if (psip_port->pp_flags & PPF_PROMISC)
-       {
-               psip_port->pp_flags &= ~PPF_PROMISC;
-               for (i= 0, psip_fd= psip_fd_table; i<PSIP_FD_NR;
-                       i++, psip_fd++)
-               {
-                       if ((psip_fd->pf_flags & (PFF_INUSE|PFF_PROMISC)) !=
-                               (PFF_INUSE|PFF_PROMISC))
-                       {
-                               continue;
-                       }
-                       if (psip_fd->pf_port != psip_port)
-                               continue;
-                       psip_port->pp_flags |= PPF_PROMISC;
-                       break;
-               }
-               if (!(psip_port->pp_flags & PPF_PROMISC))
-               {
-                       /* Delete queued packets. */
-                       acc= psip_port->pp_promisc_head;
-                       psip_port->pp_promisc_head= NULL;
-                       while (acc)
-                       {
-                               acc_next= acc->acc_ext_link;
-                               bf_afree(acc);
-                               acc= acc_next;
-                       }
-               }
-       }
 }
 
 PRIVATE int psip_cancel(fd, which_operation)
@@ -472,22 +556,18 @@ int which_operation;
 
        switch(which_operation)
        {
-#if !CRAMPED
        case SR_CANCEL_IOCTL:
                ip_panic(( "should not be here" ));
-#endif
        case SR_CANCEL_READ:
                assert(psip_fd->pf_flags & PFF_READ_IP);
                for (prev_fd= NULL, tmp_fd= psip_port->pp_rd_head; tmp_fd;
-                               prev_fd= tmp_fd, tmp_fd= tmp_fd->pf_rd_next)
+                       prev_fd= tmp_fd, tmp_fd= tmp_fd->pf_rd_next)
                {
                        if (tmp_fd == psip_fd)
                                break;
                }
-#if !CRAMPED
                if (tmp_fd == NULL)
                        ip_panic(( "unable to find to request to cancel" ));
-#endif
                if (prev_fd == NULL)
                        psip_port->pp_rd_head= psip_fd->pf_rd_next;
                else
@@ -499,12 +579,10 @@ int which_operation;
                                                (size_t)EINTR, NULL, FALSE);
                assert(result == NW_OK);
                break;
-#if !CRAMPED
        case SR_CANCEL_WRITE:
                ip_panic(( "should not be here" ));
        default:
                ip_panic(( "invalid operation for cancel" ));
-#endif
        }
        return NW_OK;
 }
@@ -512,39 +590,36 @@ int which_operation;
 PRIVATE void promisc_restart_read(psip_port)
 psip_port_t *psip_port;
 {
-       psip_fd_t *psip_fd, *mark_fd;
+       psip_fd_t *psip_fd, *prev, *next;
        acc_t *pack;
        size_t buf_size;
-       int i, result, result1;
-
-       while (psip_port->pp_promisc_head)
+       int result, result1;
+
+       /* Overkill at the moment: just one reader in promiscuous mode is
+        * allowed.
+        */
+       pack= psip_port->pp_promisc_head;
+       if (!pack)
+               return;
+       assert(pack->acc_ext_link == NULL);
+
+       for(psip_fd= psip_port->pp_rd_head, prev= NULL; psip_fd;
+               prev= psip_fd, psip_fd= psip_fd->pf_rd_next)
        {
-               mark_fd= psip_port->pp_rd_tail;
-
-               for(i= 0; i<PSIP_FD_NR; i++)
-               {
-                       psip_fd= psip_port->pp_rd_head;
-                       if (!psip_fd)
-                               return;
-                       psip_port->pp_rd_head= psip_fd->pf_rd_next;
-                       if (psip_fd->pf_flags & PFF_PROMISC)
-                               break;
-                       psip_fd->pf_rd_next= NULL;
-                       if (psip_port->pp_rd_head == NULL)
-                               psip_port->pp_rd_head= psip_fd;
-                       else
-                               psip_port->pp_rd_tail->pf_rd_next= psip_fd;
-                       psip_port->pp_rd_tail= psip_fd;
-                       if (psip_fd == mark_fd)
-                               return;
-               }
-               if (i == PSIP_FD_NR)
-                       ip_panic(( "psip'promisc_restart_read: loop" ));
+again:
+               if (!(psip_fd->pf_flags & PFF_PROMISC))
+                       continue;
+               next= psip_fd->pf_rd_next;
+               if (prev)
+                       prev->pf_rd_next= next;
+               else
+                       psip_port->pp_rd_head= next;
+               if (!next)
+                       psip_port->pp_rd_tail= prev;
 
                assert(psip_fd->pf_flags & PFF_READ_IP);
                psip_fd->pf_flags &= ~PFF_READ_IP;
 
-               pack= psip_port->pp_promisc_head;
                buf_size= bf_bufsize(pack);
                if (buf_size <= psip_fd->pf_rd_count)
                {
@@ -560,6 +635,16 @@ psip_port_t *psip_port;
                result1= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd,
                                (size_t)result, NULL, FALSE);
                assert(result1 == NW_OK);
+
+               if (psip_port->pp_promisc_head)
+               {
+                       /* Restart from the beginning */
+                       assert(result == EPACKSIZE);
+                       psip_fd= psip_port->pp_rd_head;
+                       prev= NULL;
+                       goto again;
+               }
+               break;
        }
 }
 
@@ -568,11 +653,8 @@ psip_fd_t *psip_fd;
 nwio_psipopt_t *newoptp;
 {
        nwio_psipopt_t oldopt;
-       int result;
-       unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags,
-               all_flags, flags;
+       unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags;
        unsigned long new_flags;
-       int i;
 
        oldopt= psip_fd->pf_psipopt;
 
@@ -593,6 +675,15 @@ nwio_psipopt_t *newoptp;
        }
 
        new_flags= ((unsigned long)new_di_flags << 16) | new_en_flags;
+       if ((new_flags & NWPO_EN_PROMISC) &&
+               (psip_fd->pf_port->pp_flags & PPF_PROMISC))
+       {
+               printf("psip_setopt: EBUSY for port %d, flags 0x%x\n",
+                       psip_fd->pf_port - psip_port_table,
+                       psip_fd->pf_port->pp_flags);
+               /* We can support only one at a time. */
+               return EBUSY;
+       }
 
        psip_fd->pf_psipopt= *newoptp;
        psip_fd->pf_psipopt.nwpo_flags= new_flags;
@@ -600,6 +691,48 @@ nwio_psipopt_t *newoptp;
        return NW_OK;
 }
 
+PRIVATE void check_promisc(psip_port)   /* Recompute PPF_PROMISC for a port; free its queue if cleared. */
+psip_port_t *psip_port;                 /* port whose pp_flags and promisc queue are updated */
+{
+       int i;
+       psip_fd_t *psip_fd;
+       acc_t *acc, *acc_next;
+
+       /* Check if the port should still be in promiscuous mode.  Overkill
+        * at the moment.
+        */
+       if (!(psip_port->pp_flags & PPF_PROMISC))
+               return;         /* port is not promiscuous: nothing to re-evaluate */
+
+       psip_port->pp_flags &= ~PPF_PROMISC;    /* clear first; set again below if an fd still wants it */
+       for (i= 0, psip_fd= psip_fd_table; i<PSIP_FD_NR; i++, psip_fd++)
+       {
+               if ((psip_fd->pf_flags & (PFF_INUSE|PFF_PROMISC)) !=
+                       (PFF_INUSE|PFF_PROMISC))
+               {
+                       continue;       /* fd lacks PFF_INUSE or PFF_PROMISC */
+               }
+               if (psip_fd->pf_port != psip_port)
+                       continue;       /* fd belongs to a different port */
+               printf("check_promisc: setting PROMISC for port %d\n",
+                       psip_port-psip_port_table);     /* NOTE(review): pointer difference passed for %d */
+               psip_port->pp_flags |= PPF_PROMISC;
+               break;          /* one matching fd is enough */
+       }
+       if (!(psip_port->pp_flags & PPF_PROMISC))
+       {
+               /* Delete queued packets. */
+               acc= psip_port->pp_promisc_head;
+               psip_port->pp_promisc_head= NULL;       /* NOTE(review): pp_promisc_tail is not reset here */
+               while (acc)
+               {
+                       acc_next= acc->acc_ext_link;    /* read the link before acc is freed */
+                       bf_afree(acc);
+                       acc= acc_next;
+               }
+       }
+}
+
 PRIVATE void psip_buffree (priority)
 int priority;
 {
@@ -679,8 +812,7 @@ int for_ioctl;
        assert (!result);
 }
 
-#endif /* ENABLE_PSIP */
 
 /*
- * $PchId: psip.c,v 1.6 1996/05/07 20:50:31 philip Exp $
+ * $PchId: psip.c,v 1.15 2005/06/28 14:19:29 philip Exp $
  */
index 570ccc7e038a82755048672fee7fe5082ea2bc59..22f38d359ed07856267d62c72260e6483f629c88 100644 (file)
@@ -14,10 +14,10 @@ Copyright 1995 Philip Homburg
 void psip_prep ARGS(( void ));
 void psip_init ARGS(( void ));
 int psip_enable ARGS(( int port_nr, int ip_port_nr ));
-int psip_send ARGS(( int port_nr, acc_t *pack ));
+int psip_send ARGS(( int port_nr, ipaddr_t dest, acc_t *pack ));
 
 #endif /* PSIP_H */
 
 /*
- * $PchId: psip.h,v 1.4 1995/11/21 06:45:27 philip Exp $
+ * $PchId: psip.h,v 1.6 2001/04/19 21:16:22 philip Exp $
  */
diff --git a/servers/inet/generic/rand256.c b/servers/inet/generic/rand256.c
new file mode 100644 (file)
index 0000000..736cdbe
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+rand256.c
+
+Created:       Oct 2000 by Philip Homburg <philip@f-mnx.phicoh.com>
+
+Generate 256-bit random numbers 
+*/
+
+#include <sha2.h>
+#include "inet.h"
+#include "rand256.h"
+
+PRIVATE u32_t base_bits[8];
+
+PUBLIC void init_rand256(bits)  /* Seed the generator state from caller-supplied bytes. */
+u8_t bits[32];                  /* in: seed bytes copied into base_bits */
+{
+       memcpy(base_bits, bits, sizeof(base_bits));     /* copies sizeof(base_bits) bytes, replacing any previous state */
+}
+
+PUBLIC void rand256(bits)       /* Advance the state and write its SHA-256 digest to bits. */
+u8_t bits[32];                  /* out: digest stored by SHA256_Final */
+{
+       u32_t a;
+       SHA256_CTX ctx;
+
+       a= ++base_bits[0];      /* bump word 0 so every call hashes a different state */
+       if (a == 0)
+               base_bits[1]++; /* word 0 wrapped: carry into word 1 (no further propagation) */
+       SHA256_Init(&ctx);
+       SHA256_Update(&ctx, (unsigned char *)base_bits, sizeof(base_bits));     /* hash the whole state array */
+       SHA256_Final(bits, &ctx);
+}
+
+/*
+ * $PchId: rand256.c,v 1.1 2005/06/28 14:13:43 philip Exp $
+ */
diff --git a/servers/inet/generic/rand256.h b/servers/inet/generic/rand256.h
new file mode 100644 (file)
index 0000000..0fd5444
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+rand256.h
+
+Created:       Oct 2000 by Philip Homburg <philip@f-mnx.phicoh.com>
+
+Provide 256-bit random numbers
+*/
+
+void init_rand256 ARGS(( u8_t bits[32] ));
+void rand256 ARGS(( u8_t bits[32] ));
+
+/*
+ * $PchId: rand256.h,v 1.1 2005/06/28 14:14:05 philip Exp $
+ */
index 11c41a00f3727c1d678bd9fcdbe222b12f7e31cc..86cdaf2baf38e17b140ac3e9a2a3cf010be6f376 100644 (file)
@@ -13,6 +13,11 @@ Copyright 1995 Philip Homburg
 #define SR_CANCEL_READ 2
 #define SR_CANCEL_WRITE        3
 
+#define SR_SELECT_READ         0x01
+#define SR_SELECT_WRITE                0x02
+#define SR_SELECT_EXCEPTION    0x04
+#define SR_SELECT_POLL         0x10
+
 /* Forward struct declarations */
 
 struct acc;
@@ -21,21 +26,24 @@ struct acc;
 
 typedef int  (*sr_open_t) ARGS(( int port, int srfd,
        get_userdata_t get_userdata, put_userdata_t put_userdata,
-       put_pkt_t put_pkt ));
+       put_pkt_t put_pkt, select_res_t select_res ));
 typedef void (*sr_close_t) ARGS(( int fd ));
 typedef int (*sr_read_t) ARGS(( int fd, size_t count ));
 typedef int (*sr_write_t) ARGS(( int fd, size_t count ));
 typedef int  (*sr_ioctl_t) ARGS(( int fd, ioreq_t req ));
 typedef int  (*sr_cancel_t) ARGS(( int fd, int which_operation ));
+typedef int  (*sr_select_t) ARGS(( int fd, unsigned operations ));
 
 void sr_init ARGS(( void  ));
 void sr_add_minor ARGS(( int minor, int port, sr_open_t openf,
        sr_close_t closef, sr_read_t sr_read, sr_write_t sr_write,
-       sr_ioctl_t ioctlf, sr_cancel_t cancelf ));
+       sr_ioctl_t ioctlf, sr_cancel_t cancelf, sr_select_t selectf ));
 
 #endif /* SR_H */
 
+/* Track TCP connections back into sr (for lsof, identd, etc.) */
+EXTERN sr_cancel_t tcp_cancel_f;
 
 /*
- * $PchId: sr.h,v 1.6 1996/05/07 20:50:51 philip Exp $
+ * $PchId: sr.h,v 1.9 2005/06/28 14:19:51 philip Exp $
  */
index 2f788380cb82f9b3d6677fe42cbe5bf6e2b243fd..2add222fe524b8df21f4ccc86a633b7112a82406 100644 (file)
@@ -10,12 +10,11 @@ Copyright 1995 Philip Homburg
 #include "event.h"
 #include "type.h"
 
-#if !CRAMPED
 #include "io.h"
 #include "ip.h"
-#endif
 #include "sr.h"
 #include "assert.h"
+#include "rand256.h"
 #include "tcp.h"
 #include "tcp_int.h"
 
@@ -24,8 +23,10 @@ THIS_FILE
 PUBLIC tcp_port_t *tcp_port_table;
 PUBLIC tcp_fd_t tcp_fd_table[TCP_FD_NR];
 PUBLIC tcp_conn_t tcp_conn_table[TCP_CONN_NR];
+PUBLIC sr_cancel_t tcp_cancel_f;
 
 FORWARD void tcp_main ARGS(( tcp_port_t *port ));
+FORWARD int tcp_select ARGS(( int fd, unsigned operations ));
 FORWARD acc_t *tcp_get_data ARGS(( int fd, size_t offset,
        size_t count, int for_ioctl ));
 FORWARD int tcp_put_data ARGS(( int fd, size_t offset,
@@ -49,22 +50,23 @@ FORWARD tcp_conn_t *find_best_conn ARGS(( ip_hdr_t *ip_hdr,
        tcp_hdr_t *tcp_hdr ));
 FORWARD int maybe_listen ARGS(( ipaddr_t locaddr, Tcpport_t locport,
                                ipaddr_t remaddr, Tcpport_t remport ));
-FORWARD int conn_right4fd ARGS(( tcp_conn_t *tcp_conn, tcp_fd_t *tcp_fd ));
 FORWARD int tcp_su4connect ARGS(( tcp_fd_t *tcp_fd ));
 FORWARD void tcp_buffree ARGS(( int priority ));
 #ifdef BUF_CONSISTENCY_CHECK
 FORWARD void tcp_bufcheck ARGS(( void ));
 #endif
-FORWARD void tcp_setup_conn ARGS(( tcp_conn_t *tcp_conn ));
+FORWARD void tcp_setup_conn ARGS(( tcp_port_t *tcp_port,
+                                       tcp_conn_t *tcp_conn ));
+FORWARD u32_t tcp_rand32 ARGS(( void ));
 
 PUBLIC void tcp_prep()
 {
-       tcp_port_table= alloc(ip_conf_nr * sizeof(tcp_port_table[0]));
+       tcp_port_table= alloc(tcp_conf_nr * sizeof(tcp_port_table[0]));
 }
 
 PUBLIC void tcp_init()
 {
-       int i, j, k;
+       int i, j, k, ifno;
        tcp_fd_t *tcp_fd;
        tcp_port_t *tcp_port;
        tcp_conn_t *tcp_conn;
@@ -74,7 +76,6 @@ PUBLIC void tcp_init()
        assert (BUF_S >= sizeof(struct nwio_tcpconf));
        assert (BUF_S >= IP_MAX_HDR_SIZE + TCP_MAX_HDR_SIZE);
 
-#if ZERO
        for (i=0, tcp_fd= tcp_fd_table; i<TCP_FD_NR; i++, tcp_fd++)
        {
                tcp_fd->tf_flags= TFF_EMPTY;
@@ -86,7 +87,6 @@ PUBLIC void tcp_init()
                tcp_conn->tc_flags= TCF_EMPTY;
                tcp_conn->tc_busy= 0;
        }
-#endif
 
 #ifndef BUF_CONSISTENCY_CHECK
        bf_logon(tcp_buffree);
@@ -94,17 +94,15 @@ PUBLIC void tcp_init()
        bf_logon(tcp_buffree, tcp_bufcheck);
 #endif
 
-       for (i=0, tcp_port= tcp_port_table; i<ip_conf_nr; i++, tcp_port++)
+       for (i=0, tcp_port= tcp_port_table; i<tcp_conf_nr; i++, tcp_port++)
        {
-               tcp_port->tp_ipdev= i;
+               tcp_port->tp_ipdev= tcp_conf[i].tc_port;
 
-#if ZERO
                tcp_port->tp_flags= TPF_EMPTY;
                tcp_port->tp_state= TPS_EMPTY;
                tcp_port->tp_snd_head= NULL;
                tcp_port->tp_snd_tail= NULL;
                ev_init(&tcp_port->tp_snd_event);
-#endif
                for (j= 0; j<TCP_CONN_HASH_NR; j++)
                {
                        for (k= 0; k<4; k++)
@@ -114,12 +112,14 @@ PUBLIC void tcp_init()
                        }
                }
 
-               sr_add_minor(if2minor(ip_conf[i].ic_ifno, TCP_DEV_OFF),
+               ifno= ip_conf[tcp_port->tp_ipdev].ic_ifno;
+               sr_add_minor(if2minor(ifno, TCP_DEV_OFF),
                        i, tcp_open, tcp_close, tcp_read,
-                       tcp_write, tcp_ioctl, tcp_cancel);
+                       tcp_write, tcp_ioctl, tcp_cancel, tcp_select);
 
                tcp_main(tcp_port);
        }
+       tcp_cancel_f= tcp_cancel;
 }
 
 PRIVATE void tcp_main(tcp_port)
@@ -135,7 +135,7 @@ tcp_port_t *tcp_port;
                tcp_port->tp_state= TPS_SETPROTO;
                tcp_port->tp_ipfd= ip_open(tcp_port->tp_ipdev,
                        tcp_port->tp_ipdev, tcp_get_data,
-                       tcp_put_data, tcp_put_pkt);
+                       tcp_put_data, tcp_put_pkt, 0 /* no select_res */);
                if (tcp_port->tp_ipfd < 0)
                {
                        tcp_port->tp_state= TPS_ERROR;
@@ -206,8 +206,12 @@ tcp_port_t *tcp_port;
                tcp_conn->tc_rt_dead= TCP_DEF_RT_DEAD;
                tcp_conn->tc_stt= 0;
                tcp_conn->tc_0wnd_to= 0;
+               tcp_conn->tc_artt= TCP_DEF_RTT*TCP_RTT_SCALE;
+               tcp_conn->tc_drtt= 0;
                tcp_conn->tc_rtt= TCP_DEF_RTT;
-               tcp_conn->tc_mss= TCP_DEF_MSS;
+               tcp_conn->tc_max_mtu= tcp_port->tp_mtu;
+               tcp_conn->tc_mtu= tcp_conn->tc_max_mtu;
+               tcp_conn->tc_mtutim= 0;
                tcp_conn->tc_error= NW_OK;
                tcp_conn->tc_snd_wnd= TCP_MAX_SND_WND_SIZE;
                tcp_conn->tc_snd_cinc=
@@ -233,11 +237,57 @@ tcp_port_t *tcp_port;
                read_ip_packets(tcp_port);
                return;
 
-#if !CRAMPED
        default:
                ip_panic(( "unknown state" ));
-#endif
+               break;
+       }
+}
+
+PRIVATE int tcp_select(fd, operations) /* Return the subset of requested SR_SELECT_* ops that are ready, or ENOTCONN. */
+int fd;                                /* index into tcp_fd_table; not range-checked here */
+unsigned operations;                   /* mask of SR_SELECT_* bits being asked about */
+{
+       unsigned resops;                /* result mask built up below */
+
+       tcp_fd_t *tcp_fd;
+       tcp_conn_t *tcp_conn;
+
+       tcp_fd= &tcp_fd_table[fd];
+       assert (tcp_fd->tf_flags & TFF_INUSE);
+
+       resops= 0;
+       if (operations & SR_SELECT_READ)
+       {
+               if (!(tcp_fd->tf_flags & TFF_CONNECTED))
+                       return ENOTCONN;        /* Is this right? */
+
+               tcp_conn= tcp_fd->tf_conn;
+
+               if (tcp_conn->tc_state == TCS_CLOSED || tcp_sel_read(tcp_conn))
+                       resops |= SR_SELECT_READ;       /* a closed connection is reported as readable too */
+               else if (!(operations & SR_SELECT_POLL))
+                       tcp_fd->tf_flags |= TFF_SEL_READ;       /* not ready and not polling: record the pending request */
+       }
+       if (operations & SR_SELECT_WRITE)
+       {
+               if (!(tcp_fd->tf_flags & TFF_CONNECTED))
+                       return ENOTCONN;        /* Is this right? */
+               tcp_conn= tcp_fd->tf_conn;
+
+               if (tcp_conn->tc_state == TCS_CLOSED ||
+                       tcp_conn->tc_flags & TCF_FIN_SENT ||
+                       tcp_sel_write(tcp_conn))
+               {
+                       resops |= SR_SELECT_WRITE;      /* closed or FIN already sent is reported as writable too */
+               }
+               else if (!(operations & SR_SELECT_POLL))
+                       tcp_fd->tf_flags |= TFF_SEL_WRITE;      /* not ready and not polling: record the pending request */
        }
+       if (operations & SR_SELECT_EXCEPTION)
+       {
+               printf("tcp_select: not implemented for exceptions\n");        /* no result bit is ever set for exceptions */
+       }
+       return resops;          /* may be 0 when nothing requested is ready */
 }
 
 PRIVATE acc_t *tcp_get_data (port, offset, count, for_ioctl)
@@ -325,10 +375,8 @@ assert (count == sizeof(struct nwio_ipopt));
                }
                break;
        default:
-#if !CRAMPED
                printf("tcp_get_data(%d, 0x%x, 0x%x) called but tp_state= 0x%x\n",
                        port, offset, count, tcp_port->tp_state);
-#endif
                break;
        }
        return NW_OK;
@@ -368,6 +416,8 @@ int for_ioctl;
                        ipconf= (struct nwio_ipconf *)ptr2acc_data(data);
 assert (ipconf->nwic_flags & NWIC_IPADDR_SET);
                        tcp_port->tp_ipaddr= ipconf->nwic_ipaddr;
+                       tcp_port->tp_subnetmask= ipconf->nwic_netmask;
+                       tcp_port->tp_mtu= ipconf->nwic_mtu;
                        bf_afree(data);
                }
                break;
@@ -397,10 +447,9 @@ assert (ipconf->nwic_flags & NWIC_IPADDR_SET);
                }
                break;
        default:
-#if !CRAMPED
-               printf("tcp_put_data(%d, 0x%x, 0x%x) called but tp_state= 0x%x\n",
-       fd, offset, data, tcp_port->tp_state);
-#endif
+               printf(
+               "tcp_put_data(%d, 0x%x, %p) called but tp_state= 0x%x\n",
+                       fd, offset, data, tcp_port->tp_state);
                break;
        }
        return NW_OK;
@@ -421,10 +470,10 @@ size_t datalen;
        tcp_hdr_t *tcp_hdr;
        acc_t *ip_pack, *tcp_pack;
        size_t ip_datalen, tcp_datalen, ip_hdr_len, tcp_hdr_len;
-       u16_t sum;
+       u16_t sum, mtu;
        u32_t bits;
-       int hash;
-       ipaddr_t srcaddr, dstaddr;
+       int i, hash;
+       ipaddr_t srcaddr, dstaddr, ipaddr, mask;
        tcpport_t srcport, dstport;
 
        tcp_port= &tcp_port_table[fd];
@@ -435,7 +484,32 @@ size_t datalen;
        ip_datalen= datalen - ip_hdr_len;
        if (ip_datalen == 0)
        {
-               DBLOCK(1, printf("tcp_put_pkt: no TCP header\n"));
+               if (ip_hdr->ih_proto == 0)
+               {
+                       /* IP layer reports new IP address */
+                       ipaddr= ip_hdr->ih_src;
+                       mask= ip_hdr->ih_dst;
+                       mtu= ntohs(ip_hdr->ih_length);
+                       tcp_port->tp_ipaddr= ipaddr;
+                       tcp_port->tp_subnetmask= mask;
+                       tcp_port->tp_mtu= mtu;
+                       DBLOCK(1, printf("tcp_put_pkt: using address ");
+                               writeIpAddr(ipaddr);
+                               printf(", netmask ");
+                               writeIpAddr(mask);
+                               printf(", mtu %u\n", mtu));
+                       for (i= 0, tcp_conn= tcp_conn_table+i;
+                               i<TCP_CONN_NR; i++, tcp_conn++)
+                       {
+                               if (!(tcp_conn->tc_flags & TCF_INUSE))
+                                       continue;
+                               if (tcp_conn->tc_port != tcp_port)
+                                       continue;
+                               tcp_conn->tc_locaddr= ipaddr;
+                       }
+               }
+               else
+                       DBLOCK(1, printf("tcp_put_pkt: no TCP header\n"));
                bf_afree(data);
                return;
        }
@@ -549,7 +623,7 @@ size_t datalen;
        }
        else
                tcp_conn= NULL;
-       if (tcp_conn != NULL && tcp_conn->tc_state == TCS_CLOSED ||
+       if ((tcp_conn != NULL && tcp_conn->tc_state == TCS_CLOSED) ||
                (tcp_hdr->th_flags & THF_SYN))
        {
                tcp_conn= NULL;
@@ -583,12 +657,14 @@ size_t datalen;
 }
 
 
-PUBLIC int tcp_open (port, srfd, get_userdata, put_userdata, put_pkt)
+PUBLIC int tcp_open (port, srfd, get_userdata, put_userdata, put_pkt,
+       select_res)
 int port;
 int srfd;
 get_userdata_t get_userdata;
 put_userdata_t put_userdata;
 put_pkt_t put_pkt;
+select_res_t select_res;
 {
        int i;
        tcp_fd_t *tcp_fd;
@@ -603,7 +679,7 @@ put_pkt_t put_pkt;
        tcp_fd= &tcp_fd_table[i];
 
        tcp_fd->tf_flags= TFF_INUSE;
-       tcp_fd->tf_flags |= TFF_PUSH_DATA;      /* XXX */
+       tcp_fd->tf_flags |= TFF_PUSH_DATA;
 
        tcp_fd->tf_port= &tcp_port_table[port];
        tcp_fd->tf_srfd= srfd;
@@ -613,6 +689,7 @@ put_pkt_t put_pkt;
        tcp_fd->tf_tcpopt.nwto_flags= TCP_DEF_OPT;
        tcp_fd->tf_get_userdata= get_userdata;
        tcp_fd->tf_put_userdata= put_userdata;
+       tcp_fd->tf_select_res= select_res;
        tcp_fd->tf_conn= 0;
        return i;
 }
@@ -730,9 +807,31 @@ assert (conf_acc->acc_length == sizeof(*tcp_conf));
                tcp_conn->tc_busy--;
                tcp_conn_write(tcp_conn, 0);
                if (!(tcp_fd->tf_flags & TFF_IOCTL_IP))
-                       return NW_OK;
+                       result= NW_OK;
                else
-                       return NW_SUSPEND;
+                       result= NW_SUSPEND;
+               break;
+       case NWIOTCPPUSH:
+               if (!(tcp_fd->tf_flags & TFF_CONNECTED))
+               {
+                       tcp_fd->tf_flags &= ~TFF_IOCTL_IP;
+                       reply_thr_get (tcp_fd, ENOTCONN, TRUE);
+                       result= NW_OK;
+                       break;
+               }
+               tcp_conn= tcp_fd->tf_conn;
+               tcp_conn->tc_SND_PSH= tcp_conn->tc_SND_NXT;
+               tcp_conn->tc_flags &= ~TCF_NO_PUSH;
+               tcp_conn->tc_flags |= TCF_PUSH_NOW;
+
+               /* Start the timer (if necessary) */
+               if (tcp_conn->tc_SND_TRM == tcp_conn->tc_SND_UNA)
+                       tcp_set_send_timer(tcp_conn);
+
+               tcp_conn_write(tcp_conn, 0);
+               tcp_fd->tf_flags &= ~TFF_IOCTL_IP;
+               reply_thr_get (tcp_fd, NW_OK, TRUE);
+               result= NW_OK;
                break;
        default:
                tcp_fd->tf_flags &= ~TFF_IOCTL_IP;
@@ -754,8 +853,6 @@ tcp_fd_t *tcp_fd;
        nwio_tcpconf_t *tcpconf;
        nwio_tcpconf_t oldconf, newconf;
        acc_t *data;
-       int result;
-       tcpport_t port;
        tcp_fd_t *fd_ptr;
        unsigned int new_en_flags, new_di_flags,
                old_en_flags, old_di_flags, all_flags, flags;
@@ -947,12 +1044,8 @@ tcp_fd_t *tcp_fd;
        nwio_tcpopt_t *tcpopt;
        nwio_tcpopt_t oldopt, newopt;
        acc_t *data;
-       int result;
-       tcpport_t port;
-       tcp_fd_t *fd_ptr;
        unsigned int new_en_flags, new_di_flags,
-               old_en_flags, old_di_flags, all_flags, flags;
-       int i;
+               old_en_flags, old_di_flags;
 
        data= (*tcp_fd->tf_get_userdata) (tcp_fd->tf_srfd, 0,
                sizeof(nwio_tcpopt_t), TRUE);
@@ -968,11 +1061,9 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t));
        newopt= *tcpopt;
 
        old_en_flags= oldopt.nwto_flags & 0xffff;
-       old_di_flags= (oldopt.nwto_flags >> 16) &
-               0xffff;
+       old_di_flags= (oldopt.nwto_flags >> 16) & 0xffff;
        new_en_flags= newopt.nwto_flags & 0xffff;
-       new_di_flags= (newopt.nwto_flags >> 16) &
-               0xffff;
+       new_di_flags= (newopt.nwto_flags >> 16) & 0xffff;
        if (new_en_flags & new_di_flags)
        {
                tcp_fd->tf_flags &= ~TFF_IOCTL_IP;
@@ -981,33 +1072,24 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t));
        }
 
        /* NWTO_SND_URG_MASK */
-       if (!((new_en_flags | new_di_flags) &
-               NWTO_SND_URG_MASK))
+       if (!((new_en_flags | new_di_flags) & NWTO_SND_URG_MASK))
        {
-               new_en_flags |= (old_en_flags &
-                       NWTO_SND_URG_MASK);
-               new_di_flags |= (old_di_flags &
-                       NWTO_SND_URG_MASK);
+               new_en_flags |= (old_en_flags & NWTO_SND_URG_MASK);
+               new_di_flags |= (old_di_flags & NWTO_SND_URG_MASK);
        }
 
        /* NWTO_RCV_URG_MASK */
-       if (!((new_en_flags | new_di_flags) &
-               NWTO_RCV_URG_MASK))
+       if (!((new_en_flags | new_di_flags) & NWTO_RCV_URG_MASK))
        {
-               new_en_flags |= (old_en_flags &
-                       NWTO_RCV_URG_MASK);
-               new_di_flags |= (old_di_flags &
-                       NWTO_RCV_URG_MASK);
+               new_en_flags |= (old_en_flags & NWTO_RCV_URG_MASK);
+               new_di_flags |= (old_di_flags & NWTO_RCV_URG_MASK);
        }
 
        /* NWTO_BSD_URG_MASK */
-       if (!((new_en_flags | new_di_flags) &
-               NWTO_BSD_URG_MASK))
+       if (!((new_en_flags | new_di_flags) & NWTO_BSD_URG_MASK))
        {
-               new_en_flags |= (old_en_flags &
-                       NWTO_BSD_URG_MASK);
-               new_di_flags |= (old_di_flags &
-                       NWTO_BSD_URG_MASK);
+               new_en_flags |= (old_en_flags & NWTO_BSD_URG_MASK);
+               new_di_flags |= (old_di_flags & NWTO_BSD_URG_MASK);
        }
        else
        {
@@ -1021,17 +1103,21 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t));
        }
 
        /* NWTO_DEL_RST_MASK */
-       if (!((new_en_flags | new_di_flags) &
-               NWTO_DEL_RST_MASK))
+       if (!((new_en_flags | new_di_flags) & NWTO_DEL_RST_MASK))
        {
-               new_en_flags |= (old_en_flags &
-                       NWTO_DEL_RST_MASK);
-               new_di_flags |= (old_di_flags &
-                       NWTO_DEL_RST_MASK);
+               new_en_flags |= (old_en_flags & NWTO_DEL_RST_MASK);
+               new_di_flags |= (old_di_flags & NWTO_DEL_RST_MASK);
        }
 
-       newopt.nwto_flags= ((unsigned long)new_di_flags
-               << 16) | new_en_flags;
+       /* NWTO_BULK_MASK */
+       if (!((new_en_flags | new_di_flags) & NWTO_BULK_MASK))
+       {
+               new_en_flags |= (old_en_flags & NWTO_BULK_MASK);
+               new_di_flags |= (old_di_flags & NWTO_BULK_MASK);
+       }
+
+       newopt.nwto_flags= ((unsigned long)new_di_flags << 16) |
+               new_en_flags;
        tcp_fd->tf_tcpopt= newopt;
        if (newopt.nwto_flags & NWTO_SND_URG)
                tcp_fd->tf_flags |= TFF_WR_URG;
@@ -1046,13 +1132,9 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t));
        if (tcp_fd->tf_conn)
        {
                if (newopt.nwto_flags & NWTO_BSD_URG)
-               {
                        tcp_fd->tf_conn->tc_flags |= TCF_BSD_URG;
-               }
                else
-               {
                        tcp_fd->tf_conn->tc_flags &= ~TCF_BSD_URG;
-               }
        }
 
        if (newopt.nwto_flags & NWTO_DEL_RST)
@@ -1060,6 +1142,11 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t));
        else
                tcp_fd->tf_flags &= ~TFF_DEL_RST;
 
+       if (newopt.nwto_flags & NWTO_BULK)
+               tcp_fd->tf_flags &= ~TFF_PUSH_DATA;
+       else
+               tcp_fd->tf_flags |= TFF_PUSH_DATA;
+
        bf_afree(data);
        tcp_fd->tf_flags &= ~TFF_IOCTL_IP;
        reply_thr_get(tcp_fd, NW_OK, TRUE);
@@ -1072,20 +1159,20 @@ int fd;
 {
        tcpport_t port, nw_port;
 
-       nw_port= htons(0xC000+fd);
-       if (is_unused_port(nw_port))
-               return nw_port;
-
-       for (port= 0xC000+TCP_FD_NR; port < 0xFFFF; port++)
+       for (port= 0x8000+fd; port < 0xffff-TCP_FD_NR; port+= TCP_FD_NR)
+       {
+               nw_port= htons(port);
+               if (is_unused_port(nw_port))
+                       return nw_port;
+       }
+       for (port= 0x8000; port < 0xffff; port++)
        {
                nw_port= htons(port);
                if (is_unused_port(nw_port))
                        return nw_port;
        }
-#if !CRAMPED
        ip_panic(( "unable to find unused port (shouldn't occur)" ));
        return 0;
-#endif
 }
 
 PRIVATE int is_unused_port(port)
@@ -1103,9 +1190,9 @@ tcpport_t port;
                if (tcp_fd->tf_tcpconf.nwtc_locport == port)
                        return FALSE;
        }
-       for (i= ip_conf_nr, tcp_conn= tcp_conn_table+i;
+       for (i= tcp_conf_nr, tcp_conn= tcp_conn_table+i;
                i<TCP_CONN_NR; i++, tcp_conn++)
-               /* the first ip_conf_nr ports are special */
+               /* the first tcp_conf_nr ports are special */
        {
                if (!(tcp_conn->tc_flags & TCF_INUSE))
                        continue;
@@ -1115,8 +1202,7 @@ tcpport_t port;
        return TRUE;
 }
 
-PRIVATE int
-reply_thr_put(tcp_fd, reply, for_ioctl)
+PRIVATE int reply_thr_put(tcp_fd, reply, for_ioctl)
 tcp_fd_t *tcp_fd;
 int reply;
 int for_ioctl;
@@ -1142,7 +1228,6 @@ PUBLIC int tcp_su4listen(tcp_fd)
 tcp_fd_t *tcp_fd;
 {
        tcp_conn_t *tcp_conn;
-       acc_t *tmp_acc;
 
        tcp_conn= tcp_fd->tf_conn;
 
@@ -1157,8 +1242,7 @@ tcp_fd_t *tcp_fd;
        else
                tcp_conn->tc_remaddr= 0;
 
-       tcp_setup_conn(tcp_conn);
-       tcp_conn->tc_port= tcp_fd->tf_port;
+       tcp_setup_conn(tcp_fd->tf_port, tcp_conn);
        tcp_conn->tc_fd= tcp_fd;
        tcp_conn->tc_connInprogress= 1;
        tcp_conn->tc_orglisten= TRUE;
@@ -1179,11 +1263,10 @@ PRIVATE tcp_conn_t *find_empty_conn()
 {
        int i;
        tcp_conn_t *tcp_conn;
-       int state;
 
-       for (i=ip_conf_nr, tcp_conn= tcp_conn_table+i;
+       for (i=tcp_conf_nr, tcp_conn= tcp_conn_table+i;
                i<TCP_CONN_NR; i++, tcp_conn++)
-               /* the first ip_conf_nr connections are reserved for
+               /* the first tcp_conf_nr connections are reserved for
                 * RSTs
                 */
        {
@@ -1227,9 +1310,9 @@ ipaddr_t remaddr;
 
        assert(remport);
        assert(remaddr);
-       for (i=ip_conf_nr, tcp_conn= tcp_conn_table+i; i<TCP_CONN_NR;
+       for (i=tcp_conf_nr, tcp_conn= tcp_conn_table+i; i<TCP_CONN_NR;
                i++, tcp_conn++)
-               /* the first ip_conf_nr connections are reserved for
+               /* the first tcp_conf_nr connections are reserved for
                        RSTs */
        {
                if (tcp_conn->tc_flags == TCF_EMPTY)
@@ -1300,9 +1383,9 @@ tcp_hdr_t *tcp_hdr;
        best_level= 0;
        best_conn= NULL;
        listen_conn= NULL;
-       for (i= ip_conf_nr, tcp_conn= tcp_conn_table+i;
+       for (i= tcp_conf_nr, tcp_conn= tcp_conn_table+i;
                i<TCP_CONN_NR; i++, tcp_conn++)
-               /* the first ip_conf_nr connections are reserved for
+               /* the first tcp_conf_nr connections are reserved for
                        RSTs */
        {
                if (!(tcp_conn->tc_flags & TCF_INUSE))
@@ -1379,7 +1462,7 @@ tcp_hdr_t *tcp_hdr;
                        return NULL;
                }
 
-               for (i=0, tcp_conn= tcp_conn_table; i<ip_conf_nr;
+               for (i=0, tcp_conn= tcp_conn_table; i<tcp_conf_nr;
                        i++, tcp_conn++)
                {
                        /* find valid port to send RST */
@@ -1435,7 +1518,7 @@ tcpport_t remport;
        tcp_conn_t *tcp_conn;
        tcp_fd_t *fd;
 
-       for (i= ip_conf_nr, tcp_conn= tcp_conn_table+i;
+       for (i= tcp_conf_nr, tcp_conn= tcp_conn_table+i;
                i<TCP_CONN_NR; i++, tcp_conn++)
        {
                if (!(tcp_conn->tc_flags & TCF_INUSE))
@@ -1538,14 +1621,14 @@ size_t count;
        tcp_fd->tf_write_offset= 0;
        tcp_fd->tf_write_count= count;
 
+       /* New data may cause a segment to be sent. Clear PUSH_NOW
+        * from last NWIOTCPPUSH ioctl.
+        */
+       tcp_conn->tc_flags &= ~(TCF_NO_PUSH|TCF_PUSH_NOW);
+
        /* Start the timer (if necessary) */
-       if (tcp_conn->tc_SND_UNA == tcp_conn->tc_SND_NXT &&
-               tcp_conn->tc_transmit_seq == tcp_conn->tc_SND_UNA)
-       {
+       if (tcp_conn->tc_SND_TRM == tcp_conn->tc_SND_UNA)
                tcp_set_send_timer(tcp_conn);
-       }
-       assert(tcp_conn->tc_transmit_timer.tim_active ||
-               (tcp_print_conn(tcp_conn), printf("\n"), 0));
 
        assert(tcp_conn->tc_busy == 0);
        tcp_conn->tc_busy++;
@@ -1677,7 +1760,6 @@ int which_operation;
 {
        tcp_fd_t *tcp_fd;
        tcp_conn_t *tcp_conn;
-       int i;
 
        tcp_fd= &tcp_fd_table[fd];
 
@@ -1733,10 +1815,9 @@ assert (tcp_fd->tf_flags & TFF_IOCTL_IP);
                        break;
                }
                break;
-#if !CRAMPED
        default:
                ip_panic(( "unknown cancel request" ));
-#endif
+               break;
        }
        return NW_OK;
 }
@@ -1749,7 +1830,6 @@ PRIVATE int tcp_connect(tcp_fd)
 tcp_fd_t *tcp_fd;
 {
        tcp_conn_t *tcp_conn;
-       int state;
 
        if (!(tcp_fd->tf_flags & TFF_CONF_SET))
        {
@@ -1803,7 +1883,6 @@ PRIVATE int tcp_su4connect(tcp_fd)
 tcp_fd_t *tcp_fd;
 {
        tcp_conn_t *tcp_conn;
-       acc_t *tmp_acc;
 
        tcp_conn= tcp_fd->tf_conn;
 
@@ -1815,10 +1894,9 @@ tcp_fd_t *tcp_fd;
        tcp_conn->tc_remport= tcp_fd->tf_tcpconf.nwtc_remport;
        tcp_conn->tc_remaddr= tcp_fd->tf_tcpconf.nwtc_remaddr;
 
-       tcp_setup_conn(tcp_conn);
+       tcp_setup_conn(tcp_fd->tf_port, tcp_conn);
 
        tcp_conn->tc_fd= tcp_fd;
-       tcp_conn->tc_port= tcp_fd->tf_port;
        tcp_conn->tc_connInprogress= 1;
        tcp_conn->tc_orglisten= FALSE;
        tcp_conn->tc_state= TCS_SYN_SENT;
@@ -1835,30 +1913,6 @@ tcp_fd_t *tcp_fd;
                return NW_OK;
 }
 
-PRIVATE int conn_right4fd(tcp_conn, tcp_fd)
-tcp_fd_t *tcp_fd;
-tcp_conn_t *tcp_conn;
-{
-       unsigned long flags;
-
-       flags= tcp_fd->tf_tcpconf.nwtc_flags;
-
-       if (tcp_fd->tf_tcpconf.nwtc_locport != tcp_conn->tc_locport)
-               return FALSE;
-
-       if ((flags & NWTC_SET_RA) && tcp_fd->tf_tcpconf.nwtc_remaddr !=
-               tcp_conn->tc_remaddr)
-               return FALSE;
-
-       if ((flags & NWTC_SET_RP) && tcp_fd->tf_tcpconf.nwtc_remport !=
-               tcp_conn->tc_remport)
-               return FALSE;
-
-       if (tcp_fd->tf_port != tcp_conn->tc_port)
-               return FALSE;
-
-       return TRUE;
-}
 
 /*
 tcp_listen
@@ -1868,7 +1922,6 @@ PRIVATE int tcp_listen(tcp_fd)
 tcp_fd_t *tcp_fd;
 {
        tcp_conn_t *tcp_conn;
-       int state;
 
        if (!(tcp_fd->tf_flags & TFF_CONF_SET))
        {
@@ -2005,7 +2058,7 @@ PRIVATE void tcp_bufcheck()
        tcp_conn_t *tcp_conn;
        tcp_port_t *tcp_port;
 
-       for (i= 0, tcp_port= tcp_port_table; i<ip_conf_nr; i++, tcp_port++)
+       for (i= 0, tcp_port= tcp_port_table; i<tcp_conf_nr; i++, tcp_port++)
        {
                if (tcp_port->tp_pack)
                        bf_check_acc(tcp_port->tp_pack);
@@ -2041,7 +2094,7 @@ tcp_conn_t *tcp_conn;
                        tcp_close_connection(tcp_conn, EDSTNOTRCH);
                return;
        }
-       else if (new_ttl == TCP_DEF_TTL)
+       else if (new_ttl < TCP_DEF_TTL_NEXT)
                new_ttl= TCP_DEF_TTL_NEXT;
        else
        {
@@ -2055,14 +2108,154 @@ tcp_conn_t *tcp_conn;
        tcp_conn_write(tcp_conn, 1);
 }
 
+FORWARD u32_t mtu_table[]=
+{      /* MTU plateaus after RFC-1191; largest first, a 0 entry ends the list */
+/*     Plateau    MTU    Comments                      Reference       */
+/*     ------     ---    --------                      ---------       */
+/*               65535  Official maximum MTU          RFC 791          */
+/*               65535  Hyperchannel                  RFC 1044         */
+       65535,
+       32000,    /*     Just in case                                   */
+/*               17914  16Mb IBM Token Ring           ref. [6]         */
+       17914,
+/*               8166   IEEE 802.4                    RFC 1042         */
+       8166,
+/*               4464   IEEE 802.5 (4Mb max)          RFC 1042         */
+/*               4352   FDDI (Revised)                RFC 1188         */
+       4352, /* (1%) */
+/*               2048   Wideband Network              RFC 907          */
+/*               2002   IEEE 802.5 (4Mb recommended)  RFC 1042         */
+       2002, /* (2%) */
+/*               1536   Exp. Ethernet Nets            RFC 895          */
+/*               1500   Ethernet Networks             RFC 894          */
+/*               1500   Point-to-Point (default)      RFC 1134         */
+/*               1492   IEEE 802.3                    RFC 1042         */
+       1492, /* (3%) */
+/*               1006   SLIP                          RFC 1055         */
+/*               1006   ARPANET                       BBN 1822         */
+       1006,
+/*               576    X.25 Networks                 RFC 877          */
+/*               544    DEC IP Portal                 ref. [10]        */
+/*               512    NETBIOS                       RFC 1088         */
+/*               508    IEEE 802/Source-Rt Bridge     RFC 1042         */
+/*               508    ARCNET                        RFC 1051         */
+       508, /* (13%) */
+/*               296    Point-to-Point (low delay)    RFC 1144         */
+       296,
+       68,       /*     Official minimum MTU          RFC 791          */
+       0,        /*     End of list                                    */
+};
+
+/* React to an "MTU exceeded" report for tcp_conn: lower tc_mtu (switching
+ * PMTU off below TCP_MIN_PATH_MTU), reset SND_TRM and call tcp_conn_write.
+ */
+PUBLIC void tcp_mtu_exceeded(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       u16_t mtu;
+       int i;
+       clock_t curr_time;
+
+       if (!(tcp_conn->tc_flags & TCF_PMTU))
+       {
+               /* Strange, got MTU exceeded but DF is not set. Ignore
+                * the error. If the problem persists, the connection will
+                * time-out.
+                */
+               return;
+       }
+       curr_time= get_time();
+
+       /* We get here in two cases. Either we are trying to find an MTU
+        * that works at all, or we are trying to see how far we can increase
+        * the current MTU. If the last change to the MTU was a long time
+        * ago, we assume the second case.
+        */
+       if (curr_time >= tcp_conn->tc_mtutim + TCP_PMTU_INCR_IV)
+       {
+               mtu= tcp_conn->tc_mtu;
+               mtu -= mtu/TCP_PMTU_INCR_FRAC;
+               tcp_conn->tc_mtu= mtu;
+               tcp_conn->tc_mtutim= curr_time;
+               DBLOCK(1, printf(
+                       "tcp_mtu_exceeded: new (lowered) mtu %d for conn %d\n",
+                       mtu, tcp_conn-tcp_conn_table));
+               tcp_conn->tc_stt= 0;
+               tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA;
+               tcp_conn_write(tcp_conn, 1);
+               return;
+       }
+
+       tcp_conn->tc_mtutim= curr_time;
+       mtu= tcp_conn->tc_mtu;
+       /* Step down to the next plateau; never scan past the 0 end marker. */
+       for (i= 0; mtu_table[i] != 0 && mtu_table[i] >= mtu; i++)
+               ;       /* Nothing to do */
+       mtu= mtu_table[i];
+       if (mtu < TCP_MIN_PATH_MTU)
+       {
+               /* Small MTUs can be used for denial-of-service attacks.
+                * Switch-off PMTU if the MTU becomes too small.
+                */
+               tcp_conn->tc_flags &= ~TCF_PMTU;
+               mtu= TCP_MIN_PATH_MTU;  /* keeps the message below accurate */
+               DBLOCK(1, printf(
+                       "tcp_mtu_exceeded: clearing TCF_PMTU for conn %d\n",
+                       tcp_conn-tcp_conn_table););
+       }
+       tcp_conn->tc_mtu= mtu;
+       DBLOCK(1, printf("tcp_mtu_exceeded: new mtu %d for conn %d\n",
+               mtu, tcp_conn-tcp_conn_table););
+       tcp_conn->tc_stt= 0;
+       tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA;
+       tcp_conn_write(tcp_conn, 1);
+}
+
+/* Raise tc_mtu of tcp_conn by one step, or switch PMTU discovery back on
+ * when it has been off for more than TCP_PMTU_EN_IV ticks.
+ */
+PUBLIC void tcp_mtu_incr(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       u32_t raised;
+       clock_t now;
+
+       assert(tcp_conn->tc_mtu < tcp_conn->tc_max_mtu);
+       if (tcp_conn->tc_flags & TCF_PMTU)
+       {
+               /* Grow by 1/TCP_PMTU_INCR_FRAC, capped at tc_max_mtu. */
+               raised= tcp_conn->tc_mtu;
+               raised += raised/TCP_PMTU_INCR_FRAC;
+               if (raised > tcp_conn->tc_max_mtu)
+                       raised= tcp_conn->tc_max_mtu;
+               tcp_conn->tc_mtu= raised;
+               DBLOCK(0x1, printf("tcp_mtu_incr: new mtu %ld for conn %d\n",
+                       raised, tcp_conn-tcp_conn_table););
+               return;
+       }
+       /* Discovery is off. Retry only after a much longer time-out; note
+        * that it can be disabled during a short loss of connectivity.
+        */
+       now= get_time();
+       if (now <= tcp_conn->tc_mtutim+TCP_PMTU_EN_IV)
+               return;
+       tcp_conn->tc_flags |= TCF_PMTU;
+       DBLOCK(1, printf("tcp_mtu_incr: setting TCF_PMTU for conn %d\n",
+               tcp_conn-tcp_conn_table););
+}
+
 /*
 tcp_setup_conn
 */
 
-PRIVATE void tcp_setup_conn(tcp_conn)
+PRIVATE void tcp_setup_conn(tcp_port, tcp_conn)
+tcp_port_t *tcp_port;
 tcp_conn_t *tcp_conn;
 {
+       u16_t mss;
+
        assert(!tcp_conn->tc_connInprogress);
+       tcp_conn->tc_port= tcp_port;
        if (tcp_conn->tc_flags & TCF_INUSE)
        {
                assert (tcp_conn->tc_state == TCS_CLOSED);
@@ -2082,13 +2275,13 @@ tcp_conn_t *tcp_conn;
        }
        if (!tcp_conn->tc_ISS)
        {
-               tcp_conn->tc_ISS= (get_time()/HZ)*ISS_INC_FREQ;
+               tcp_conn->tc_ISS= tcp_rand32();
        }
        tcp_conn->tc_SND_UNA= tcp_conn->tc_ISS;
        tcp_conn->tc_SND_TRM= tcp_conn->tc_ISS;
        tcp_conn->tc_SND_NXT= tcp_conn->tc_ISS+1;
        tcp_conn->tc_SND_UP= tcp_conn->tc_ISS;
-       tcp_conn->tc_SND_PSH= tcp_conn->tc_ISS;
+       tcp_conn->tc_SND_PSH= tcp_conn->tc_ISS+1;
        tcp_conn->tc_IRS= 0;
        tcp_conn->tc_RCV_LO= tcp_conn->tc_IRS;
        tcp_conn->tc_RCV_NXT= tcp_conn->tc_IRS;
@@ -2098,6 +2291,9 @@ tcp_conn_t *tcp_conn;
        assert(tcp_conn->tc_rcvd_data == NULL);
        assert(tcp_conn->tc_adv_data == NULL);
        assert(tcp_conn->tc_send_data == NULL);
+
+       tcp_conn->tc_ka_time= TCP_DEF_KEEPALIVE;
+
        tcp_conn->tc_remipopt= NULL;
        tcp_conn->tc_tcpopt= NULL;
 
@@ -2106,10 +2302,15 @@ tcp_conn_t *tcp_conn;
        tcp_conn->tc_stt= 0;
        tcp_conn->tc_rt_dead= TCP_DEF_RT_DEAD;
        tcp_conn->tc_0wnd_to= 0;
+       tcp_conn->tc_artt= TCP_DEF_RTT*TCP_RTT_SCALE;
+       tcp_conn->tc_drtt= 0;
        tcp_conn->tc_rtt= TCP_DEF_RTT;
-       tcp_conn->tc_mss= TCP_DEF_MSS;
+       tcp_conn->tc_max_mtu= tcp_conn->tc_port->tp_mtu;
+       tcp_conn->tc_mtu= tcp_conn->tc_max_mtu;
+       tcp_conn->tc_mtutim= 0;
        tcp_conn->tc_error= NW_OK;
-       tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + 2*tcp_conn->tc_mss;
+       mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
+       tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + 2*mss;
        tcp_conn->tc_snd_cthresh= TCP_MAX_SND_WND_SIZE;
        tcp_conn->tc_snd_cinc=
                (long)TCP_DEF_MSS*TCP_DEF_MSS/TCP_MAX_SND_WND_SIZE+1;
@@ -2118,11 +2319,20 @@ tcp_conn_t *tcp_conn;
        tcp_conn->tc_rt_seq= 0;
        tcp_conn->tc_rt_threshold= tcp_conn->tc_ISS;
        tcp_conn->tc_flags= TCF_INUSE;
+       tcp_conn->tc_flags |= TCF_PMTU;
 
        clck_untimer(&tcp_conn->tc_transmit_timer);
        tcp_conn->tc_transmit_seq= 0;
 }
 
+PRIVATE u32_t tcp_rand32()
+{      /* Combine 4 bytes from rand256(); widen first, int shifts overflow */
+       u8_t bits[32];
+       rand256(bits);
+       return (u32_t)bits[0] | ((u32_t)bits[1] << 8) |
+               ((u32_t)bits[2] << 16) | ((u32_t)bits[3] << 24);
+}
+
 /*
- * $PchId: tcp.c,v 1.14.2.2 1999/11/17 22:05:27 philip Exp $
+ * $PchId: tcp.c,v 1.34 2005/06/28 14:20:27 philip Exp $
  */
index 53194d07c6c4122b30f025e4f3891ce5027d0ab2..7b96d44e13069e85ee9b456e30ea1fd940b4012e 100644 (file)
@@ -7,19 +7,18 @@ Copyright 1995 Philip Homburg
 #ifndef TCP_H
 #define TCP_H
 
-#define ISS_INC_FREQ   250000L
 #define TCP_MAX_DATAGRAM       8192
 
 #ifndef TCP_MAX_SND_WND_SIZE
-#define TCP_MAX_SND_WND_SIZE   ((CRAMPED ? 4 : 16) * 1024)
+#define TCP_MAX_SND_WND_SIZE   (32*1024)
 #endif
 
 #ifndef TCP_MIN_RCV_WND_SIZE
-#define TCP_MIN_RCV_WND_SIZE   (4 * 1024)
+#define TCP_MIN_RCV_WND_SIZE   (4*1024)
 #endif
 
 #ifndef TCP_MAX_RCV_WND_SIZE
-#define TCP_MAX_RCV_WND_SIZE   ((CRAMPED ? 4 : 8) * 1024)
+#define TCP_MAX_RCV_WND_SIZE   (TCP_MIN_RCV_WND_SIZE + 28*1024)
 #endif
 
 #define TCP_DEF_TOS            0
@@ -50,22 +49,39 @@ Copyright 1995 Philip Homburg
 #define TCP_RTT_MAX            (10*HZ) /* The maximum retransmission interval
                                         * is TCP_RTT_MAX ticks
                                         */
+#define TCP_RTT_SMOOTH          16     /* weight is 15/16 */
+#define TCP_DRTT_MULT            4     /* weight of the deviation */
+#define TCP_RTT_SCALE          256     /* Scaled values for more accuracy */
+
+#ifndef TCP_DEF_KEEPALIVE
+#define TCP_DEF_KEEPALIVE      (20L*60*HZ)     /* Keepalive interval */
+#endif
 
 #ifndef TCP_DEF_MSS
 #define TCP_DEF_MSS            1400
 #endif
 
+#define TCP_MIN_PATH_MTU        500
+#define TCP_PMTU_INCR_IV       (1L*60*HZ)      /* 1 minute in ticks */
+#define TCP_PMTU_EN_IV         (10L*60*HZ)     /* 10 minutes in ticks */
+#define TCP_PMTU_INCR_FRAC     100             /* Add 1% each time */
+#define TCP_PMTU_BLACKHOLE     (10*HZ)         /* Assume a PMTU blackhole
+                                                * after 10 seconds.
+                                                */
+
 #define TCP_DEF_CONF           (NWTC_COPY | NWTC_LP_UNSET | NWTC_UNSET_RA | \
                                        NWTC_UNSET_RP)
 #define TCP_DEF_OPT            (NWTO_NOFLAG)
 
+#define TCP_DACK_RETRANS       3       /* # dup ACKs to start fast retrans. */
+
 struct acc;
 
 void tcp_prep ARGS(( void ));
 void tcp_init ARGS(( void ));
 int tcp_open ARGS(( int port, int srfd,
        get_userdata_t get_userdata, put_userdata_t put_userdata, 
-       put_pkt_t put_pkt ));
+       put_pkt_t put_pkt, select_res_t select_res ));
 int tcp_read ARGS(( int fd, size_t count));
 int tcp_write ARGS(( int fd, size_t count));
 int tcp_ioctl ARGS(( int fd, ioreq_t req));
@@ -75,5 +91,5 @@ void tcp_close ARGS(( int fd));
 #endif /* TCP_H */
 
 /*
- * $PchId: tcp.h,v 1.8 1996/05/07 20:51:37 philip Exp $
+ * $PchId: tcp.h,v 1.17 2005/06/28 14:20:54 philip Exp $
  */
index 0e59ca55a9a3a801db8c0e0fc278c4dc7d8131c3..8d6f47987d298f5357ed8b119dbf167a9ace4802 100644 (file)
@@ -7,6 +7,8 @@ Copyright 1995 Philip Homburg
 #ifndef TCP_INT_H
 #define TCP_INT_H
 
+#define IP_TCP_MIN_HDR_SIZE    (IP_MIN_HDR_SIZE+TCP_MIN_HDR_SIZE)
+
 #define TCP_CONN_HASH_SHIFT    4
 #define TCP_CONN_HASH_NR       (1 << TCP_CONN_HASH_SHIFT)
 
@@ -18,6 +20,8 @@ typedef struct tcp_port
        int tp_ipfd;
        acc_t *tp_pack;
        ipaddr_t tp_ipaddr;
+       ipaddr_t tp_subnetmask;
+       u16_t tp_mtu;
        struct tcp_conn *tp_snd_head;
        struct tcp_conn *tp_snd_tail;
        event_t tp_snd_event;
@@ -48,6 +52,7 @@ typedef struct tcp_fd
        nwio_tcpopt_t tf_tcpopt;
        get_userdata_t tf_get_userdata;
        put_userdata_t tf_put_userdata;
+       select_res_t tf_select_res;
        struct tcp_conn *tf_conn;
        size_t tf_write_offset;
        size_t tf_write_count;
@@ -57,23 +62,26 @@ typedef struct tcp_fd
 
 #define TFF_EMPTY         0x0
 #define TFF_INUSE         0x1
-#define TFF_IOCTL_IP      0x2
-#define TFF_CONF_SET      0x4
-#define TFF_IOC_INIT_SP           0x8
-#define TFF_CONNECT      0x20
-#define TFF_WRITE_IP     0x80
+#define TFF_READ_IP       0x2
+#define TFF_WRITE_IP      0x4
+#define TFF_IOCTL_IP      0x8
+#define TFF_CONF_SET     0x10
+#define TFF_IOC_INIT_SP          0x20
+#define TFF_CONNECT      0x40
+#define TFF_CONNECTED    0x80
 #define TFF_WR_URG      0x100
 #define TFF_PUSH_DATA   0x200
-#define TFF_READ_IP     0x400
-#define TFF_RECV_URG    0x800
-#define TFF_CONNECTED  0x1000
-#define TFF_DEL_RST    0x2000
+#define TFF_RECV_URG    0x400
+#define TFF_DEL_RST     0x800
+#define TFF_SEL_READ   0x1000
+#define TFF_SEL_WRITE  0x2000
+#define TFF_SEL_EXCEPT 0x4000
 
 typedef struct tcp_conn
 {
        int tc_flags;
        int tc_state;
-       int tc_busy;            /* do not steal buffer when a counnection is 
+       int tc_busy;            /* do not steal buffer when a connection is 
                                 * busy
                                 */
        tcp_port_t *tc_port;
@@ -84,11 +92,9 @@ typedef struct tcp_conn
        tcpport_t tc_remport;
        ipaddr_t tc_remaddr;
 
-#if 1
        int tc_connInprogress;
-#endif
        int tc_orglisten;
-       time_t tc_senddis;
+       clock_t tc_senddis;
 
        /* Sending side */
        u32_t tc_ISS;           /* initial sequence number */
@@ -104,12 +110,15 @@ typedef struct tcp_conn
        u32_t tc_snd_cthresh;   /* threshold for send window */
        u32_t tc_snd_cinc;      /* increment for send window threshold */
        u16_t tc_snd_wnd;       /* max send queue size */
+       u16_t tc_snd_dack;      /* # of duplicate ACKs */
 
        /* round trip calculation. */
-       time_t tc_rt_time;
+       clock_t tc_rt_time;
        u32_t tc_rt_seq;
        u32_t tc_rt_threshold;
-       time_t tc_rtt;
+       clock_t tc_artt;        /* Avg. retransmission time. Scaled. */
+       clock_t tc_drtt;        /* Deviation, also scaled. */
+       clock_t tc_rtt;         /* Computed retrans time */
 
        acc_t *tc_send_data;
        acc_t *tc_frag2send;
@@ -127,17 +136,28 @@ typedef struct tcp_conn
        acc_t *tc_adv_data;
        u32_t tc_adv_seq;
 
+       /* Keep alive. Record SND_NXT and RCV_NXT in tc_ka_snd and
+        * tc_ka_rcv when setting the keepalive timer to detect
+        * any activity that may have happened before the timer
+        * expired.
+        */
+       u32_t tc_ka_snd;
+       u32_t tc_ka_rcv;
+       clock_t tc_ka_time;
+
        acc_t *tc_remipopt;
        acc_t *tc_tcpopt;
        u8_t tc_tos;
        u8_t tc_ttl;
-       u16_t tc_mss;
+       u16_t tc_max_mtu;       /* Max. negotiated (or selected) MTU */
+       u16_t tc_mtu;           /* discovered PMTU */
+       clock_t tc_mtutim;      /* Last time MTU/TCF_PMTU flag was changed */
 
        struct timer tc_transmit_timer;
        u32_t tc_transmit_seq;
-       time_t tc_0wnd_to;
-       time_t tc_stt;          /* time of first send after last ack */
-       time_t tc_rt_dead;
+       clock_t tc_0wnd_to;
+       clock_t tc_stt;         /* time of first send after last ack */
+       clock_t tc_rt_dead;
 
        int tc_error;
        int tc_inconsistent; 
@@ -151,6 +171,9 @@ typedef struct tcp_conn
 #define TCF_SEND_ACK           0x10
 #define TCF_FIN_SENT           0x20
 #define TCF_BSD_URG            0x40
+#define TCF_NO_PUSH            0x80
+#define TCF_PUSH_NOW           0x100
+#define TCF_PMTU               0x200
 
 #if DEBUG & 0x200
 #define TCF_DEBUG              0x1000
@@ -167,13 +190,18 @@ typedef struct tcp_conn
 void tcp_frag2conn ARGS(( tcp_conn_t *tcp_conn, ip_hdr_t *ip_hdr,
        tcp_hdr_t *tcp_hdr, acc_t *tcp_data, size_t data_len ));
 void tcp_fd_read ARGS(( tcp_conn_t *tcp_conn, int enq ));
+unsigned tcp_sel_read ARGS(( tcp_conn_t *tcp_conn ));
+void tcp_rsel_read ARGS(( tcp_conn_t *tcp_conn ));
 
 /* tcp_send.c */
 void tcp_conn_write ARGS(( tcp_conn_t *tcp_conn, int enq ));
 void tcp_release_retrans ARGS(( tcp_conn_t *tcp_conn, u32_t seg_ack,
        U16_t new_win ));
+void tcp_fast_retrans ARGS(( tcp_conn_t *tcp_conn ));
 void tcp_set_send_timer ARGS(( tcp_conn_t *tcp_conn ));
 void tcp_fd_write ARGS(( tcp_conn_t *tcp_conn ));
+unsigned tcp_sel_write ARGS(( tcp_conn_t *tcp_conn ));
+void tcp_rsel_write ARGS(( tcp_conn_t *tcp_conn ));
 void tcp_close_connection ARGS(( tcp_conn_t *tcp_conn,
        int error ));
 void tcp_port_write ARGS(( tcp_port_t *tcp_port ));
@@ -183,7 +211,7 @@ void tcp_shutdown ARGS(( tcp_conn_t *tcp_conn ));
 void tcp_extract_ipopt ARGS(( tcp_conn_t *tcp_conn,
        ip_hdr_t *ip_hdr ));
 void tcp_extract_tcpopt ARGS(( tcp_conn_t *tcp_conn,
-       tcp_hdr_t *tcp_hdr ));
+       tcp_hdr_t *tcp_hdr, size_t *mssp ));
 void tcp_get_ipopt ARGS(( tcp_conn_t *tcp_conn, ip_hdropt_t
        *ip_hdropt ));
 void tcp_get_tcpopt ARGS(( tcp_conn_t *tcp_conn, tcp_hdropt_t
@@ -207,6 +235,8 @@ void tcp_reply_ioctl ARGS(( tcp_fd_t *tcp_fd, int reply ));
 void tcp_reply_write ARGS(( tcp_fd_t *tcp_fd, size_t reply ));
 void tcp_reply_read ARGS(( tcp_fd_t *tcp_fd, size_t reply ));
 void tcp_notreach ARGS(( tcp_conn_t *tcp_conn ));
+void tcp_mtu_exceeded ARGS(( tcp_conn_t *tcp_conn ));
+void tcp_mtu_incr ARGS(( tcp_conn_t *tcp_conn ));
 
 #define TCP_FD_NR      (10*IP_PORT_MAX)
 #define TCP_CONN_NR    (2*TCP_FD_NR)
@@ -223,5 +253,5 @@ EXTERN tcp_fd_t tcp_fd_table[TCP_FD_NR];
 #endif /* TCP_INT_H */
 
 /*
- * $PchId: tcp_int.h,v 1.10 1996/05/07 20:51:59 philip Exp $
+ * $PchId: tcp_int.h,v 1.17 2005/06/28 14:21:08 philip Exp $
  */
index 243262e686724455843d95aba79e79b8983a65a1..ddd849e44f50d6be544efe98a73f5d1e50c0e397 100644 (file)
@@ -16,8 +16,6 @@ Copyright 1995 Philip Homburg
 
 THIS_FILE
 
-#if you_want_to_be_complete
-
 #undef tcp_LEmod4G
 PUBLIC int tcp_LEmod4G(n1, n2)
 u32_t n1;
@@ -49,7 +47,6 @@ u32_t n2;
 {
        return !!((u32_t)(n2-n1) & 0x80000000L);
 }
-#endif
 
 PUBLIC void tcp_extract_ipopt(tcp_conn, ip_hdr)
 tcp_conn_t *tcp_conn;
@@ -64,17 +61,62 @@ ip_hdr_t *ip_hdr;
        DBLOCK(1, printf("ip_hdr options NOT supported (yet?)\n"));
 }
 
-PUBLIC void tcp_extract_tcpopt(tcp_conn, tcp_hdr)
+PUBLIC void tcp_extract_tcpopt(tcp_conn, tcp_hdr, mssp)
 tcp_conn_t *tcp_conn;
 tcp_hdr_t *tcp_hdr;
+size_t *mssp;
 {
-       int tcp_hdr_len;
+       int i, tcp_hdr_len, type, len;
+       u8_t *cp;
+       u16_t mss;
+       /* Scan the TCP header options; report a well-formed MSS via *mssp. */
+       *mssp= 0;       /* No mss */
 
        tcp_hdr_len= (tcp_hdr->th_data_off & TH_DO_MASK) >> 2;
        if (tcp_hdr_len == TCP_MIN_HDR_SIZE)
                return;
-
-       DBLOCK(2, printf("tcp_hdr options NOT supported (yet?)\n"));
+       i= TCP_MIN_HDR_SIZE;
+       while (i<tcp_hdr_len)   /* Options start right after the fixed header */
+       {
+               cp= ((u8_t *)tcp_hdr)+i;
+               type= cp[0];
+               if (type == TCP_OPT_NOP)
+               {
+                       i++;    /* Single-byte option, no length field */
+                       continue;
+               }
+               if (type == TCP_OPT_EOL)
+                       break;
+               if (i+2 > tcp_hdr_len)
+                       break;  /* No length field */
+               len= cp[1];     /* Counts the type and length bytes too */
+               if (len < 2 || i+len > tcp_hdr_len)
+                       break;  /* Bogus length (0 would loop forever) or truncated */
+               i += len;
+               switch(type)
+               {
+               case TCP_OPT_MSS:
+                       if (len != 4)
+                               break;  /* Malformed MSS option, skip it */
+                       mss= (cp[2] << 8) | cp[3];      /* Network byte order */
+                       DBLOCK(1, printf("tcp_extract_tcpopt: got mss %d\n",
+                               mss););
+                       *mssp= mss;
+                       break;
+               case TCP_OPT_WSOPT:     /* window scale option */
+               case TCP_OPT_SACKOK:    /* SACK permitted */
+               case TCP_OPT_TS:        /* Timestamps option */
+               case TCP_OPT_CCNEW:     /* new connection count */
+                       /* Ignore this option. */
+                       break;
+               default:
+                       DBLOCK(0x1,
+                               printf(
+                       "tcp_extract_tcpopt: unknown option %d, len %d\n",
+                                       type, len));
+                       break;
+               }
+       }
 }
 
 PUBLIC u16_t tcp_pack_oneCsum(ip_hdr, tcp_pack)
@@ -239,6 +281,8 @@ acc_t *data;
        ip_hdr->ih_src= tcp_conn->tc_locaddr;
        ip_hdr->ih_dst= tcp_conn->tc_remaddr;
        ip_hdr->ih_flags_fragoff= 0;
+       if (tcp_conn->tc_flags & TCF_PMTU)
+               ip_hdr->ih_flags_fragoff |= HTONS(IH_DONT_FRAG);
 
        tcp_hdr->th_srcport= tcp_conn->tc_locport;
        tcp_hdr->th_dstport= tcp_conn->tc_remport;
@@ -251,10 +295,10 @@ acc_t *data;
        return hdr_acc;
 }
 
-#if !CRAMPED
 PUBLIC void tcp_print_state (tcp_conn)
 tcp_conn_t *tcp_conn;
 {
+#if DEBUG
        printf("tcp_conn_table[%d]->tc_state= ", tcp_conn-
                tcp_conn_table);
        if (!(tcp_conn->tc_flags & TCF_INUSE))
@@ -272,8 +316,8 @@ tcp_conn_t *tcp_conn;
        case TCS_CLOSING: printf("CLOSING"); break;
        default: printf("unknown (=%d)", tcp_conn->tc_state); break;
        }
-}
 #endif
+}
 
 PUBLIC int tcp_check_conn(tcp_conn)
 tcp_conn_t *tcp_conn;
@@ -286,9 +330,7 @@ tcp_conn_t *tcp_conn;
        if (tcp_conn->tc_inconsistent)
        {
                assert(tcp_conn->tc_inconsistent == 1);
-#if !CRAMPED
                printf("tcp_check_conn: connection is inconsistent\n");
-#endif
                return allright;
        }
 
@@ -308,38 +350,34 @@ tcp_conn_t *tcp_conn;
        size= hi_queue-lo_queue;
        if (size<0)
        {
-#if !CRAMPED
                printf("rcv hi_queue-lo_queue < 0\n");
-               printf("SND_NXT= 0x%x, SND_UNA= 0x%x\n", 
-                       tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA);
-               printf("lo_queue= 0x%x, hi_queue= 0x%x\n", 
-                       lo_queue, hi_queue);
+               printf("SND_NXT= 0x%lx, SND_UNA= 0x%lx\n", 
+                       (unsigned long)tcp_conn->tc_SND_NXT,
+                       (unsigned long)tcp_conn->tc_SND_UNA);
+               printf("lo_queue= 0x%lx, hi_queue= 0x%lx\n", 
+                       (unsigned long)lo_queue,
+                       (unsigned long)hi_queue);
                printf("size= %d\n", size);
-#endif
                allright= FALSE;
        }
        else if (!tcp_conn->tc_rcvd_data)
        {
                if (size)
                {
-#if !CRAMPED
                        printf("RCV_NXT-RCV_LO != 0\n");
                        tcp_print_conn(tcp_conn);
                        printf("lo_queue= %lu, hi_queue= %lu\n",
                                lo_queue, hi_queue);
-#endif
                        allright= FALSE;
                }
        }
        else if (size != bf_bufsize(tcp_conn->tc_rcvd_data))
        {
-#if !CRAMPED
                printf("RCV_NXT-RCV_LO != sizeof tc_rcvd_data\n");
                tcp_print_conn(tcp_conn);
                printf(
                "lo_queue= %lu, hi_queue= %lu, sizeof tc_rcvd_data= %d\n",
                        lo_queue, hi_queue, bf_bufsize(tcp_conn->tc_rcvd_data));
-#endif
                allright= FALSE;
        }
        else if (size != 0 && (tcp_conn->tc_state == TCS_CLOSED ||
@@ -347,18 +385,15 @@ tcp_conn_t *tcp_conn;
                tcp_conn->tc_state == TCS_SYN_RECEIVED ||
                tcp_conn->tc_state ==  TCS_SYN_SENT))
        {
-#if !CRAMPED
                printf("received data but not connected\n");
                tcp_print_conn(tcp_conn);
-#endif
                allright= FALSE;
        }
        if (tcp_Lmod4G(tcp_conn->tc_RCV_HI, tcp_conn->tc_RCV_NXT))
        {
-#if !CRAMPED
-               printf("tc_RCV_HI (%d) < tc_RCV_NXT (%d)\n", 
-                       tcp_conn->tc_RCV_HI, tcp_conn->tc_RCV_NXT);
-#endif
+               printf("tc_RCV_HI (0x%lx) < tc_RCV_NXT (0x%lx)\n", 
+                       (unsigned long)tcp_conn->tc_RCV_HI,
+                       (unsigned long)tcp_conn->tc_RCV_NXT);
                allright= FALSE;
        }
 
@@ -380,71 +415,64 @@ tcp_conn_t *tcp_conn;
        size= hi_queue-lo_queue;
        if (size<0)
        {
-#if !CRAMPED
                printf("snd hi_queue-lo_queue < 0\n");
-               printf("SND_ISS= 0x%x, SND_UNA= 0x%x, SND_NXT= 0x%x\n",
-                       tcp_conn->tc_ISS, tcp_conn->tc_SND_UNA,
-                       tcp_conn->tc_SND_NXT);
-               printf("hi_queue= 0x%x, lo_queue= 0x%x, size= %d\n",
-                       hi_queue, lo_queue, size);
-#endif
+               printf("SND_ISS= 0x%lx, SND_UNA= 0x%lx, SND_NXT= 0x%lx\n",
+                       (unsigned long)tcp_conn->tc_ISS,
+                       (unsigned long)tcp_conn->tc_SND_UNA,
+                       (unsigned long)tcp_conn->tc_SND_NXT);
+               printf("hi_queue= 0x%lx, lo_queue= 0x%lx, size= %d\n",
+                       (unsigned long)hi_queue, (unsigned long)lo_queue,
+                       size);
                allright= FALSE;
        }
        else if (!tcp_conn->tc_send_data)
        {
                if (size)
                {
-#if !CRAMPED
                        printf("SND_NXT-SND_UNA != 0\n");
-                       printf("SND_NXT= %d, SND_UNA= %d\n", 
-                               tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA);
-                       printf("lo_queue= %d, hi_queue= %d\n", 
-                               lo_queue, hi_queue);
-#endif
+                       printf("SND_NXT= 0x%lx, SND_UNA= 0x%lx\n", 
+                               (unsigned long)tcp_conn->tc_SND_NXT,
+                               (unsigned long)tcp_conn->tc_SND_UNA);
+                       printf("lo_queue= 0x%lx, hi_queue= 0x%lx\n", 
+                               (unsigned long)lo_queue,
+                               (unsigned long)hi_queue);
                        allright= FALSE;
                }
        }
        else if (size != bf_bufsize(tcp_conn->tc_send_data))
        {
-#if !CRAMPED
                printf("SND_NXT-SND_UNA != sizeof tc_send_data\n");
-               printf("SND_NXT= %d, SND_UNA= %d\n", 
-                       tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA);
-               printf("lo_queue= %d, lo_queue= %d\n", 
-                       lo_queue, hi_queue);
+               printf("SND_NXT= 0x%lx, SND_UNA= 0x%lx\n", 
+                       (unsigned long)tcp_conn->tc_SND_NXT,
+                       (unsigned long)tcp_conn->tc_SND_UNA);
+               printf("lo_queue= 0x%lx, lo_queue= 0x%lx\n", 
+                       (unsigned long)lo_queue,
+                       (unsigned long)hi_queue);
                printf("bf_bufsize(data)= %d\n", 
                        bf_bufsize(tcp_conn->tc_send_data));
-#endif
+               
                allright= FALSE;
        }
 
        /* checking counters */
        if (!tcp_GEmod4G(tcp_conn->tc_SND_UNA, tcp_conn->tc_ISS))
        {
-#if !CRAMPED
                printf("SND_UNA < ISS\n");
-#endif
                allright= FALSE;
        }
        if (!tcp_GEmod4G(tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA))
        {
-#if !CRAMPED
                printf("SND_NXT<SND_UNA\n");
-#endif
                allright= FALSE;
        }
        if (!tcp_GEmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_SND_UNA))
        {
-#if !CRAMPED
                printf("SND_TRM<SND_UNA\n");
-#endif
                allright= FALSE;
        }
        if (!tcp_GEmod4G(tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_TRM))
        {
-#if !CRAMPED
                printf("SND_NXT<SND_TRM\n");
-#endif
                allright= FALSE;
        }
 
@@ -452,7 +480,6 @@ tcp_conn_t *tcp_conn;
        return allright;
 }
 
-#if !CRAMPED
 PUBLIC void tcp_print_pack(ip_hdr, tcp_hdr)
 ip_hdr_t *ip_hdr;
 tcp_hdr_t *tcp_hdr;
@@ -480,7 +507,7 @@ tcp_hdr_t *tcp_hdr;
        if (tcp_hdr->th_flags & THF_PSH)
                printf(" <PSH>");
        if (tcp_hdr->th_flags & THF_ACK)
-               printf(" <ACK 0x%x %u>", ntohl(tcp_hdr->th_ack_nr),
+               printf(" <ACK 0x%lx %u>", ntohl(tcp_hdr->th_ack_nr),
                        ntohs(tcp_hdr->th_window));
        if (tcp_hdr->th_flags & THF_URG)
                printf(" <URG %u>", tcp_hdr->th_urgptr);
@@ -488,13 +515,12 @@ tcp_hdr_t *tcp_hdr;
        if (tcp_hdr_len != TCP_MIN_HDR_SIZE)
                printf(" <options %d>", tcp_hdr_len-TCP_MIN_HDR_SIZE);
 }
-#endif
 
-#if !CRAMPED
 PUBLIC void tcp_print_conn(tcp_conn)
 tcp_conn_t *tcp_conn;
 {
-       int iss, irs;
+       u32_t iss, irs;
+       tcp_fd_t *tcp_fd;
 
        iss= tcp_conn->tc_ISS;
        irs= tcp_conn->tc_IRS;
@@ -505,6 +531,10 @@ tcp_conn_t *tcp_conn;
                iss, tcp_conn->tc_SND_UNA-iss, tcp_conn->tc_SND_UNA, 
                tcp_conn->tc_SND_TRM-iss, tcp_conn->tc_SND_TRM,
                tcp_conn->tc_SND_NXT-iss, tcp_conn->tc_SND_NXT);
+       printf(
+       " UP +0x%lx(0x%lx) PSH +0x%lx(0x%lx) ",
+               tcp_conn->tc_SND_UP-iss, tcp_conn->tc_SND_UP,
+               tcp_conn->tc_SND_PSH-iss, tcp_conn->tc_SND_PSH);
        printf(" snd_cwnd +0x%lx(0x%lx)",
                tcp_conn->tc_snd_cwnd-tcp_conn->tc_SND_UNA,
                tcp_conn->tc_snd_cwnd);
@@ -516,7 +546,7 @@ tcp_conn_t *tcp_conn;
                printf("+0x%lx(0x%lx)", tcp_conn->tc_transmit_seq-iss,
                        tcp_conn->tc_transmit_seq);
        }
-       printf(" IRS 0x%lx LO +0x%x(0x%lx) NXT +0x%x(0x%lx) HI +0x%x(0x%lx)",
+       printf(" IRS 0x%lx LO +0x%lx(0x%lx) NXT +0x%lx(0x%lx) HI +0x%lx(0x%lx)",
                irs, tcp_conn->tc_RCV_LO-irs, tcp_conn->tc_RCV_LO,
                tcp_conn->tc_RCV_NXT-irs, tcp_conn->tc_RCV_NXT,
                tcp_conn->tc_RCV_HI-irs, tcp_conn->tc_RCV_HI);
@@ -532,9 +562,31 @@ tcp_conn_t *tcp_conn;
                printf(" TCF_SEND_ACK");
        if (tcp_conn->tc_flags & TCF_FIN_SENT)
                printf(" TCF_FIN_SENT");
+       if (tcp_conn->tc_flags & TCF_BSD_URG)
+               printf(" TCF_BSD_URG");
+       if (tcp_conn->tc_flags & TCF_NO_PUSH)
+               printf(" TCF_NO_PUSH");
+       if (tcp_conn->tc_flags & TCF_PUSH_NOW)
+               printf(" TCF_PUSH_NOW");
+       if (tcp_conn->tc_flags & TCF_PMTU)
+               printf(" TCF_PMTU");
+       printf("\n");
+       writeIpAddr(tcp_conn->tc_locaddr);
+       printf(", %u -> ", ntohs(tcp_conn->tc_locport));
+       writeIpAddr(tcp_conn->tc_remaddr);
+       printf(", %u\n", ntohs(tcp_conn->tc_remport));
+       tcp_fd= tcp_conn->tc_fd;
+       if (!tcp_fd)
+               printf("tc_fd NULL");
+       else
+       {
+               printf("tc_fd #%d: flags 0x%x, r %u@%u, w %u@%u",
+                       tcp_fd-tcp_fd_table, tcp_fd->tf_flags,
+                       tcp_fd->tf_read_count, tcp_fd->tf_read_offset,
+                       tcp_fd->tf_write_count, tcp_fd->tf_write_offset);
+       }
 }
-#endif
 
 /*
- * $PchId: tcp_lib.c,v 1.7 1995/11/21 06:45:27 philip Exp $
+ * $PchId: tcp_lib.c,v 1.14 2005/01/31 21:41:38 philip Exp $
  */
index ed73fb3af76cce714887c46e71a4d5e97e16cc81..0a510bee4a5352a5a554c9566b76da7f3f90b6e3 100644 (file)
@@ -9,6 +9,7 @@ Copyright 1995 Philip Homburg
 #include "clock.h"
 #include "event.h"
 #include "type.h"
+#include "sr.h"
 
 #include "io.h"
 #include "tcp_int.h"
@@ -34,9 +35,10 @@ size_t data_len;
        tcp_fd_t *connuser;
        int tcp_hdr_flags;
        int ip_hdr_len, tcp_hdr_len;
-       u32_t seg_ack, seg_seq, rcv_hi;
-       u16_t seg_wnd;
-       int acceptable_ACK, segm_acceptable;
+       u32_t seg_ack, seg_seq, rcv_hi, snd_una, snd_nxt;
+       u16_t seg_wnd, mtu;
+       size_t mss;
+       int acceptable_ACK, segm_acceptable, send_rst;
 
        ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
        tcp_hdr_len= (tcp_hdr->th_data_off & TH_DO_MASK) >> 2;
@@ -46,6 +48,11 @@ size_t data_len;
        seg_seq= ntohl(tcp_hdr->th_seq_nr);
        seg_wnd= ntohs(tcp_hdr->th_window);
 
+#if 0
+ { where(); tcp_print_conn(tcp_conn); printf("\n");
+       tcp_print_pack(ip_hdr, tcp_hdr); printf("\n"); }
+#endif
+
        switch (tcp_conn->tc_state)
        {
        case TCS_CLOSED:
@@ -105,7 +112,24 @@ LISTEN:
                if (tcp_hdr_flags & THF_SYN)
                {
                        tcp_extract_ipopt(tcp_conn, ip_hdr);
-                       tcp_extract_tcpopt(tcp_conn, tcp_hdr);
+                       tcp_extract_tcpopt(tcp_conn, tcp_hdr, &mss);
+                       mtu= mss+IP_TCP_MIN_HDR_SIZE;
+                       if (mtu < IP_MIN_MTU)
+                       {
+                               /* No or unrealistic mss, use default MTU */
+                               mtu= IP_DEF_MTU;
+                       }
+                       if (mtu < tcp_conn->tc_max_mtu)
+                       {
+                               tcp_conn->tc_max_mtu= mtu;
+                               tcp_conn->tc_mtu= mtu;
+                               DBLOCK(1, printf(
+                                       "tcp[%d]: conn[%d]: mtu = %d\n",
+                                       tcp_conn->tc_port-tcp_port_table,
+                                       tcp_conn-tcp_conn_table, 
+                                       mtu););
+                       }
+
                        tcp_conn->tc_RCV_LO= seg_seq+1;
                        tcp_conn->tc_RCV_NXT= seg_seq+1;
                        tcp_conn->tc_RCV_HI= tcp_conn->tc_RCV_LO+
@@ -194,8 +218,12 @@ SYN-SENT:
                                        break;
                                else
                                {
+                                       /* HACK: force sending a RST,
+                                        * normally, RSTs are not send
+                                        * if the segment is an ACK.
+                                        */
                                        create_RST (tcp_conn, ip_hdr,
-                                               tcp_hdr, data_len);
+                                               tcp_hdr, data_len+1);
                                        tcp_conn_write(tcp_conn, 1);
                                        break;
                                }
@@ -219,6 +247,24 @@ SYN-SENT:
                }
                if (tcp_hdr_flags & THF_SYN)
                {
+                       tcp_extract_ipopt(tcp_conn, ip_hdr);
+                       tcp_extract_tcpopt(tcp_conn, tcp_hdr, &mss);
+                       mtu= mss+IP_TCP_MIN_HDR_SIZE;
+                       if (mtu < IP_MIN_MTU)
+                       {
+                               /* No or unrealistic mss, use default MTU */
+                               mtu= IP_DEF_MTU;
+                       }
+                       if (mtu < tcp_conn->tc_max_mtu)
+                       {
+                               tcp_conn->tc_max_mtu= mtu;
+                               tcp_conn->tc_mtu= mtu;
+                               DBLOCK(1, printf(
+                                       "tcp[%d]: conn[%d]: mtu = %d\n",
+                                       tcp_conn->tc_port-tcp_port_table,
+                                       tcp_conn-tcp_conn_table, 
+                                       mtu););
+                       }
                        tcp_conn->tc_RCV_LO= seg_seq+1;
                        tcp_conn->tc_RCV_NXT= seg_seq+1;
                        tcp_conn->tc_RCV_HI= tcp_conn->tc_RCV_LO +
@@ -262,6 +308,7 @@ SYN-SENT:
 /*
 SYN-RECEIVED:
        test if segment is acceptable:
+
        Segment Receive Test
        Length  Window
        0       0       SEG.SEQ == RCV.NXT
@@ -271,10 +318,16 @@ SYN-RECEIVED:
                        || (RCV.NXT <= SEG.SEQ+SEG.LEN-1 &&
                        SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND)
        for urgent data: use RCV.WND+1 for RCV.WND
+
+       Special: Send RST if SEG.SEQ < IRS or SEG.SEQ > RCV.NXT+64K (and
+                the packet is not a RST packet itself).
 */
                rcv_hi= tcp_conn->tc_RCV_HI;
                if (tcp_hdr_flags & THF_URG)
                        rcv_hi++;
+               send_rst= tcp_Lmod4G(seg_seq, tcp_conn->tc_IRS) ||
+                       tcp_Gmod4G(seg_seq, tcp_conn->tc_RCV_NXT+0x10000);
+
                if (!data_len)
                {
                        if (rcv_hi == tcp_conn->tc_RCV_NXT)
@@ -316,7 +369,15 @@ SYN-RECEIVED:
 */
                if (!segm_acceptable)
                {
-                       if (!(tcp_hdr_flags & THF_RST))
+                       if (tcp_hdr_flags & THF_RST)
+                               ; /* do nothing */
+                       else if (send_rst)
+                       {
+                               create_RST(tcp_conn, ip_hdr, tcp_hdr,
+                                       data_len);
+                               tcp_conn_write(tcp_conn, 1);
+                       }
+                       else
                        {
                                tcp_conn->tc_flags |= TCF_SEND_ACK;
                                tcp_conn_write(tcp_conn, 1);
@@ -343,6 +404,10 @@ SYN-RECEIVED:
                                tcp_conn->tc_fd= NULL;
 
                                tcp_close_connection (tcp_conn, ECONNREFUSED);
+
+                               /* Pick a new ISS next time */
+                               tcp_conn->tc_ISS= 0;
+
                                if (connuser)
                                        (void)tcp_su4listen(connuser);
                                break;
@@ -510,6 +575,12 @@ TIME-WAIT:
                                        {
                                                tcp_fd_read(tcp_conn, 1);
                                        }
+                                       if (tcp_conn->tc_fd &&
+                                               (tcp_conn->tc_fd->tf_flags &
+                                               TFF_SEL_READ))
+                                       {
+                                               tcp_rsel_read(tcp_conn);
+                                       }
                                }
                        }
                        break;
@@ -592,49 +663,61 @@ TIME-WAIT:
                if (tcp_conn->tc_state != TCS_CLOSING)
                        tcp_conn->tc_stt= 0;
 
-               if (seg_ack == tcp_conn->tc_SND_UNA)
+               snd_una= tcp_conn->tc_SND_UNA;
+               snd_nxt= tcp_conn->tc_SND_NXT;
+               if (seg_ack == snd_una)
                {
+                       
+                       if (tcp_Gmod4G(snd_nxt, snd_una))
+                       {
+                               /* Duplicate ACK */
+                               if (++tcp_conn->tc_snd_dack ==
+                                       TCP_DACK_RETRANS)
+                               {
+                                       tcp_fast_retrans(tcp_conn);
+                               }
+                       }
+
                        /* This ACK doesn't acknowledge any new data, this
                         * is a likely situation if we are only receiving
                         * data. We only update the window if we are
                         * actually sending or if we currently have a
                         * zero window.
                         */
-                       if (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_UNA &&
+                       if (tcp_conn->tc_snd_cwnd == snd_una &&
                                seg_wnd != 0)
                        {
                                DBLOCK(2, printf("zero window opened\n"));
                                /* The other side opened up its receive
                                 * window. */
-                               if (seg_wnd > 2*tcp_conn->tc_mss)
-                                       seg_wnd= 2*tcp_conn->tc_mss;
-                               tcp_conn->tc_snd_cwnd=
-                                       tcp_conn->tc_SND_UNA+seg_wnd;
+                               mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
+                               if (seg_wnd > 2*mss)
+                                       seg_wnd= 2*mss;
+                               tcp_conn->tc_snd_cwnd= snd_una+seg_wnd;
                                tcp_conn_write(tcp_conn, 1);
                        }
                        if (seg_wnd == 0)
                        {
                                tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_TRM=
-                                       tcp_conn->tc_SND_UNA;
+                                       snd_una;
                        }
                }
-               else if (tcp_Lmod4G(tcp_conn->tc_SND_UNA, seg_ack)
-                       && tcp_LEmod4G(seg_ack, tcp_conn->
-                       tc_SND_NXT))
+               else if (tcp_Lmod4G(snd_una, seg_ack) &&
+                       tcp_LEmod4G(seg_ack, snd_nxt))
                {
                        tcp_release_retrans(tcp_conn, seg_ack, seg_wnd);
                        if (tcp_conn->tc_state == TCS_CLOSED)
                                break;
                }
                else if (tcp_Gmod4G(seg_ack,
-                       tcp_conn->tc_SND_NXT))
+                       snd_nxt))
                {
                        tcp_conn->tc_flags |= TCF_SEND_ACK;
                        tcp_conn_write(tcp_conn, 1);
                        DBLOCK(1, printf(
                        "got an ack of something I haven't send\n");
                                printf( "seg_ack= %lu, SND_NXT= %lu\n",
-                               seg_ack, tcp_conn->tc_SND_NXT));
+                               seg_ack, snd_nxt));
                        break;
                }
 
@@ -642,7 +725,7 @@ TIME-WAIT:
        process data...
 */
                tcp_extract_ipopt(tcp_conn, ip_hdr);
-               tcp_extract_tcpopt(tcp_conn, tcp_hdr);
+               tcp_extract_tcpopt(tcp_conn, tcp_hdr, &mss);
 
                if (data_len)
                {
@@ -695,13 +778,16 @@ TIME-WAIT:
                        {
                                tcp_fd_read(tcp_conn, 1);
                        }
+                       if (tcp_conn->tc_fd &&
+                               (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ))
+                       {
+                               tcp_rsel_read(tcp_conn);
+                       }
                }
                break;
        default:
-#if !CRAMPED
                printf("tcp_frag2conn: unknown state ");
                tcp_print_state(tcp_conn);
-#endif
                break;
        }
        if (tcp_data != NULL)
@@ -717,7 +803,7 @@ acc_t *tcp_data;
 int data_len;
 {
        u32_t lo_seq, hi_seq, urg_seq, seq_nr, adv_seq, nxt;
-       u16_t urgptr;
+       u32_t urgptr;
        int tcp_hdr_flags;
        unsigned int offset;
        acc_t *tmp_data, *rcvd_data, *adv_data;
@@ -736,11 +822,43 @@ int data_len;
        lo_seq= seq_nr;
        tcp_hdr_flags= tcp_hdr->th_flags & TH_FLAGS_MASK;
 
+       if (tcp_Lmod4G(lo_seq, tcp_conn->tc_RCV_NXT))
+       {
+               DBLOCK(0x10,
+                       printf("segment is a retransmission\n"));
+               offset= tcp_conn->tc_RCV_NXT-lo_seq;
+               tcp_data= bf_delhead(tcp_data, offset);
+               lo_seq += offset;
+               data_len -= offset;
+               if (tcp_hdr_flags & THF_URG)
+               {
+                       printf("process_data: updating urgent pointer\n");
+                       if (urgptr >= offset)
+                               urgptr -= offset;
+                       else
+                               tcp_hdr_flags &= ~THF_URG;
+               }
+       }
+       assert (lo_seq == tcp_conn->tc_RCV_NXT);
+
+       if (tcp_hdr_flags & THF_URG)
+       {
+               if (!(tcp_conn->tc_flags & TCF_BSD_URG))
+               {
+                       /* Update urgent pointer to point past the urgent
+                        * data
+                        */
+                       urgptr++;
+               }
+               if (urgptr == 0)
+                       tcp_hdr_flags &= ~THF_URG;
+       }
+
        if (tcp_hdr_flags & THF_URG)
        {
                if (urgptr > data_len)
                        urgptr= data_len;
-               urg_seq= lo_seq+ urgptr;
+               urg_seq= lo_seq+urgptr;
 
                if (tcp_GEmod4G(urg_seq, tcp_conn->tc_RCV_HI))
                        urg_seq= tcp_conn->tc_RCV_HI;
@@ -764,11 +882,18 @@ int data_len;
                                {
                                        tcp_fd_read(tcp_conn, 1);
                                }
+                               if (tcp_conn->tc_fd &&
+                                       (tcp_conn->tc_fd->tf_flags &
+                                       TFF_SEL_READ))
+                               {
+                                       tcp_rsel_read(tcp_conn);
+                               }
                                return;
                        }
                }
                if (tcp_Gmod4G(urg_seq, tcp_conn->tc_RCV_UP))
                        tcp_conn->tc_RCV_UP= urg_seq;
+#if 0
                if (urgptr < data_len)
                {
                        data_len= urgptr;
@@ -777,6 +902,7 @@ int data_len;
                        tcp_data= tmp_data;
                        tcp_hdr_flags &= ~THF_FIN;
                }
+#endif
                tcp_conn->tc_flags |= TCF_RCV_PUSH;
        }
        else
@@ -789,17 +915,6 @@ int data_len;
                tcp_conn->tc_flags |= TCF_RCV_PUSH;
        }
 
-       if (tcp_Lmod4G(lo_seq, tcp_conn->tc_RCV_NXT))
-       {
-               DBLOCK(0x10,
-                       printf("segment is a retransmission\n"));
-               offset= tcp_conn->tc_RCV_NXT-lo_seq;
-               tcp_data= bf_delhead(tcp_data, offset);
-               lo_seq += offset;
-               data_len -= offset;
-       }
-       assert (lo_seq == tcp_conn->tc_RCV_NXT);
-
        hi_seq= lo_seq+data_len;
        if (tcp_Gmod4G(hi_seq, tcp_conn->tc_RCV_HI))
        {
@@ -828,6 +943,8 @@ int data_len;
 
        if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_READ_IP))
                tcp_fd_read(tcp_conn, 1);
+       if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ))
+               tcp_rsel_read(tcp_conn);
 
        DIFBLOCK(2, (tcp_conn->tc_RCV_NXT == tcp_conn->tc_RCV_HI),
                printf("conn[[%d] full receive buffer\n", 
@@ -837,10 +954,8 @@ int data_len;
                return;
        if (tcp_hdr_flags & THF_FIN)
        {
-#if !CRAMPED
                printf("conn[%d]: advanced data after FIN\n",
                        tcp_conn-tcp_conn_table);
-#endif
                tcp_data= tcp_conn->tc_adv_data;
                tcp_conn->tc_adv_data= NULL;
                bf_afree(tcp_data);
@@ -884,6 +999,8 @@ int data_len;
 
        if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_READ_IP))
                tcp_fd_read(tcp_conn, 1);
+       if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ))
+               tcp_rsel_read(tcp_conn);
 
        adv_data= tcp_conn->tc_adv_data;
        if (adv_data != NULL)
@@ -932,6 +1049,11 @@ int data_len;
                {
                        tcp_fd_read(tcp_conn, 1);
                }
+               if (tcp_conn->tc_fd &&
+                       (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ))
+               {
+                       tcp_rsel_read(tcp_conn);
+               }
        }
 }
 
@@ -956,6 +1078,7 @@ int data_len;
 
        if (tcp_hdr->th_flags & THF_URG)
                return; /* Urgent data is to complicated */
+
        if (tcp_hdr->th_flags & THF_PSH)
                tcp_conn->tc_flags |= TCF_RCV_PUSH;
        seq= ntohl(tcp_hdr->th_seq_nr);
@@ -1003,13 +1126,10 @@ tcp_hdr_t *tcp_hdr;
 int data_len;
 {
        acc_t *tmp_ipopt, *tmp_tcpopt, *tcp_pack;
-       ip_hdropt_t ip_hdropt;
-       tcp_hdropt_t tcp_hdropt;
        acc_t *RST_acc;
        ip_hdr_t *RST_ip_hdr;
        tcp_hdr_t *RST_tcp_hdr;
-       char *ptr2RSThdr;
-       size_t pack_size, ip_hdr_len;
+       size_t pack_size, ip_hdr_len, mss;
 
        DBLOCK(0x10, printf("in create_RST, bad pack is:\n"); 
                tcp_print_pack(ip_hdr, tcp_hdr); tcp_print_state(tcp_conn);
@@ -1043,7 +1163,7 @@ int data_len;
                tmp_tcpopt->acc_linkC++;
 
        tcp_extract_ipopt (tcp_conn, ip_hdr);
-       tcp_extract_tcpopt (tcp_conn, tcp_hdr);
+       tcp_extract_tcpopt (tcp_conn, tcp_hdr, &mss);
 
        RST_acc= tcp_make_header (tcp_conn, &RST_ip_hdr, &RST_tcp_hdr,
                (acc_t *)0);
@@ -1107,6 +1227,7 @@ int enq;                                  /* Enqueue writes. */
        acc_t *data;
        int fin_recv, urg, push, result;
        i32_t old_window, new_window;
+       u16_t mss;
 
        assert(tcp_conn->tc_busy);
 
@@ -1130,7 +1251,13 @@ int enq;                                 /* Enqueue writes. */
        if (fin_recv)
                data_size--;
        if (urg)
+       {
+#if DEBUG
+               printf("tcp_fd_read: RCV_UP = 0x%x, RCV_LO = 0x%x\n",
+                       tcp_conn->tc_RCV_UP, tcp_conn->tc_RCV_LO);
+#endif
                read_size= tcp_conn->tc_RCV_UP-tcp_conn->tc_RCV_LO;
+       }
        else
                read_size= data_size;
 
@@ -1214,16 +1341,34 @@ int enq;                                        /* Enqueue writes. */
                tcp_conn->tc_RCV_LO += read_size;
                data_size -= read_size;
        }
-       if (tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_LO <= (tcp_conn->
-               tc_rcv_wnd-tcp_conn->tc_mss))
+
+       /* Advance IRS (and RCV_UP, if it has fallen behind IRS) every 0.5GB */
+       if (tcp_conn->tc_RCV_LO - tcp_conn->tc_IRS > 0x40000000)
+       {
+               tcp_conn->tc_IRS += 0x20000000;
+               DBLOCK(1, printf("tcp_fd_read: updating IRS to 0x%lx\n",
+                       (unsigned long)tcp_conn->tc_IRS););
+               if (tcp_Lmod4G(tcp_conn->tc_RCV_UP, tcp_conn->tc_IRS))
+               {
+                       tcp_conn->tc_RCV_UP= tcp_conn->tc_IRS;
+                       DBLOCK(1, printf(
+                               "tcp_fd_read: updating RCV_UP to 0x%lx\n",
+                               (unsigned long)tcp_conn->tc_RCV_UP););
+               }
+               DBLOCK(1, printf("tcp_fd_read: RCP_LO = 0x%lx\n",
+                       (unsigned long)tcp_conn->tc_RCV_LO););
+       }
+
+       mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
+       if (tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_LO <=
+               tcp_conn->tc_rcv_wnd-mss)
        {
                old_window= tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_NXT;
                tcp_conn->tc_RCV_HI= tcp_conn->tc_RCV_LO + 
                        tcp_conn->tc_rcv_wnd;
                new_window= tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_NXT;
                assert(old_window >=0 && new_window >= old_window);
-               if (old_window < tcp_conn->tc_mss &&
-                       new_window >= tcp_conn->tc_mss)
+               if (old_window < mss && new_window >= mss)
                {
                        tcp_conn->tc_flags |= TCF_SEND_ACK;
                        DBLOCK(2, printf("opening window\n"));
@@ -1236,18 +1381,67 @@ int enq;                                        /* Enqueue writes. */
                /* Out of data, clear PUSH flag and reply to a read. */
                tcp_conn->tc_flags &= ~TCF_RCV_PUSH;
        }
-       if (fin_recv || urg || !tcp_fd->tf_read_count)
+       if (fin_recv || urg || tcp_fd->tf_read_offset ||
+               !tcp_fd->tf_read_count)
        {
                tcp_reply_read (tcp_fd, tcp_fd->tf_read_offset);
                return;
        }
-       if (tcp_fd->tf_read_offset)
+}
+/*
+tcp_sel_read
+
+Readiness test used for read selects on tcp_conn; the connection itself
+is not modified. Returns 1 when the connection is in state TCS_CLOSED or
+has TCF_FIN_RECV set (presumably so that a read can report the condition
+right away), or when data is queued (RCV_NXT-RCV_LO is nonzero) and at
+least one of the following holds: TCF_RCV_PUSH is set, tcp_Gmod4G()
+reports RCV_UP to be beyond RCV_LO, or TCP_MIN_RCV_WND_SIZE or more bytes
+are queued. Returns 0 in all other cases.
+*/
+PUBLIC unsigned
+tcp_sel_read(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       tcp_fd_t *tcp_fd;
+       size_t data_size;
+       int fin_recv, urg, push;
+
+       tcp_fd= tcp_conn->tc_fd;        /* assigned but not used below */
+
+       if (tcp_conn->tc_state == TCS_CLOSED)
+               return 1;
+
+       fin_recv= (tcp_conn->tc_flags & TCF_FIN_RECV);
+       if (fin_recv)
+               return 1;
+
+       data_size= tcp_conn->tc_RCV_NXT-tcp_conn->tc_RCV_LO;
+       if (data_size == 0)
        {
-               tcp_reply_read (tcp_fd, tcp_fd->tf_read_offset);
-               return;
+               /* No data, and no end of file: nothing can be read yet. */
+               return 0;
        }
+
+       urg= tcp_Gmod4G(tcp_conn->tc_RCV_UP, tcp_conn->tc_RCV_LO);
+       push= (tcp_conn->tc_flags & TCF_RCV_PUSH);
+
+       if (!push && !urg && data_size < TCP_MIN_RCV_WND_SIZE)
+       {
+               /* Defer until later: fewer than TCP_MIN_RCV_WND_SIZE bytes
+                * are queued and neither push nor urgent is set.
+                */
+               return 0;
+       }
+
+       return 1;
+}
+/*
+tcp_rsel_read
+
+Complete a pending read select on tcp_conn. Nothing happens while
+tcp_sel_read() returns 0. Otherwise TFF_SEL_READ is cleared in the flags
+of the connection's fd and the fd's tf_select_res callback is called with
+tf_srfd and SR_SELECT_READ; when no callback is installed only a
+diagnostic is printed. tc_fd is dereferenced without a check, so callers
+have to make sure that it is not NULL.
+*/
+PUBLIC void
+tcp_rsel_read(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       tcp_fd_t *tcp_fd;
+
+       if (tcp_sel_read(tcp_conn) == 0)
+               return;
+
+       tcp_fd= tcp_conn->tc_fd;
+       tcp_fd->tf_flags &= ~TFF_SEL_READ;      /* select is reported once */
+       if (tcp_fd->tf_select_res)
+               tcp_fd->tf_select_res(tcp_fd->tf_srfd, SR_SELECT_READ);
+       else
+               printf("tcp_rsel_read: no select_res\n");
 }
 
 /*
- * $PchId: tcp_recv.c,v 1.13.2.1 2000/05/02 18:53:06 philip Exp $
+ * $PchId: tcp_recv.c,v 1.30 2005/06/28 14:21:35 philip Exp $
  */
index 45803381e110719337fd0512d20a60259cf2f7f8..dca1f9ad0a61369a841ef8bacc728c0740beda37 100644 (file)
@@ -9,6 +9,7 @@ Copyright 1995 Philip Homburg
 #include "clock.h"
 #include "event.h"
 #include "type.h"
+#include "sr.h"
 
 #include "assert.h"
 #include "io.h"
@@ -35,7 +36,13 @@ int enq;                             /* Writes need to be enqueued. */
        if (tcp_conn->tc_flags & TCF_MORE2WRITE)
                return;
 
-       /* XXX - do we really have something to send here? */
+       /* Do we really have something to send here? */
+       if (tcp_conn->tc_SND_UNA == tcp_conn->tc_SND_NXT &&
+               !(tcp_conn->tc_flags & TCF_SEND_ACK) &&
+               !tcp_conn->tc_frag2send)
+       {
+               return;
+       }
 
        tcp_conn->tc_flags |= TCF_MORE2WRITE;
        tcp_conn->tc_send_link= NULL;
@@ -109,12 +116,17 @@ tcp_port_t *tcp_port;
                        {
                                if (r == NW_WOULDBLOCK)
                                        break;
+                               if (r == EPACKSIZE)
+                               {
+                                       tcp_mtu_exceeded(tcp_conn);
+                                       continue;
+                               }
                                if (r == EDSTNOTRCH)
                                {
                                        tcp_notreach(tcp_conn);
                                        continue;
                                }
-                               else if (r == EBADDEST)
+                               if (r == EBADDEST)
                                        continue;
                        }
                        assert(r == NW_OK ||
@@ -151,21 +163,23 @@ tcp_conn_t *tcp_conn;
        acc_t *pack2write, *tmp_pack, *tcp_pack;
        tcp_hdr_t *tcp_hdr;
        ip_hdr_t *ip_hdr;
-       int tot_hdr_size, ip_hdr_len;
+       int tot_hdr_size, ip_hdr_len, no_push, head, more2write;
        u32_t seg_seq, seg_lo_data, queue_lo_data, seg_hi, seg_hi_data;
-       u16_t seg_up;
+       u16_t seg_up, mss;
        u8_t seg_flags;
-       time_t new_dis;
        size_t pack_size;
-       time_t curr_time;
+       clock_t curr_time, new_dis;
        u8_t *optptr;
 
+       mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
+
        assert(tcp_conn->tc_busy);
        curr_time= get_time();
        switch (tcp_conn->tc_state)
        {
        case TCS_CLOSED:
-               return 0;
+       case TCS_LISTEN:
+               return NULL;
        case TCS_SYN_RECEIVED:
        case TCS_SYN_SENT:
 
@@ -177,14 +191,19 @@ tcp_conn_t *tcp_conn;
 
                tcp_conn->tc_flags &= ~TCF_SEND_ACK;
 
+               /* Advertise a mss based on the port mtu. The current mtu may
+                * be lower if the other side sends a smaller mss.
+                */
+               mss= tcp_conn->tc_port->tp_mtu-IP_TCP_MIN_HDR_SIZE;
+
                /* Include a max segment size option. */
                assert(tcp_conn->tc_tcpopt == NULL);
                tcp_conn->tc_tcpopt= bf_memreq(4);
                optptr= (u8_t *)ptr2acc_data(tcp_conn->tc_tcpopt);
                optptr[0]= TCP_OPT_MSS;
                optptr[1]= 4;
-               optptr[2]= tcp_conn->tc_mss >> 8;
-               optptr[3]= tcp_conn->tc_mss & 0xFF;
+               optptr[2]= mss >> 8;
+               optptr[3]= mss & 0xFF;
 
                pack2write= tcp_make_header(tcp_conn, &ip_hdr, &tcp_hdr, 
                        (acc_t *)0);
@@ -216,7 +235,7 @@ tcp_conn_t *tcp_conn;
                tcp_hdr->th_seq_nr= htonl(seg_seq);
                tcp_hdr->th_ack_nr= htonl(tcp_conn->tc_RCV_NXT);
                tcp_hdr->th_flags= seg_flags;
-               tcp_hdr->th_window= htons(tcp_conn->tc_mss);
+               tcp_hdr->th_window= htons(mss);
                        /* Initially we allow one segment */
 
                ip_hdr->ih_length= htons(tot_hdr_size);
@@ -293,15 +312,71 @@ tcp_conn_t *tcp_conn;
                        }
 
                        tot_hdr_size= bf_bufsize(pack2write);
-                       if (seg_hi_data - seg_lo_data > tcp_conn->tc_mss -
-                               tot_hdr_size)
+
+                       no_push= (tcp_LEmod4G(tcp_conn->tc_SND_PSH, seg_seq));
+                       head= (seg_seq == tcp_conn->tc_SND_UNA);
+                       if (no_push)
+                       {
+                               /* Shutdown sets SND_PSH */
+                               seg_flags &= ~THF_FIN;
+                               if (seg_hi_data-seg_lo_data <= 1)
+                               {
+                                       /* Always keep at least one byte
+                                        * for a future push.
+                                        */
+                                       DBLOCK(0x20,
+                                           printf("no data: no push\n"));
+                                       if (head)
+                                       {
+                                               DBLOCK(0x1, printf(
+                                       "no data: setting TCF_NO_PUSH\n"));
+                                               tcp_conn->tc_flags |=
+                                                       TCF_NO_PUSH;
+                                       }
+                                       goto after_data;
+                               }
+                               seg_hi_data--;
+                       }
+
+                       if (tot_hdr_size != IP_TCP_MIN_HDR_SIZE)
+                       {
+                               printf(
+                               "tcp_write`make_pack: tot_hdr_size = %d\n",
+                                       tot_hdr_size);
+                               mss= tcp_conn->tc_mtu-tot_hdr_size;
+                       }
+                       if (seg_hi_data - seg_lo_data > mss)
                        {
-                               seg_hi_data= seg_lo_data + tcp_conn->tc_mss -
-                                       tot_hdr_size;
+                               /* Truncate to at most one segment */
+                               seg_hi_data= seg_lo_data + mss;
                                seg_hi= seg_hi_data;
                                seg_flags &= ~THF_FIN;
                        }
 
+                       if (no_push &&
+                               seg_hi_data-seg_lo_data != mss)
+                       {
+                               DBLOCK(0x20, printf(
+                               "no data: no push for partial segment\n"));
+                               more2write= (tcp_conn->tc_fd &&
+                                       (tcp_conn->tc_fd->tf_flags &
+                                       TFF_WRITE_IP));
+                               DIFBLOCK(2, more2write, 
+                                       printf(
+                       "tcp_send`make_pack: more2write -> !TCF_NO_PUSH\n");
+                               );
+                               if (head && !more2write)
+                               {
+                                       DBLOCK(0x1, printf(
+                               "partial segment: setting TCF_NO_PUSH\n"));
+                                       tcp_conn->tc_flags |= TCF_NO_PUSH;
+                                       tcp_print_conn(tcp_conn);
+                                       printf("\n");
+                               }
+                               goto after_data;
+                       }
+
+
                        if (tcp_Gmod4G(seg_hi, tcp_conn->tc_snd_cwnd))
                        {
                                seg_hi_data= tcp_conn->tc_snd_cwnd;
@@ -309,25 +384,34 @@ tcp_conn_t *tcp_conn;
                                seg_flags &= ~THF_FIN;
                        }
 
-                       if (seg_hi-seg_seq == 0)
+                       if (!head &&
+                               seg_hi_data-seg_lo_data < mss)
                        {
+                               if (tcp_conn->tc_flags & TCF_PUSH_NOW)
+                               {
+                                       DBLOCK(0x20,
+                                       printf("push: no Nagle\n"));
+                               }
+                               else
+                               {
                                DBLOCK(0x20,
-                               printf("no data: no data available\n"));
+                                       printf("no data: partial packet\n"));
+                               seg_flags &= ~THF_FIN;
                                goto after_data;
+                               }
                        }
 
-                       if (seg_seq != tcp_conn->tc_SND_UNA &&
-                               seg_hi_data-seg_lo_data+tot_hdr_size < 
-                               tcp_conn->tc_mss)
+                       if (seg_hi-seg_seq == 0)
                        {
                                DBLOCK(0x20,
-                                       printf("no data: partial packet\n"));
-                               seg_flags &= ~THF_FIN;
+                               printf("no data: no data available\n"));
                                goto after_data;
                        }
 
                        if (tcp_GEmod4G(tcp_conn->tc_SND_UP, seg_lo_data))
                        {
+                               extern int killer_inet;
+
                                if (tcp_GEmod4G(tcp_conn->tc_SND_UP,
                                        seg_hi_data))
                                {
@@ -338,7 +422,8 @@ tcp_conn_t *tcp_conn;
                                        seg_up= tcp_conn->tc_SND_UP-seg_seq;
                                }
                                seg_flags |= THF_URG;
-                               if ((tcp_conn->tc_flags & TCF_BSD_URG) &&
+                               if (!killer_inet &&
+                                       (tcp_conn->tc_flags & TCF_BSD_URG) &&
                                        seg_up == 0)
                                {
                                        /* A zero urgent pointer doesn't mean
@@ -432,11 +517,9 @@ after_data:
                        tcp_conn->tc_senddis= new_dis;
 
                return pack2write;
-#if !CRAMPED
        default:
                DBLOCK(1, tcp_print_conn(tcp_conn); printf("\n"));
                ip_panic(( "Illegal state" ));
-#endif
        }
        assert(0);
        return NULL;
@@ -451,17 +534,24 @@ tcp_conn_t *tcp_conn;
 u32_t seg_ack;
 u16_t new_win;
 {
+       tcp_fd_t *tcp_fd;
        size_t size, offset;
        acc_t *pack;
-       time_t retrans_time, curr_time, rtt;
+       clock_t retrans_time, curr_time, rtt, artt, drtt, srtt;
        u32_t queue_lo, queue_hi;
        u16_t mss, cthresh;
        unsigned window;
 
+       DBLOCK(0x10, printf("tcp_release_retrans, conn[%d]: ack %lu, win %u\n",
+               tcp_conn-tcp_conn_table, (unsigned long)seg_ack, new_win););
+
        assert(tcp_conn->tc_busy);
        assert (tcp_GEmod4G(seg_ack, tcp_conn->tc_SND_UNA));
        assert (tcp_LEmod4G(seg_ack, tcp_conn->tc_SND_NXT));
 
+       tcp_conn->tc_snd_dack= 0;
+       mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
+
        curr_time= get_time();
        if (tcp_conn->tc_rt_seq != 0 && 
                tcp_Gmod4G(seg_ack, tcp_conn->tc_rt_seq))
@@ -470,11 +560,6 @@ u16_t new_win;
                retrans_time= curr_time-tcp_conn->tc_rt_time;
                rtt= tcp_conn->tc_rtt;
 
-               DBLOCK(0x20, printf(
-               "tcp_release_retrans, conn[%d]: retrans_time= %ld ms\n",
-                       tcp_conn-tcp_conn_table, retrans_time*1000/HZ));
-
-
                tcp_conn->tc_rt_seq= 0;
 
                if (rtt == TCP_RTT_GRAN*CLOCK_GRAN &&
@@ -482,18 +567,25 @@ u16_t new_win;
                {
                        /* Common in fast networks. Nothing to do. */
                }
-               else if (rtt >= retrans_time && rtt <= 2*retrans_time)
-               {
-                       /* Nothing to do. We assume that a factor 2 for
-                        * variance is enough.
-                        */
-               }
-               else if (retrans_time > rtt)
+               else
                {
-                       /* Retrans time is really too small. */
+                       srtt= retrans_time * TCP_RTT_SCALE;
 
-                       tcp_conn->tc_rtt= rtt*2;
-                       if (tcp_conn->tc_rtt > TCP_RTT_MAX)
+                       artt= tcp_conn->tc_artt;
+                       artt= ((TCP_RTT_SMOOTH-1)*artt+srtt)/TCP_RTT_SMOOTH;
+
+                       srtt -= artt;
+                       if (srtt < 0)
+                               srtt= -srtt;
+                       drtt= tcp_conn->tc_drtt;
+                       drtt= ((TCP_RTT_SMOOTH-1)*drtt+srtt)/TCP_RTT_SMOOTH;
+
+                       rtt= (artt+TCP_DRTT_MULT*drtt-1)/TCP_RTT_SCALE+1;
+                       if (rtt < TCP_RTT_GRAN*CLOCK_GRAN)
+                       {
+                               rtt= TCP_RTT_GRAN*CLOCK_GRAN;
+                       }
+                       else if (rtt > TCP_RTT_MAX)
                        {
 #if DEBUG
                                static int warned /* = 0 */;
@@ -506,50 +598,33 @@ u16_t new_win;
                                        warned= 1;
                                }
 #endif
-                               tcp_conn->tc_rtt= TCP_RTT_MAX;
+                               rtt= TCP_RTT_MAX;
                        }
-                       assert (tcp_conn->tc_rtt);
-
                        DBLOCK(0x10, printf(
-"tcp_release_retrans, conn[%d]: (was too small) retrans_time= %ld ms, rtt= %ld ms\n",
-                               tcp_conn-tcp_conn_table, retrans_time*1000/HZ,
-                               tcp_conn->tc_rtt*1000/HZ));
+       "tcp_release_retrans, conn[%d]: retrans_time= %ld ms, rtt = %ld ms\n",
+                               tcp_conn-tcp_conn_table,
+                               retrans_time*1000/HZ,
+                               rtt*1000/HZ));
 
+                       DBLOCK(0x10, printf(
+       "tcp_release_retrans: artt= %ld -> %ld, drtt= %ld -> %ld\n",
+                               tcp_conn->tc_artt, artt,
+                               tcp_conn->tc_drtt, drtt));
 
-               }
-               else if (seg_ack - tcp_conn->tc_rt_seq == tcp_conn->tc_mss)
-               {
-                       /* Retrans time is really too big. */
-                       rtt= (rtt*3)>>2;
-                       if (rtt < TCP_RTT_GRAN*CLOCK_GRAN)
-                               rtt= TCP_RTT_GRAN*CLOCK_GRAN;
+                       tcp_conn->tc_artt= artt;
+                       tcp_conn->tc_drtt= drtt;
                        tcp_conn->tc_rtt= rtt;
-                       assert (tcp_conn->tc_rtt);
-
-                       DBLOCK(0x10, printf(
-"tcp_release_retrans, conn[%d]: (was too big) retrans_time= %ld ms, rtt= %ld ms\n",
-                               tcp_conn-tcp_conn_table, retrans_time*1000/HZ,
-                               tcp_conn->tc_rtt*1000/HZ));
                }
-               else
-               {
-                       /* Retrans time might be too big. Try a bit smaller. */
-                       rtt= (rtt*31)>>5;
-                       if (rtt < TCP_RTT_GRAN*CLOCK_GRAN)
-                               rtt= TCP_RTT_GRAN*CLOCK_GRAN;
-                       tcp_conn->tc_rtt= rtt;
-                       assert (tcp_conn->tc_rtt);
 
-                       DBLOCK(0x20, printf(
-"tcp_release_retrans, conn[%d]: (maybe too big) retrans_time= %ld ms, rtt= %ld ms\n",
-                               tcp_conn-tcp_conn_table, retrans_time*1000/HZ,
-                               tcp_conn->tc_rtt*1000/HZ));
+               if (tcp_conn->tc_mtu != tcp_conn->tc_max_mtu &&
+                       curr_time > tcp_conn->tc_mtutim+TCP_PMTU_INCR_IV)
+               {
+                       tcp_mtu_incr(tcp_conn);
                }
        }
 
        /* Update the current window. */
        window= tcp_conn->tc_snd_cwnd-tcp_conn->tc_SND_UNA;
-       mss= tcp_conn->tc_mss;
        assert(seg_ack != tcp_conn->tc_SND_UNA);
 
        /* For every real ACK we try to increase the current window
@@ -588,6 +663,22 @@ u16_t new_win;
        }
        assert(tcp_GEmod4G(tcp_conn->tc_snd_cwnd, seg_ack));
 
+       /* Advance ISS every 0.5GB to avoid problem with wrap around */
+       if (tcp_conn->tc_SND_UNA - tcp_conn->tc_ISS > 0x40000000)
+       {
+               tcp_conn->tc_ISS += 0x20000000;
+               DBLOCK(1, printf(
+                       "tcp_release_retrans: updating ISS to 0x%lx\n",
+                       (unsigned long)tcp_conn->tc_ISS););
+               if (tcp_Lmod4G(tcp_conn->tc_SND_UP, tcp_conn->tc_ISS))
+               {
+                       tcp_conn->tc_SND_UP= tcp_conn->tc_ISS;
+                       DBLOCK(1, printf(
+                       "tcp_release_retrans: updating SND_UP to 0x%lx\n",
+                               (unsigned long)tcp_conn->tc_SND_UP););
+               }
+       }
+
        if (queue_lo == tcp_conn->tc_ISS)
                queue_lo++;
 
@@ -608,10 +699,6 @@ u16_t new_win;
        if (!size)
        {
                bf_afree(pack);
-
-               /* Reset window if a write is completed */
-               tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA +
-                       2*tcp_conn->tc_mss;
        }
        else
        {
@@ -622,18 +709,25 @@ u16_t new_win;
        if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_snd_cwnd))
                tcp_conn->tc_SND_TRM= tcp_conn->tc_snd_cwnd;
 
-       /* Copy in new data if a write request is pending and
-        * SND_NXT-SND_TRM is less than 1 mss.
+       /* Copy in new data if an ioctl is pending or if a write request is
+        * pending and either the write can be completed or at least one
+        * mss buffer space is available.
         */
-       if (tcp_conn->tc_fd)
+       tcp_fd= tcp_conn->tc_fd;
+       if (tcp_fd)
        {
-               if ((tcp_conn->tc_fd->tf_flags &
-                       (TFF_WRITE_IP|TFF_IOCTL_IP)) &&
-                       tcp_conn->tc_SND_NXT-tcp_conn->tc_SND_TRM <
-                       tcp_conn->tc_mss)
+               if (tcp_fd->tf_flags & TFF_IOCTL_IP) 
                {
                        tcp_fd_write(tcp_conn);
                }
+               if ((tcp_fd->tf_flags & TFF_WRITE_IP) &&
+                       (size+tcp_fd->tf_write_count <= TCP_MAX_SND_WND_SIZE ||
+                       size <= TCP_MAX_SND_WND_SIZE-mss))
+               {
+                       tcp_fd_write(tcp_conn);
+               }
+               if (tcp_fd->tf_flags & TFF_SEL_WRITE) 
+                       tcp_rsel_write(tcp_conn);
        }
        else
        {
@@ -647,6 +741,12 @@ u16_t new_win;
                }
        }
 
+       if (!size && !tcp_conn->tc_send_data)
+       {
+               /* Reset window if a write is completed */
+               tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + mss;
+       }
+
        DIFBLOCK(2, (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_TRM),
                printf("not sending: zero window\n"));
 
@@ -658,6 +758,49 @@ u16_t new_win;
 
 }
 
+/*
+tcp_fast_retrans
+
+Rewind the transmit position of tcp_conn to SND_UNA and trigger a write.
+Before the rewind, tc_rt_threshold is moved up to SND_TRM when
+tcp_Gmod4G() reports SND_TRM to be ahead of it. With
+mss = tc_mtu-IP_TCP_MIN_HDR_SIZE, tc_snd_cwnd is then adjusted: it is
+incremented when it equals SND_UNA, and when it is beyond SND_UNA+2*mss it
+is pulled back to SND_UNA+2*mss while tc_snd_cthresh is halved (but kept
+at 2*mss or more). Ends by calling tcp_conn_write(tcp_conn, 1).
+
+NOTE(review): mss2 is a u16_t, so 2*mss is presumably truncated to 16 bits
+for large values of tc_mtu; confirm that tc_mtu is bounded accordingly.
+*/
+
+PUBLIC void tcp_fast_retrans(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       u16_t mss, mss2;
+
+       /* Update threshold sequence number for retransmission calculation. */
+       if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_rt_threshold))
+               tcp_conn->tc_rt_threshold= tcp_conn->tc_SND_TRM;
+
+       tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA;
+
+       mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
+       mss2= 2*mss;
+
+       if (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_UNA)
+               tcp_conn->tc_snd_cwnd++;
+       if (tcp_Gmod4G(tcp_conn->tc_snd_cwnd, tcp_conn->tc_SND_UNA + mss2))
+       {
+               tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + mss2;
+               if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_snd_cwnd))
+                       tcp_conn->tc_SND_TRM= tcp_conn->tc_snd_cwnd;
+
+               tcp_conn->tc_snd_cthresh /= 2;
+               if (tcp_conn->tc_snd_cthresh < mss2)
+                       tcp_conn->tc_snd_cthresh= mss2;
+       }
+
+       tcp_conn_write(tcp_conn, 1);
+}
+
+#if 0  /* Compiled out: wrapper that calls tcp_send_timeout() with the
+        * index of tcp_conn in tcp_conn_table and its transmit timer.
+        */
+PUBLIC void do_tcp_timeout(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       tcp_send_timeout(tcp_conn-tcp_conn_table,
+               &tcp_conn->tc_transmit_timer);
+}
+#endif
+
 /*
 tcp_send_timeout
 */
@@ -668,7 +811,12 @@ struct timer *timer;
 {
        tcp_conn_t *tcp_conn;
        u16_t mss, mss2;
-       time_t curr_time, stt, timeout;
+       u32_t snd_una, snd_nxt;
+       clock_t curr_time, rtt, stt, timeout;
+       acc_t *pkt;
+       int new_ttl, no_push;
+
+       DBLOCK(0x20, printf("tcp_send_timeout: conn[%d]\n", conn));
 
        curr_time= get_time();
 
@@ -677,20 +825,87 @@ struct timer *timer;
        assert(tcp_conn->tc_state != TCS_CLOSED);
        assert(tcp_conn->tc_state != TCS_LISTEN);
 
-       if (tcp_conn->tc_SND_NXT == tcp_conn->tc_SND_UNA)
+       snd_una= tcp_conn->tc_SND_UNA;
+       snd_nxt= tcp_conn->tc_SND_NXT;
+       no_push= (tcp_conn->tc_flags & TCF_NO_PUSH);
+       if (snd_nxt == snd_una || no_push)
        {
-               /* Nothing to do */
-               assert(tcp_conn->tc_SND_TRM == tcp_conn->tc_SND_UNA);
+               /* Nothing more to send */
+               assert(tcp_conn->tc_SND_TRM == snd_una || no_push);
 
                /* A new write sets the timer if tc_transmit_seq == SND_UNA */
                tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA;
                tcp_conn->tc_stt= 0;
                tcp_conn->tc_0wnd_to= 0;
                assert(!tcp_conn->tc_fd ||
-                       !(tcp_conn->tc_fd->tf_flags & TFF_WRITE_IP));
-               return;
+                       !(tcp_conn->tc_fd->tf_flags & TFF_WRITE_IP) ||
+                       (tcp_print_conn(tcp_conn), printf("\n"), 0));
+
+               if (snd_nxt != snd_una)
+               {
+                       assert(no_push);
+                       DBLOCK(1, printf("not setting keepalive timer\n"););
+
+                       /* No point in setting the keepalive timer if we
+                        * still have to send more data.
+                        */
+                       return;
+               }
+
+               assert(tcp_conn->tc_send_data == NULL);
+               DBLOCK(0x20, printf("keep alive timer\n"));
+               if (tcp_conn->tc_ka_snd != tcp_conn->tc_SND_NXT ||
+                       tcp_conn->tc_ka_rcv != tcp_conn->tc_RCV_NXT)
+               {
+                       tcp_conn->tc_ka_snd= tcp_conn->tc_SND_NXT;
+                       tcp_conn->tc_ka_rcv= tcp_conn->tc_RCV_NXT;
+                       DBLOCK(0x20, printf(
+"tcp_send_timeout: conn[%d] setting keepalive timer (+%ld ms)\n",
+                               tcp_conn-tcp_conn_table,
+                               tcp_conn->tc_ka_time*1000/HZ));
+                       clck_timer(&tcp_conn->tc_transmit_timer,
+                               curr_time+tcp_conn->tc_ka_time,
+                               tcp_send_timeout,
+                               tcp_conn-tcp_conn_table);
+                       return;
+               }
+               DBLOCK(0x10, printf(
+               "tcp_send_timeout, conn[%d]: triggering keep alive probe\n",
+                       tcp_conn-tcp_conn_table));
+               tcp_conn->tc_ka_snd--;
+               if (!(tcp_conn->tc_flags & TCF_FIN_SENT))
+               {
+                       pkt= bf_memreq(1);
+                       *ptr2acc_data(pkt)= '\xff';     /* a random char */
+                       tcp_conn->tc_send_data= pkt; pkt= NULL;
+               }
+               tcp_conn->tc_SND_UNA--;
+               if (tcp_conn->tc_SND_UNA == tcp_conn->tc_ISS)
+               {
+                       /* We didn't send anything so far. Retrying the
+                        * SYN is too hard. Decrement ISS and hope
+                        * that the other side doesn't care.
+                        */
+                       tcp_conn->tc_ISS--;
+               }
+
+               /* Set tc_transmit_seq and tc_stt to trigger packet */
+               tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA;
+               tcp_conn->tc_stt= curr_time;
+
+               /* Set tc_rt_seq for round trip measurements */
+               tcp_conn->tc_rt_time= curr_time;
+               tcp_conn->tc_rt_seq= tcp_conn->tc_SND_UNA;
+
+               /* Set PSH to make sure that data gets sent */
+               tcp_conn->tc_SND_PSH= tcp_conn->tc_SND_NXT;
+               assert(tcp_check_conn(tcp_conn));
+
+               /* Fall through */
        }
 
+       rtt= tcp_conn->tc_rtt;
+
        if (tcp_conn->tc_transmit_seq != tcp_conn->tc_SND_UNA)
        {
                /* Some data has been acknowledged since the last time the
@@ -702,16 +917,16 @@ struct timer *timer;
                DBLOCK(0x20, printf(
        "tcp_send_timeout: conn[%d] setting timer to %ld ms (+%ld ms)\n",
                        tcp_conn-tcp_conn_table,
-                       (curr_time+tcp_conn->tc_rtt)*1000/HZ,
-                       tcp_conn->tc_rtt*1000/HZ));
+                       (curr_time+rtt)*1000/HZ, rtt*1000/HZ));
 
                clck_timer(&tcp_conn->tc_transmit_timer,
-                       curr_time+tcp_conn->tc_rtt,
-                       tcp_send_timeout, tcp_conn-tcp_conn_table);
+                       curr_time+rtt, tcp_send_timeout,
+                       tcp_conn-tcp_conn_table);
                return;
        }
 
-       if (tcp_conn->tc_stt == 0)
+       stt= tcp_conn->tc_stt;
+       if (stt == 0)
        {
                /* Some packet arrived but did not acknowledge any data.
                 * Apparently, the other side is still alive and has a
@@ -723,8 +938,8 @@ struct timer *timer;
 
                if (tcp_conn->tc_0wnd_to < TCP_0WND_MIN)
                        tcp_conn->tc_0wnd_to= TCP_0WND_MIN;
-               else if (tcp_conn->tc_0wnd_to < tcp_conn->tc_rtt)
-                       tcp_conn->tc_0wnd_to= tcp_conn->tc_rtt;
+               else if (tcp_conn->tc_0wnd_to < rtt)
+                       tcp_conn->tc_0wnd_to= rtt;
                else
                {
                        tcp_conn->tc_0wnd_to *= 2;
@@ -732,10 +947,9 @@ struct timer *timer;
                                tcp_conn->tc_0wnd_to= TCP_0WND_MAX;
                }
                tcp_conn->tc_stt= curr_time;
-               
                tcp_conn->tc_rt_seq= 0;
 
-               DBLOCK(0x20, printf(
+               DBLOCK(0x10, printf(
        "tcp_send_timeout: conn[%d] setting timer to %ld ms (+%ld ms)\n",
                        tcp_conn-tcp_conn_table,
                        (curr_time+tcp_conn->tc_0wnd_to)*1000/HZ,
@@ -746,6 +960,7 @@ struct timer *timer;
                        tcp_send_timeout, tcp_conn-tcp_conn_table);
                return;
        }
+       assert(stt <= curr_time);
 
        DIFBLOCK(0x10, (tcp_conn->tc_fd == 0),
                printf("conn[%d] timeout in abondoned connection\n",
@@ -755,9 +970,9 @@ struct timer *timer;
         * probe, which is almost the same.
         */
 
-       DBLOCK(0x20, printf("tcp_send_timeout: conn[%d] una= %u, rtt= %dms\n",
+       DBLOCK(0x20, printf("tcp_send_timeout: conn[%d] una= %lu, rtt= %ldms\n",
                tcp_conn-tcp_conn_table,
-               tcp_conn->tc_SND_UNA, tcp_conn->tc_rtt*1000/HZ));
+               (unsigned long)tcp_conn->tc_SND_UNA, rtt*1000/HZ));
 
        /* Update threshold sequence number for retransmission calculation. */
        if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_rt_threshold))
@@ -765,7 +980,25 @@ struct timer *timer;
 
        tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA;
 
-       mss= tcp_conn->tc_mss;
+       if (tcp_conn->tc_flags & TCF_PMTU &&
+               curr_time > stt+TCP_PMTU_BLACKHOLE)
+       {
+               /* We can't tell the difference between a PMTU blackhole 
+                * and a broken link. Assume a PMTU blackhole, and switch
+                * off PMTU discovery.
+                */
+               DBLOCK(1, printf(
+                       "tcp[%d]: PMTU blackhole (or broken link) on route to ",
+                       tcp_conn-tcp_conn_table);
+                       writeIpAddr(tcp_conn->tc_remaddr);
+                       printf(", max mtu = %u\n", tcp_conn->tc_max_mtu););
+               tcp_conn->tc_flags &= ~TCF_PMTU;
+               tcp_conn->tc_mtutim= curr_time;
+               if (tcp_conn->tc_max_mtu > IP_DEF_MTU)
+                       tcp_conn->tc_mtu= IP_DEF_MTU;
+       }
+
+       mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE;
        mss2= 2*mss;
 
        if (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_UNA)
@@ -781,8 +1014,6 @@ struct timer *timer;
                        tcp_conn->tc_snd_cthresh= mss2;
        }
 
-       stt= tcp_conn->tc_stt;
-       assert(stt <= curr_time);
        if (curr_time-stt > tcp_conn->tc_rt_dead)
        {
                tcp_close_connection(tcp_conn, ETIMEDOUT);
@@ -790,8 +1021,8 @@ struct timer *timer;
        }
 
        timeout= (curr_time-stt) >> 3;
-       if (timeout < tcp_conn->tc_rtt)
-               timeout= tcp_conn->tc_rtt;
+       if (timeout < rtt)
+               timeout= rtt;
        timeout += curr_time;
 
        DBLOCK(0x20, printf(
@@ -802,11 +1033,24 @@ struct timer *timer;
        clck_timer(&tcp_conn->tc_transmit_timer, timeout,
                tcp_send_timeout, tcp_conn-tcp_conn_table);
 
+#if 0
        if (tcp_conn->tc_rt_seq == 0)
        {
-               tcp_conn->tc_rt_time= curr_time-tcp_conn->tc_rtt;
+               printf("tcp_send_timeout: conn[%d]: setting tc_rt_time\n",
+                       tcp_conn-tcp_conn_table);
+               tcp_conn->tc_rt_time= curr_time-rtt;
                tcp_conn->tc_rt_seq= tcp_conn->tc_SND_UNA;
        }
+#endif
+
+       if (tcp_conn->tc_state == TCS_SYN_SENT ||
+               (curr_time-stt >= tcp_conn->tc_ttl*HZ))
+       {
+               new_ttl= tcp_conn->tc_ttl+1;
+               if (new_ttl> IP_MAX_TTL)
+                       new_ttl= IP_MAX_TTL;
+               tcp_conn->tc_ttl= new_ttl;
+       }
 
        tcp_conn_write(tcp_conn, 0);
 }
@@ -818,8 +1062,8 @@ tcp_conn_t *tcp_conn;
        tcp_fd_t *tcp_fd;
        int urg, nourg, push;
        u32_t max_seq;
-       size_t max_count, max_trans, write_count, send_count;
-       acc_t *data, *tmp_acc, *send_data;
+       size_t max_trans, write_count;
+       acc_t *data, *send_data;
 
        assert(tcp_conn->tc_busy);
        tcp_fd= tcp_conn->tc_fd;
@@ -872,10 +1116,7 @@ tcp_conn_t *tcp_conn;
        urg= (tcp_fd->tf_flags & TFF_WR_URG);
        push= (tcp_fd->tf_flags & TFF_PUSH_DATA);
 
-       max_seq= tcp_conn->tc_SND_UNA + tcp_conn->tc_snd_wnd;
-       if (urg)
-               max_seq++;
-       max_count= max_seq - tcp_conn->tc_SND_UNA;
+       max_seq= tcp_conn->tc_SND_UNA + TCP_MAX_SND_WND_SIZE;
        max_trans= max_seq - tcp_conn->tc_SND_NXT;
        if (tcp_fd->tf_write_count <= max_trans)
                write_count= tcp_fd->tf_write_count;
@@ -937,6 +1178,63 @@ tcp_conn_t *tcp_conn;
        }
 }
 
+PUBLIC unsigned tcp_sel_write(tcp_conn)        /* Returns 1 if a write select can be reported now, else 0. */
+tcp_conn_t *tcp_conn;
+{
+       tcp_fd_t *tcp_fd;
+       int urg, nourg;
+       u32_t max_seq;
+       size_t max_trans;
+       /* tc_fd is fetched here but only dereferenced after the TCS_CLOSED check. */
+       tcp_fd= tcp_conn->tc_fd;
+       /* A closed connection is always reported as ready. */
+       if (tcp_conn->tc_state == TCS_CLOSED)
+               return 1;
+       /* urg is nonzero when TFF_WR_URG is set in the fd flags. */
+       urg= (tcp_fd->tf_flags & TFF_WR_URG);
+       /* max_trans: sequence space between SND_NXT and SND_UNA + TCP_MAX_SND_WND_SIZE. */
+       max_seq= tcp_conn->tc_SND_UNA + TCP_MAX_SND_WND_SIZE;
+       max_trans= max_seq - tcp_conn->tc_SND_NXT;
+       if (max_trans)
+       {
+               if (tcp_conn->tc_flags & TCF_BSD_URG)
+               {       /* TCF_BSD_URG set: apply the urgent/normal check below */
+                       if (tcp_Gmod4G(tcp_conn->tc_SND_NXT,
+                               tcp_conn->tc_SND_UNA))
+                       {       /* presumably SND_NXT is past SND_UNA (mod 2^32) -- confirm against tcp_Gmod4G */
+                               nourg= tcp_LEmod4G(tcp_conn->tc_SND_UP,
+                                       tcp_conn->tc_SND_UNA);
+                               if ((urg && nourg) || (!urg && !nourg))
+                               {       /* urg and nourg are both set or both clear: not ready */
+                                       DBLOCK(0x20,
+                                               printf("not sending\n"));
+                                       return 0;
+                               }
+                       }
+               }
+               return 1;
+       }
+       /* max_trans is 0: no sequence space left, so not ready. */
+       return 0;
+}
+
+PUBLIC void    /* Report SR_SELECT_WRITE for this connection once tcp_sel_write() says ready. */
+tcp_rsel_write(tcp_conn)
+tcp_conn_t *tcp_conn;
+{
+       tcp_fd_t *tcp_fd;
+       /* Not ready yet: return with TFF_SEL_WRITE left untouched. */
+       if (tcp_sel_write(tcp_conn) == 0)
+               return;
+       /* Ready: clear TFF_SEL_WRITE first, then call tf_select_res if it is non-null. */
+       tcp_fd= tcp_conn->tc_fd;
+       tcp_fd->tf_flags &= ~TFF_SEL_WRITE;
+       if (tcp_fd->tf_select_res)
+               tcp_fd->tf_select_res(tcp_fd->tf_srfd, SR_SELECT_WRITE);
+       else    /* no callback stored: only a diagnostic is printed */
+               printf("tcp_rsel_write: no select_res\n");
+}
+
 /*
 tcp_shutdown
 */
@@ -957,40 +1255,39 @@ tcp_conn_t *tcp_conn;
        if (tcp_conn->tc_flags & TCF_FIN_SENT)
                return;
        tcp_conn->tc_flags |= TCF_FIN_SENT;
+       tcp_conn->tc_flags &= ~TCF_NO_PUSH;
        tcp_conn->tc_SND_NXT++;
+       tcp_conn->tc_SND_PSH= tcp_conn->tc_SND_NXT;
 
        assert (tcp_check_conn(tcp_conn) ||
                (tcp_print_conn(tcp_conn), printf("\n"), 0));
 
        tcp_conn_write(tcp_conn, 1);
 
-       /* Start the timer (if necessary) */
+       /* Start the timer */
        tcp_set_send_timer(tcp_conn);
 }
 
 PUBLIC void tcp_set_send_timer(tcp_conn)
 tcp_conn_t *tcp_conn;
 {
-       time_t curr_time;
+       clock_t curr_time;
+       clock_t rtt;
 
        assert(tcp_conn->tc_state != TCS_CLOSED);
        assert(tcp_conn->tc_state != TCS_LISTEN);
 
        curr_time= get_time();
-
-       /* Start the timer */
+       rtt= tcp_conn->tc_rtt;
 
        DBLOCK(0x20, printf(
        "tcp_set_send_timer: conn[%d] setting timer to %ld ms (+%ld ms)\n",
                tcp_conn-tcp_conn_table,
-               (curr_time+tcp_conn->tc_rtt)*1000/HZ,
-               tcp_conn->tc_rtt*1000/HZ));
+               (curr_time+rtt)*1000/HZ, rtt*1000/HZ));
 
+       /* Start the timer */
        clck_timer(&tcp_conn->tc_transmit_timer,
-               curr_time+tcp_conn->tc_rtt,
-               tcp_send_timeout, tcp_conn-tcp_conn_table);
-               tcp_conn->tc_stt= curr_time;
-
+               curr_time+rtt, tcp_send_timeout, tcp_conn-tcp_conn_table);
        tcp_conn->tc_stt= curr_time;
 }
 
@@ -1007,7 +1304,8 @@ int error;
        tcp_fd_t *tcp_fd;
        tcp_conn_t *tc;
 
-       assert (tcp_check_conn(tcp_conn));
+       assert (tcp_check_conn(tcp_conn) ||
+               (tcp_print_conn(tcp_conn), printf("\n"), 0));
        assert (tcp_conn->tc_flags & TCF_INUSE);
 
        tcp_conn->tc_error= error;
@@ -1027,6 +1325,8 @@ int error;
                if (tcp_fd->tf_flags & TFF_READ_IP)
                        tcp_fd_read (tcp_conn, 1);
                assert (!(tcp_fd->tf_flags & TFF_READ_IP));
+               if (tcp_fd->tf_flags & TFF_SEL_READ)
+                       tcp_rsel_read (tcp_conn);
 
                if (tcp_fd->tf_flags & TFF_WRITE_IP)
                {
@@ -1041,12 +1341,15 @@ int error;
                }
                if (tcp_fd->tf_flags & TFF_IOCTL_IP)
                        assert(tcp_fd->tf_ioreq != NWIOTCPSHUTDOWN);
+               if (tcp_fd->tf_flags & TFF_SEL_WRITE) 
+                       tcp_rsel_write(tcp_conn);
 
                if (tcp_conn->tc_connInprogress)
                        tcp_restart_connect(tcp_conn->tc_fd);
                assert (!tcp_conn->tc_connInprogress);
                assert (!(tcp_fd->tf_flags & TFF_IOCTL_IP) ||
-                       (printf("req= 0x%lx\n", tcp_fd->tf_ioreq), 0));
+                       (printf("req= 0x%lx\n",
+                       (unsigned long)tcp_fd->tf_ioreq), 0));
                tcp_conn->tc_busy--;
        }
 
@@ -1120,5 +1423,5 @@ int error;
 }
 
 /*
- * $PchId: tcp_send.c,v 1.12 1996/12/17 07:57:11 philip Exp $
+ * $PchId: tcp_send.c,v 1.32 2005/06/28 14:21:52 philip Exp $
  */
index d4a23b0ab8805b522a8d1064c19d2f8e07a6fc7b..7a588b051d8d52c7739a2a657c5e866de993d816 100644 (file)
@@ -12,9 +12,10 @@ typedef struct acc *(*get_userdata_t) ARGS(( int fd, size_t offset,
 typedef int (*put_userdata_t) ARGS(( int fd, size_t offset,
        struct acc *data, int for_ioctl ));
 typedef void (*put_pkt_t) ARGS(( int fd, struct acc *data, size_t datalen ));
+typedef void (*select_res_t) ARGS(( int fd, unsigned ops ));
 
 #endif /* INET_TYPE_H */
 
 /*
- * $PchId: type.h,v 1.5 1995/11/21 06:51:58 philip Exp $
+ * $PchId: type.h,v 1.6 2005/06/28 14:22:04 philip Exp $
  */
index 95211a7c8c8f548f79a9114b33408b71752b95c4..9fdf0016d90e53c516897adf0d4c91587835433f 100644 (file)
@@ -15,74 +15,22 @@ Copyright 1995 Philip Homburg
 #include "ip.h"
 #include "sr.h"
 #include "udp.h"
+#include "udp_int.h"
 
 THIS_FILE
 
-#define UDP_FD_NR              (4*IP_PORT_MAX)
-#define UDP_PORT_HASH_NR       16              /* Must be a power of 2 */
-
-typedef struct udp_port
-{
-       int up_flags;
-       int up_state;
-       int up_ipfd;
-       int up_ipdev;
-       acc_t *up_wr_pack;
-       ipaddr_t up_ipaddr;
-       struct udp_fd *up_next_fd;
-       struct udp_fd *up_write_fd;
-       struct udp_fd *up_port_any;
-       struct udp_fd *up_port_hash[UDP_PORT_HASH_NR];
-} udp_port_t;
-
-#define UPF_EMPTY      0x0
-#define UPF_WRITE_IP   0x1
-#define UPF_WRITE_SP   0x2
-#define UPF_READ_IP    0x4
-#define UPF_READ_SP    0x8
-#define UPF_SUSPEND    0x10
-#define UPF_MORE2WRITE 0x20
-
-#define UPS_EMPTY      0
-#define UPS_SETPROTO   1
-#define UPS_GETCONF    2
-#define UPS_MAIN       3
-#define UPS_ERROR      4
-
-typedef struct udp_fd
-{
-       int uf_flags;
-       udp_port_t *uf_port;
-       ioreq_t uf_ioreq;
-       int uf_srfd;
-       nwio_udpopt_t uf_udpopt;
-       get_userdata_t uf_get_userdata;
-       put_userdata_t uf_put_userdata;
-       acc_t *uf_rdbuf_head;
-       acc_t *uf_rdbuf_tail;
-       size_t uf_rd_count;
-       size_t uf_wr_count;
-       time_t uf_exp_tim;
-       struct udp_fd *uf_port_next;
-} udp_fd_t;
-
-#define UFF_EMPTY      0x0
-#define UFF_INUSE      0x1
-#define UFF_IOCTL_IP   0x2
-#define UFF_READ_IP    0x4
-#define UFF_WRITE_IP   0x8
-#define UFF_OPTSET     0x10
-
 FORWARD void read_ip_packets ARGS(( udp_port_t *udp_port ));
 FORWARD void udp_buffree ARGS(( int priority ));
 #ifdef BUF_CONSISTENCY_CHECK
 FORWARD void udp_bufcheck ARGS(( void ));
 #endif
 FORWARD void udp_main ARGS(( udp_port_t *udp_port ));
+FORWARD int udp_select ARGS(( int fd, unsigned operations ));
 FORWARD acc_t *udp_get_data ARGS(( int fd, size_t offset, size_t count, 
        int for_ioctl ));
 FORWARD int udp_put_data ARGS(( int fd, size_t offset, acc_t *data,    
        int for_ioctl ));
+FORWARD int udp_peek ARGS(( udp_fd_t * ));
 FORWARD void udp_restart_write_port ARGS(( udp_port_t *udp_port ));
 FORWARD void udp_ip_arrived ARGS(( int port, acc_t *pack, size_t pack_size ));
 FORWARD void reply_thr_put ARGS(( udp_fd_t *udp_fd, int reply,
@@ -96,24 +44,23 @@ FORWARD int udp_packet2user ARGS(( udp_fd_t *udp_fd ));
 FORWARD void restart_write_fd ARGS(( udp_fd_t *udp_fd ));
 FORWARD u16_t pack_oneCsum ARGS(( acc_t *pack ));
 FORWARD void udp_rd_enqueue ARGS(( udp_fd_t *udp_fd, acc_t *pack,
-                                                       time_t exp_tim ));
+                                                       clock_t exp_tim ));
 FORWARD void hash_fd ARGS(( udp_fd_t *udp_fd ));
 FORWARD void unhash_fd ARGS(( udp_fd_t *udp_fd ));
 
-PRIVATE udp_port_t *udp_port_table;
-PRIVATE udp_fd_t udp_fd_table[UDP_FD_NR];
+PUBLIC udp_port_t *udp_port_table;
+PUBLIC udp_fd_t udp_fd_table[UDP_FD_NR];
 
 PUBLIC void udp_prep()
 {
-       udp_port_table= alloc(ip_conf_nr * sizeof(udp_port_table[0]));
+       udp_port_table= alloc(udp_conf_nr * sizeof(udp_port_table[0]));
 }
 
 PUBLIC void udp_init()
 {
        udp_fd_t *udp_fd;
        udp_port_t *udp_port;
-       struct ip_conf *icp;
-       int i, j;
+       int i, j, ifno;
 
        assert (BUF_S >= sizeof(struct nwio_ipopt));
        assert (BUF_S >= sizeof(struct nwio_ipconf));
@@ -122,13 +69,11 @@ PUBLIC void udp_init()
        assert (UDP_HDR_SIZE == sizeof(udp_hdr_t));
        assert (UDP_IO_HDR_SIZE == sizeof(udp_io_hdr_t));
 
-#if ZERO
        for (i= 0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++, udp_fd++)
        {
                udp_fd->uf_flags= UFF_EMPTY;
                udp_fd->uf_rdbuf_head= NULL;
        }
-#endif
 
 #ifndef BUF_CONSISTENCY_CHECK
        bf_logon(udp_buffree);
@@ -136,31 +81,147 @@ PUBLIC void udp_init()
        bf_logon(udp_buffree, udp_bufcheck);
 #endif
 
-       for (i= 0, udp_port= udp_port_table, icp= ip_conf;
-               i<ip_conf_nr; i++, udp_port++, icp++)
+       for (i= 0, udp_port= udp_port_table; i<udp_conf_nr; i++, udp_port++)
        {
-               udp_port->up_ipdev= i;
+               udp_port->up_ipdev= udp_conf[i].uc_port;
 
-#if ZERO
                udp_port->up_flags= UPF_EMPTY;
                udp_port->up_state= UPS_EMPTY;
-#endif
                udp_port->up_next_fd= udp_fd_table;
-#if ZERO
                udp_port->up_write_fd= NULL;
                udp_port->up_port_any= NULL;
                for (j= 0; j<UDP_PORT_HASH_NR; j++)
                        udp_port->up_port_hash[j]= NULL;
-#endif
 
-               sr_add_minor(if2minor(icp->ic_ifno, UDP_DEV_OFF),
+               ifno= ip_conf[udp_port->up_ipdev].ic_ifno;
+               sr_add_minor(if2minor(ifno, UDP_DEV_OFF),
                        i, udp_open, udp_close, udp_read,
-                       udp_write, udp_ioctl, udp_cancel);
+                       udp_write, udp_ioctl, udp_cancel, udp_select);
 
                udp_main(udp_port);
        }
 }
 
+PUBLIC int udp_open (port, srfd, get_userdata, put_userdata, put_pkt,
+       select_res)     /* Claim a free udp_fd_table slot; returns its index, or EAGAIN if none is free. */
+int port;              /* index into udp_port_table */
+int srfd;              /* stored in uf_srfd */
+get_userdata_t get_userdata;
+put_userdata_t put_userdata;
+put_pkt_t put_pkt;     /* accepted but not referenced in this function */
+select_res_t select_res;       /* accepted but not referenced in this function */
+{
+       int i;
+       udp_fd_t *udp_fd;
+       /* Linear scan for the first entry without UFF_INUSE; the loop body is empty. */
+       for (i= 0; i<UDP_FD_NR && (udp_fd_table[i].uf_flags & UFF_INUSE);
+               i++);
+
+       if (i>= UDP_FD_NR)
+       {
+               DBLOCK(1, printf("out of fds\n"));
+               return EAGAIN;
+       }
+
+       udp_fd= &udp_fd_table[i];
+       /* uf_flags is assigned, not or-ed, so every other UFF_* bit is cleared here. */
+       udp_fd->uf_flags= UFF_INUSE;
+       udp_fd->uf_port= &udp_port_table[port];
+       udp_fd->uf_srfd= srfd;
+       udp_fd->uf_udpopt.nwuo_flags= UDP_DEF_OPT;
+       udp_fd->uf_get_userdata= get_userdata;
+       udp_fd->uf_put_userdata= put_userdata;
+       assert(udp_fd->uf_rdbuf_head == NULL);  /* the read queue of a free slot must be empty */
+       udp_fd->uf_port_next= NULL;
+
+       return i;
+
+}
+
+PUBLIC int udp_ioctl (fd, req) /* Dispatch ioctl req on UDP fd; NW_SUSPEND while the port is not in UPS_MAIN. */
+int fd;                /* index into udp_fd_table */
+ioreq_t req;
+{
+       udp_fd_t *udp_fd;
+       udp_port_t *udp_port;
+       nwio_udpopt_t *udp_opt;
+       acc_t *opt_acc;
+       int result;
+
+       udp_fd= &udp_fd_table[fd];
+
+assert (udp_fd->uf_flags & UFF_INUSE);
+       /* Mark the ioctl as in progress and save the request code before any early return. */
+       udp_port= udp_fd->uf_port;
+       udp_fd->uf_flags |= UFF_IOCTL_IP;
+       udp_fd->uf_ioreq= req;
+       /* Port not in UPS_MAIN: suspend, leaving UFF_IOCTL_IP and uf_ioreq set. */
+       if (udp_port->up_state != UPS_MAIN)
+               return NW_SUSPEND;
+
+       switch(req)
+       {
+       case NWIOSUDPOPT:
+               result= udp_setopt(udp_fd);
+               break;
+       case NWIOGUDPOPT:
+               opt_acc= bf_memreq(sizeof(*udp_opt));
+assert (opt_acc->acc_length == sizeof(*udp_opt));
+               udp_opt= (nwio_udpopt_t *)ptr2acc_data(opt_acc);
+               /* Copy the fd options, substituting the port IP address as the local address. */
+               *udp_opt= udp_fd->uf_udpopt;
+               udp_opt->nwuo_locaddr= udp_fd->uf_port->up_ipaddr;
+               result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, 0, opt_acc,
+                       TRUE);
+               if (result == NW_OK)
+                       reply_thr_put(udp_fd, NW_OK, TRUE);
+               break;
+       case NWIOUDPPEEK:
+               result= udp_peek(udp_fd);
+               break;
+       default:        /* unknown request: EBADIOCTL goes out via reply_thr_get, the call returns NW_OK */
+               reply_thr_get(udp_fd, EBADIOCTL, TRUE);
+               result= NW_OK;
+               break;
+       }
+       if (result != NW_SUSPEND)       /* not suspended: the ioctl is no longer in progress */
+               udp_fd->uf_flags &= ~UFF_IOCTL_IP;
+       return result;
+}
+
+PUBLIC int udp_read (fd, count)        /* Read on UDP fd: deliver queued data now or suspend with UFF_READ_IP set. */
+int fd;                /* index into udp_fd_table */
+size_t count;          /* stored in uf_rd_count */
+{
+       udp_fd_t *udp_fd;
+       acc_t *tmp_acc, *next_acc;
+       /* Reads are answered with EBADMODE until UFF_OPTSET is set on the fd. */
+       udp_fd= &udp_fd_table[fd];
+       if (!(udp_fd->uf_flags & UFF_OPTSET))
+       {
+               reply_thr_put(udp_fd, EBADMODE, FALSE);
+               return NW_OK;
+       }
+
+       udp_fd->uf_rd_count= count;
+       /* Queued data: hand it over if uf_exp_tim has not passed, otherwise free the whole queue. */
+       if (udp_fd->uf_rdbuf_head)
+       {
+               if (get_time() <= udp_fd->uf_exp_tim)
+                       return udp_packet2user (udp_fd);
+               tmp_acc= udp_fd->uf_rdbuf_head;
+               while (tmp_acc)
+               {
+                       next_acc= tmp_acc->acc_ext_link;        /* save the link before freeing */
+                       bf_afree(tmp_acc);
+                       tmp_acc= next_acc;
+               }
+               udp_fd->uf_rdbuf_head= NULL;
+       }
+       udp_fd->uf_flags |= UFF_READ_IP;        /* mark the read as pending */
+       return NW_SUSPEND;
+}
+
 PRIVATE void udp_main(udp_port)
 udp_port_t *udp_port;
 {
@@ -174,7 +235,7 @@ udp_port_t *udp_port;
 
                udp_port->up_ipfd= ip_open(udp_port->up_ipdev, 
                        udp_port->up_ipdev, udp_get_data, udp_put_data,
-                       udp_ip_arrived);
+                       udp_ip_arrived, 0 /* no select_res */);
                if (udp_port->up_ipfd < 0)
                {
                        udp_port->up_state= UPS_ERROR;
@@ -220,47 +281,20 @@ udp_port_t *udp_port;
                }
                read_ip_packets(udp_port);
                return;
-#if !CRAMPED
        default:
                DBLOCK(1, printf("udp_port_table[%d].up_state= %d\n",
                        udp_port->up_ipdev, udp_port->up_state));
                ip_panic(( "unknown state" ));
-#endif
+               break;
        }
 }
 
-int udp_open (port, srfd, get_userdata, put_userdata, put_pkt)
-int port;
-int srfd;
-get_userdata_t get_userdata;
-put_userdata_t put_userdata;
-put_pkt_t put_pkt;
+PRIVATE int udp_select(fd, operations)
+int fd;
+unsigned operations;
 {
-       int i;
-       udp_fd_t *udp_fd;
-
-       for (i= 0; i<UDP_FD_NR && (udp_fd_table[i].uf_flags & UFF_INUSE);
-               i++);
-
-       if (i>= UDP_FD_NR)
-       {
-               DBLOCK(1, printf("out of fds\n"));
-               return EAGAIN;
-       }
-
-       udp_fd= &udp_fd_table[i];
-
-       udp_fd->uf_flags= UFF_INUSE;
-       udp_fd->uf_port= &udp_port_table[port];
-       udp_fd->uf_srfd= srfd;
-       udp_fd->uf_udpopt.nwuo_flags= UDP_DEF_OPT;
-       udp_fd->uf_get_userdata= get_userdata;
-       udp_fd->uf_put_userdata= put_userdata;
-       assert(udp_fd->uf_rdbuf_head == NULL);
-       udp_fd->uf_port_next= NULL;
-
-       return i;
-
+       printf("udp_select: not implemented\n");
+       return 0;
 }
 
 PRIVATE acc_t *udp_get_data (port, offset, count, for_ioctl)
@@ -342,10 +376,8 @@ assert (udp_port->up_wr_pack);
                }
                break;
        default:
-#if !CRAMPED
                printf("udp_get_data(%d, 0x%x, 0x%x) called but up_state= 0x%x\n",
                        port, offset, count, udp_port->up_state);
-#endif
                break;
        }
        return NULL;
@@ -412,72 +444,20 @@ assert (!offset); /* This isn't a valid assertion but ip sends only
                        udp_ip_arrived(fd, data, bf_bufsize(data));
                }
                break;
-#if !CRAMPED
        default:
                ip_panic((
-               "udp_put_data(%d, 0x%x, 0x%x) called but up_state= 0x%x\n",
+               "udp_put_data(%d, 0x%x, %p) called but up_state= 0x%x\n",
                                        fd, offset, data, udp_port->up_state ));
-#endif
        }
        return NW_OK;
 }
 
-int udp_ioctl (fd, req)
-int fd;
-ioreq_t req;
-{
-       udp_fd_t *udp_fd;
-       udp_port_t *udp_port;
-       nwio_udpopt_t *udp_opt;
-       acc_t *opt_acc;
-       int result;
-
-       udp_fd= &udp_fd_table[fd];
-
-assert (udp_fd->uf_flags & UFF_INUSE);
-
-       udp_port= udp_fd->uf_port;
-       udp_fd->uf_flags |= UFF_IOCTL_IP;
-       udp_fd->uf_ioreq= req;
-
-       if (udp_port->up_state != UPS_MAIN)
-               return NW_SUSPEND;
-
-       switch(req)
-       {
-       case NWIOSUDPOPT:
-               result= udp_setopt(udp_fd);
-               break;
-       case NWIOGUDPOPT:
-               opt_acc= bf_memreq(sizeof(*udp_opt));
-assert (opt_acc->acc_length == sizeof(*udp_opt));
-               udp_opt= (nwio_udpopt_t *)ptr2acc_data(opt_acc);
-
-               *udp_opt= udp_fd->uf_udpopt;
-               udp_opt->nwuo_locaddr= udp_fd->uf_port->up_ipaddr;
-               result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, 0, opt_acc,
-                       TRUE);
-               if (result == NW_OK)
-                       reply_thr_put(udp_fd, NW_OK, TRUE);
-               break;
-       default:
-               reply_thr_get(udp_fd, EBADIOCTL, TRUE);
-               result= NW_OK;
-               break;
-       }
-       if (result != NW_SUSPEND)
-               udp_fd->uf_flags &= ~UFF_IOCTL_IP;
-       return result;
-}
-
 PRIVATE int udp_setopt(udp_fd)
 udp_fd_t *udp_fd;
 {
        udp_fd_t *fd_ptr;
        nwio_udpopt_t oldopt, newopt;
        acc_t *data;
-       int result;
-       udpport_t port;
        unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags,
                all_flags, flags;
        unsigned long new_flags;
@@ -675,20 +655,20 @@ int fd;
 {
        udpport_t port, nw_port;
 
-       nw_port= htons(0xC000+fd);
-       if (is_unused_port(nw_port))
-               return nw_port;
-
-       for (port= 0xC000+UDP_FD_NR; port < 0xFFFF; port++)
+       for (port= 0x8000+fd; port < 0xffff-UDP_FD_NR; port+= UDP_FD_NR)
+       {
+               nw_port= htons(port);
+               if (is_unused_port(nw_port))
+                       return nw_port;
+       }
+       for (port= 0x8000; port < 0xffff; port++)
        {
                nw_port= htons(port);
                if (is_unused_port(nw_port))
                        return nw_port;
        }
-#if !CRAMPED
        ip_panic(( "unable to find unused port (shouldn't occur)" ));
        return 0;
-#endif
 }
 
 /*
@@ -759,26 +739,34 @@ assert(result == NW_OK);
 }
 
 
-PUBLIC int udp_read (fd, count)
-int fd;
-size_t count;
+PRIVATE int udp_peek (udp_fd)
+udp_fd_t *udp_fd;
 {
-       udp_fd_t *udp_fd;
-       acc_t *tmp_acc, *next_acc;
+       acc_t *pack, *tmp_acc, *next_acc;
+       int result;
 
-       udp_fd= &udp_fd_table[fd];
        if (!(udp_fd->uf_flags & UFF_OPTSET))
        {
-               reply_thr_put(udp_fd, EBADMODE, FALSE);
+               udp_fd->uf_flags &= ~UFF_IOCTL_IP;
+               reply_thr_put(udp_fd, EBADMODE, TRUE);
                return NW_OK;
        }
 
-       udp_fd->uf_rd_count= count;
-
        if (udp_fd->uf_rdbuf_head)
        {
                if (get_time() <= udp_fd->uf_exp_tim)
-                       return udp_packet2user (udp_fd);
+               {
+                       pack= bf_cut(udp_fd->uf_rdbuf_head, 0,
+                               sizeof(udp_io_hdr_t));
+                       result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd,
+                               (size_t)0, pack, TRUE);
+
+                       udp_fd->uf_flags &= ~UFF_IOCTL_IP;
+                       result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd,
+                               result, (acc_t *)0, TRUE);
+                       assert (result == 0);
+                       return result;
+               }
                tmp_acc= udp_fd->uf_rdbuf_head;
                while (tmp_acc)
                {
@@ -788,7 +776,7 @@ size_t count;
                }
                udp_fd->uf_rdbuf_head= NULL;
        }
-       udp_fd->uf_flags |= UFF_READ_IP;
+       udp_fd->uf_flags |= UFF_PEEK_IP;
        return NW_SUSPEND;
 }
 
@@ -847,7 +835,7 @@ udp_fd_t *udp_fd;
        udp_fd->uf_flags &= ~UFF_READ_IP;
        result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, result,
                        (acc_t *)0, FALSE);
-assert (result == 0);
+       assert (result == 0);
 
        return result;
 }
@@ -864,12 +852,12 @@ size_t pack_size;
        udp_hdr_t *udp_hdr;
        udp_io_hdr_t *udp_io_hdr;
        size_t ip_hdr_size, udp_size, data_size, opt_size;
-       ipaddr_t src_addr, dst_addr;
+       ipaddr_t src_addr, dst_addr, ipaddr;
        udpport_t src_port, dst_port;
        u8_t u16[2];
        u16_t chksum;
        unsigned long dst_type, flags;
-       time_t  exp_tim;
+       clock_t exp_tim;
        int i, delivered, hash;
 
        udp_port= &udp_port_table[port];
@@ -886,19 +874,29 @@ size_t pack_size;
                ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_hdr_acc);
        }
 
-       udp_acc= bf_delhead(pack, ip_hdr_size);
-       pack= NULL;
-
        pack_size -= ip_hdr_size;
        if (pack_size < UDP_HDR_SIZE)
        {
-               DBLOCK(1, printf("packet too small\n"));
+               if (pack_size == 0 && ip_hdr->ih_proto == 0)
+               {
+                       /* IP layer reports new IP address */
+                       ipaddr= ip_hdr->ih_src;
+                       udp_port->up_ipaddr= ipaddr;
+                       DBLOCK(1, printf("udp_ip_arrived: using address ");
+                               writeIpAddr(ipaddr); printf("\n"));
+               }
+               else
+                       DBLOCK(1, printf("packet too small\n"));
 
                bf_afree(ip_hdr_acc);
-               bf_afree(udp_acc);
+               bf_afree(pack);
                return;
        }
 
+       udp_acc= bf_delhead(pack, ip_hdr_size);
+       pack= NULL;
+
+
        udp_acc= bf_packIffLess(udp_acc, UDP_HDR_SIZE);
        udp_hdr= (udp_hdr_t *)ptr2acc_data(udp_acc);
        udp_size= ntohs(udp_hdr->uh_length);
@@ -1262,12 +1260,10 @@ assert (!udp_port->up_wr_pack);
        ip_hdr->ih_vers_ihl= (IP_MIN_HDR_SIZE+ip_opt_size) >> 2;
        ip_hdr->ih_tos= UDP_TOS;
        ip_hdr->ih_flags_fragoff= HTONS(UDP_IP_FLAGS);
-       ip_hdr->ih_ttl= UDP_TTL;
+       ip_hdr->ih_ttl= IP_DEF_TTL;
        ip_hdr->ih_proto= IPPROTO_UDP;
        if (flags & NWUO_RA_SET)
        {
-               DBLOCK(1, printf("NWUO_RA_SET\n"));
-
                ip_hdr->ih_dst= udp_fd->uf_udpopt.nwuo_remaddr;
        }
        else
@@ -1445,12 +1441,11 @@ assert (udp_fd->uf_flags & UFF_WRITE_IP);
        case SR_CANCEL_IOCTL:
 assert (udp_fd->uf_flags & UFF_IOCTL_IP);
                udp_fd->uf_flags &= ~UFF_IOCTL_IP;
+               udp_fd->uf_flags &= ~UFF_PEEK_IP;
                reply_thr_get(udp_fd, EINTR, TRUE);
                break;
-#if !CRAMPED
        default:
                ip_panic(( "got unknown cancel request" ));
-#endif
        }
        return NW_OK;
 }
@@ -1459,9 +1454,8 @@ PRIVATE void udp_buffree (priority)
 int priority;
 {
        int i;
-       time_t curr_tim;
        udp_fd_t *udp_fd;
-       acc_t *tmp_acc, *next_acc;
+       acc_t *tmp_acc;
 
        if (priority ==  UDP_PRI_FDBUFS_EXTRA)
        {
@@ -1494,9 +1488,10 @@ int priority;
 PRIVATE void udp_rd_enqueue(udp_fd, pack, exp_tim)
 udp_fd_t *udp_fd;
 acc_t *pack;
-time_t exp_tim;
+clock_t exp_tim;
 {
        acc_t *tmp_acc;
+       int result;
 
        if (pack->acc_linkC != 1)
        {
@@ -1513,6 +1508,20 @@ time_t exp_tim;
        else
                udp_fd->uf_rdbuf_tail->acc_ext_link= pack;
        udp_fd->uf_rdbuf_tail= pack;
+
+       if (udp_fd->uf_flags & UFF_PEEK_IP)
+       {
+               pack= bf_cut(udp_fd->uf_rdbuf_head, 0,
+                       sizeof(udp_io_hdr_t));
+               result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd,
+                       (size_t)0, pack, TRUE);
+
+               udp_fd->uf_flags &= ~UFF_IOCTL_IP;
+               udp_fd->uf_flags &= ~UFF_PEEK_IP;
+               result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd,
+                       result, (acc_t *)0, TRUE);
+               assert (result == 0);
+       }
 }
 
 PRIVATE void hash_fd(udp_fd)
@@ -1581,7 +1590,7 @@ PRIVATE void udp_bufcheck()
        udp_fd_t *udp_fd;
        acc_t *tmp_acc;
 
-       for (i= 0, udp_port= udp_port_table; i<ip_conf_nr; i++, udp_port++)
+       for (i= 0, udp_port= udp_port_table; i<udp_conf_nr; i++, udp_port++)
        {
                if (udp_port->up_wr_pack)
                        bf_check_acc(udp_port->up_wr_pack);
@@ -1599,5 +1608,5 @@ PRIVATE void udp_bufcheck()
 #endif
 
 /*
- * $PchId: udp.c,v 1.10 1996/08/06 06:48:05 philip Exp $
+ * $PchId: udp.c,v 1.25 2005/06/28 14:14:44 philip Exp $
  */
index b2190ca78f831d27324e4a0ba3796853fbd2501b..4a0b582909c029f3f0feb73bc0d27909815c733e 100644 (file)
@@ -12,7 +12,6 @@ Copyright 1995 Philip Homburg
 #define UDP_READ_EXP_TIME      (10L * HZ)
 #define UDP_TOS                        0
 #define UDP_IP_FLAGS           0
-#define UDP_TTL                        30
 
 #define UDP0   0
 
@@ -22,7 +21,7 @@ void udp_prep ARGS(( void ));
 void udp_init ARGS(( void ));
 int udp_open ARGS(( int port, int srfd,
        get_userdata_t get_userdata, put_userdata_t put_userdata, 
-       put_pkt_t put_pkt ));
+       put_pkt_t put_pkt, select_res_t select_res ));
 int udp_ioctl ARGS(( int fd, ioreq_t req ));
 int udp_read ARGS(( int fd, size_t count ));
 int udp_write ARGS(( int fd, size_t count ));
@@ -33,5 +32,5 @@ int udp_cancel ARGS(( int fd, int which_operation ));
 
 
 /*
- * $PchId: udp.h,v 1.6 1996/05/07 20:53:31 philip Exp $
+ * $PchId: udp.h,v 1.9 2005/06/28 14:12:05 philip Exp $
  */
diff --git a/servers/inet/generic/udp_int.h b/servers/inet/generic/udp_int.h
new file mode 100644 (file)
index 0000000..9977f5c
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+generic/udp_int.h
+
+Created:       March 2001 by Philip Homburg <philip@f-mnx.phicoh.com>
+
+Some internals of the UDP module
+*/
+
+#define UDP_FD_NR              (4*IP_PORT_MAX)
+#define UDP_PORT_HASH_NR       16              /* Must be a power of 2 */
+
+typedef struct udp_port        /* One entry of udp_port_table (udp_conf_nr entries, see udp_prep). */
+{
+       int up_flags;           /* set to UPF_EMPTY in udp_init; presumably a UPF_* bit mask -- confirm */
+       int up_state;           /* one of the UPS_* values; ioctls suspend unless it is UPS_MAIN */
+       int up_ipfd;            /* value returned by ip_open() in udp_main; negative means failure */
+       int up_ipdev;           /* udp_conf[i].uc_port; used as index into ip_conf and passed to ip_open */
+       acc_t *up_wr_pack;      /* presumably the packet currently being written -- confirm */
+       ipaddr_t up_ipaddr;     /* address reported by the IP layer; returned as nwuo_locaddr by NWIOGUDPOPT */
+       struct udp_fd *up_next_fd;      /* initialised to udp_fd_table in udp_init */
+       struct udp_fd *up_write_fd;     /* NULL after udp_init; presumably the fd whose write is in progress -- confirm */
+       struct udp_fd *up_port_any;     /* NULL after udp_init; purpose not visible here -- confirm */
+       struct udp_fd *up_port_hash[UDP_PORT_HASH_NR]; /* all NULL after udp_init; presumably maintained by hash_fd/unhash_fd */
+} udp_port_t;
+
+#define UPF_EMPTY      0x0
+#define UPF_WRITE_IP   0x1
+#define UPF_WRITE_SP   0x2
+#define UPF_READ_IP    0x4
+#define UPF_READ_SP    0x8
+#define UPF_SUSPEND    0x10
+#define UPF_MORE2WRITE 0x20
+
+#define UPS_EMPTY      0
+#define UPS_SETPROTO   1
+#define UPS_GETCONF    2
+#define UPS_MAIN       3
+#define UPS_ERROR      4
+
+typedef struct udp_fd          /* One entry of udp_fd_table; a slot is free while UFF_INUSE is clear. */
+{
+       int uf_flags;           /* UFF_* bits */
+       udp_port_t *uf_port;    /* set by udp_open to &udp_port_table[port] */
+       ioreq_t uf_ioreq;       /* request code saved by udp_ioctl */
+       int uf_srfd;            /* fd given to udp_open; passed as first argument to uf_put_userdata */
+       nwio_udpopt_t uf_udpopt;        /* options; nwuo_flags starts as UDP_DEF_OPT, returned by NWIOGUDPOPT */
+       get_userdata_t uf_get_userdata; /* callback supplied to udp_open */
+       put_userdata_t uf_put_userdata; /* callback supplied to udp_open */
+       acc_t *uf_rdbuf_head;   /* first queued packet; the queue is linked through acc_ext_link */
+       acc_t *uf_rdbuf_tail;   /* last queued packet (udp_rd_enqueue appends here) */
+       size_t uf_rd_count;     /* count argument of the latest udp_read */
+       size_t uf_wr_count;     /* presumably the count of the pending write -- confirm */
+       clock_t uf_exp_tim;     /* queued data is discarded once get_time() is past this value */
+       struct udp_fd *uf_port_next;    /* NULL after udp_open; presumably the per-port chain link -- confirm */
+} udp_fd_t;
+
+#define UFF_EMPTY      0x0
+#define UFF_INUSE      0x1
+#define UFF_IOCTL_IP   0x2
+#define UFF_READ_IP    0x4
+#define UFF_WRITE_IP   0x8
+#define UFF_OPTSET     0x10
+#define UFF_PEEK_IP    0x20
+
+EXTERN udp_port_t *udp_port_table;
+EXTERN udp_fd_t udp_fd_table[UDP_FD_NR];
+
+/*
+ * $PchId: udp_int.h,v 1.4 2004/08/03 11:12:01 philip Exp $
+ */
index 7cc6114d77cd27d517ac8772bef8835fc5dbb4ae..1970af81d3f930a5e29b077c2fa73000634b6235 100644 (file)
@@ -3,12 +3,6 @@
 
 Copyright 1995 Philip Homburg
 
-Changes: 
-   Oct 10, 2004          Get own process number with SYS_GETINFO  (Jorrit N. Herder)
-   Sep 30, 2004          Updated system calls done in clock.c.  (Jorrit N. Herder)
-   Sep 15, 2004          Exit on HARD_STOP notification  (Jorrit N. Herder)
-   Aug 24, 2004   Alarms no longer from SYNALRM task  (Jorrit N. Herder)
-
 The valid messages and their parameters are:
 
 from FS:
@@ -35,65 +29,175 @@ from FS:
 | NW_CANCEL    | minor dev | proc nr |       |          |         |
 |_______________|___________|_________|_______|__________|_________|
 
-from the Ethernet task:
+from DL_ETH:
  _______________________________________________________________________
 |              |           |         |          |            |         |
 | m_type       |  DL_PORT  | DL_PROC | DL_COUNT |  DL_STAT   | DL_TIME |
 |_______________|___________|_________|__________|____________|_________|
 |              |           |         |          |            |         |
-| DL_TASK_INT  | minor dev | proc nr | rd_count |  0  | stat |  time   |
+| DL_INIT_REPLY        | minor dev | proc nr | rd_count |  0  | stat |  time   |
 |_______________|___________|_________|__________|____________|_________|
 |              |           |         |          |            |         |
-| DL_TASK_REPLY        | minor dev | proc nr | rd_count | err | stat |  time   |         |
+| DL_TASK_REPLY        | minor dev | proc nr | rd_count | err | stat |  time   |
 |_______________|___________|_________|__________|____________|_________|
 */
 
 #include "inet.h"
 
-#define _MINIX 1
+#define _MINIX_SOURCE 1
 
+#include <fcntl.h>
+#include <time.h>
 #include <unistd.h>
 #include <sys/svrctl.h>
-#include <minix/syslib.h>
-#include <minix/utils.h>
 
 #include "mq.h"
+#include "qp.h"
 #include "proto.h"
 #include "generic/type.h"
 
+#include "generic/arp.h"
 #include "generic/assert.h"
 #include "generic/buf.h"
 #include "generic/clock.h"
 #include "generic/eth.h"
 #include "generic/event.h"
-#if !CRAMPED
-#include "generic/arp.h"
 #include "generic/ip.h"
 #include "generic/psip.h"
+#include "generic/rand256.h"
 #include "generic/sr.h"
 #include "generic/tcp.h"
 #include "generic/udp.h"
-#endif
 
 THIS_FILE
 
+#define RANDOM_DEV_NAME        "/dev/random"
+
 int this_proc;         /* Process number of this server. */
 
+#ifdef __minix_vmd
+static int synal_tasknr= ANY;
+#endif
+
+/* Killing Solaris */
+int killer_inet= 0;
+
 #ifdef BUF_CONSISTENCY_CHECK
 extern int inet_buf_debug;
 #endif
 
 _PROTOTYPE( void main, (void) );
 
+FORWARD _PROTOTYPE( void nw_conf, (void) );
 FORWARD _PROTOTYPE( void nw_init, (void) );
 
 PUBLIC void main()
 {
        mq_t *mq;
        int r;
-       int source;
+       int source, timerand, fd;
+       struct fssignon device;
+#ifdef __minix_vmd
+       struct systaskinfo info;
+#endif
+       u8_t randbits[32];
+       struct timeval tv;
+
+       printf("Hello, in inet\n");
+#if DEBUG
+       printf("Starting inet...\n");
+       printf("%s\n", version);
+#endif
+
+       /* Read configuration. */
+       nw_conf();
+
+       /* Get a random number */
+       timerand= 1;
+       fd= open(RANDOM_DEV_NAME, O_RDONLY | O_NONBLOCK);
+       if (fd != -1)
+       {
+               r= read(fd, randbits, sizeof(randbits));
+               if (r == sizeof(randbits))
+                       timerand= 0;
+               else
+               {
+                       printf("unable to read random data from %s: %s\n",
+                               RANDOM_DEV_NAME, r == -1 ? strerror(errno) :
+                               r == 0 ? "EOF" : "not enough data");
+               }
+               close(fd);
+       }
+       else
+       {
+               printf("unable to open random device %s: %s\n",
+                       RANDOM_DEV_NAME, strerror(errno));
+       }
+       if (timerand)
+       {
+               printf("using current time for random-number seed\n");
+#ifdef __minix_vmd
+               r= sysutime(UTIME_TIMEOFDAY, &tv);
+#else /* Minix 3 */
+               r= gettimeofday(&tv, NULL);
+#endif
+               if (r == -1)
+               {
+                       printf("sysutime failed: %s\n", strerror(errno));
+                       exit(1);
+               }
+               memcpy(randbits, &tv, sizeof(tv));
+       }
+       init_rand256(randbits);
+
+       /* Sign on as a server at all offices in the proper order. */
+       if (svrctl(MMSIGNON, (void *) NULL) == -1) {
+               printf("inet: server signon failed\n");
+               exit(1);
+       }
+#ifdef __minix_vmd
+       if (svrctl(SYSSIGNON, (void *) &info) == -1) pause();
+
+       /* Our new identity as a server. */
+       this_proc = info.proc_nr;
+#else /* Minix 3 */
+       if (svrctl(SYSSIGNON, (void *) NULL) == -1) pause();
+
+       /* Our new identity as a server. */
+       if (getprocnr(&this_proc) != OK)
+               ip_panic(( "unable to get own process nr\n"));
+#endif
+
+       /* Register the device group. */
+       device.dev= ip_dev;
+       device.style= STYLE_CLONE;
+       if (svrctl(FSSIGNON, (void *) &device) == -1) {
+               printf("inet: error %d on registering ethernet devices\n",
+                       errno);
+               pause();
+       }
+
+#ifdef BUF_CONSISTENCY_CHECK
+       inet_buf_debug= (getenv("inetbufdebug") && 
+               (strcmp(getenv("inetbufdebug"), "on") == 0));
+       inet_buf_debug= 100;
+       if (inet_buf_debug)
+       {
+               ip_warning(( "buffer consistency check enabled" ));
+       }
+#endif
+
+       if (getenv("killerinet"))
+       {
+               ip_warning(( "killer inet active" ));
+               killer_inet= 1;
+       }
 
-       DBLOCK(1, printf("%s\n", version));
+#ifdef __minix_vmd
+       r= sys_findproc(SYN_AL_NAME, &synal_tasknr, 0);
+       if (r != OK)
+               ip_panic(( "unable to find synchronous alarm task: %d\n", r ));
+#endif
 
        nw_init();
        while (TRUE)
@@ -103,7 +207,7 @@ PUBLIC void main()
                {
                        static int buf_debug_count= 0;
 
-                       if (buf_debug_count++ > inet_buf_debug)
+                       if (++buf_debug_count >= inet_buf_debug)
                        {
                                buf_debug_count= 0;
                                if (!bf_consistency_check())
@@ -126,22 +230,36 @@ PUBLIC void main()
                        ip_panic(("out of messages"));
 
                r= receive (ANY, &mq->mq_mess);
-               if (r<0) {
+               if (r<0)
+               {
                        ip_panic(("unable to receive: %d", r));
                }
                reset_time();
                source= mq->mq_mess.m_source;
-               if (source == FS_PROC_NR) {
+               if (source == FS_PROC_NR)
+               {
                        sr_rec(mq);
-               } else if (mq->mq_mess.m_type & NOTIFICATION ) 
+               }
+#ifdef __minix_vmd
+               else if (source == synal_tasknr)
                {
-                       if (mq->mq_mess.m_type == SYN_ALARM) {
+                       clck_tick (&mq->mq_mess);
+                       mq_free(mq);
+               }
+#else /* Minix 3 */
+               else if (mq->mq_mess.m_type & NOTIFICATION)
+               {
+                       if (mq->mq_mess.m_type == SYN_ALARM)
+                       {
                                clck_tick(&mq->mq_mess);
                                mq_free(mq);
-                       } else if (mq->mq_mess.m_type == HARD_STOP) {
+                       }
+                       else if (mq->mq_mess.m_type == HARD_STOP)
+                       {
                                sys_exit(0);
                        }
-               }
+               } 
+#endif
                else
                {
 compare(mq->mq_mess.m_type, ==, DL_TASK_REPLY);
@@ -152,12 +270,8 @@ compare(mq->mq_mess.m_type, ==, DL_TASK_REPLY);
        ip_panic(("task is not allowed to terminate"));
 }
 
-PRIVATE void nw_init()
+PRIVATE void nw_conf()
 {
-       struct fssignon device;
-       int pnr;
-
-       /* Read configuration. */
        read_conf();
        eth_prep();
        arp_prep();
@@ -165,59 +279,23 @@ PRIVATE void nw_init()
        ip_prep();
        tcp_prep();
        udp_prep();
+}
 
-
-       /* Sign on as a server at all offices in the proper order. */
-       if (svrctl(MMSIGNON, (void *) NULL) == -1) {
-               printf("inet: server signon failed\n");
-               exit(1);
-       }
-       if (svrctl(SYSSIGNON, (void *) NULL) == -1) pause();
-
-       /* Our new identity as a server. */
-       if (getprocnr(&this_proc) != OK)
-               ip_panic(( "unable to get own process nr\n"));
-
-       /* Register the device group. */
-       device.dev= ip_dev;
-       device.style= STYLE_CLONE;
-       if (svrctl(FSSIGNON, (void *) &device) == -1) {
-               printf("inet: error %d on registering ethernet devices\n",
-                       errno);
-               pause();
-       }
-
-
-#ifdef BUF_CONSISTENCY_CHECK
-       inet_buf_debug= 100;
-       if (inet_buf_debug)
-       {
-               ip_warning(( "buffer consistency check enabled" ));
-       }
-#endif
+PRIVATE void nw_init()
+{
        mq_init();
+       qp_init();
        bf_init();
        clck_init();
        sr_init();
        eth_init();
-#if ENABLE_ARP
        arp_init();
-#endif
-#if ENABLE_PSIP
        psip_init();
-#endif
-#if ENABLE_IP
        ip_init();
-#endif
-#if ENABLE_TCP
        tcp_init();
-#endif
-#if ENABLE_UDP
        udp_init();
-#endif
 }
 
-#if !CRAMPED
 PUBLIC void panic0(file, line)
 char *file;
 int line;
@@ -229,19 +307,13 @@ PUBLIC void inet_panic()
 {
        printf("\ninet stacktrace: ");
        stacktrace();
-       panic("INET","aborted due to a panic",NO_NUM);
-}
-
-#else /* CRAMPED */
-
-PUBLIC void inet_panic(file, line)
-char *file;
-int line;
-{
-       printf("panic at %s, %d\n", file, line);
-       panic("INET","aborted due to a panic",NO_NUM);
-}
+#ifdef __minix_vmd
+       sys_abort(RBT_PANIC);
+#else /* Minix 3 */
+       (panic)("INET","aborted due to a panic",NO_NUM);
 #endif
+       for(;;);
+}
 
 #if !NDEBUG
 PUBLIC void bad_assertion(file, line, what)
@@ -251,7 +323,7 @@ char *what;
 {
        panic0(file, line);
        printf("assertion \"%s\" failed", what);
-       inet_panic();
+       panic();
 }
 
 
@@ -264,10 +336,10 @@ int rhs;
 {
        panic0(file, line);
        printf("compare (%d) %s (%d) failed", lhs, what, rhs);
-       inet_panic();
+       panic();
 }
 #endif /* !NDEBUG */
 
 /*
- * $PchId: inet.c,v 1.12 1996/12/17 07:58:19 philip Exp $
+ * $PchId: inet.c,v 1.23 2005/06/28 14:27:22 philip Exp $
  */
index 3e5840fea43ea7063cb7162faa1b5cf1fe6583a9..0b1ace85e3b82dd55263a6e8cfbbdf5e5da377bd 100644 (file)
@@ -11,24 +11,35 @@ Copyright 1995 Philip Homburg
 
 #define _SYSTEM        1       /* get OK and negative error codes */
 
-#include <ansi.h>
-
-#define CRAMPED (_EM_WSIZE==2) /* 64K code and data is quite cramped. */
-#define ZERO 0 /* Used to comment out initialization code that does nothing. */
-
 #include <sys/types.h>
-#include <minix/type.h>
 #include <errno.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 
+#ifdef __minix_vmd
+
+#include <minix/ansi.h>
+#include <minix/cfg_public.h>
+#include <minix/type.h>
+
+#else /* Assume at least Minix 3.x */
+
+#include <unistd.h>
+#include <sys/time.h>
 #include <minix/config.h>
 #include <minix/type.h>
+#include <minix/utils.h>
+
+#define _NORETURN      /* Should be non empty for GCC */
+
+typedef int ioreq_t;
+
+#endif
+
 #include <minix/const.h>
 #include <minix/com.h>
 #include <minix/syslib.h>
-#include <minix/utils.h>
 #include <net/hton.h>
 #include <net/gen/ether.h>
 #include <net/gen/eth_hdr.h>
@@ -43,11 +54,14 @@ Copyright 1995 Philip Homburg
 #include <net/gen/psip_io.h>
 #include <net/gen/route.h>
 #include <net/gen/tcp.h>
+#include <net/gen/tcp.h>
 #include <net/gen/tcp_hdr.h>
 #include <net/gen/tcp_io.h>
 #include <net/gen/udp.h>
 #include <net/gen/udp_hdr.h>
 #include <net/gen/udp_io.h>
+
+#include <net/gen/arp_io.h>
 #include <net/ioctl.h>
 
 #include "const.h"
@@ -58,25 +72,14 @@ Copyright 1995 Philip Homburg
 #define PRIVATE        static
 #define FORWARD        static
 
-typedef int ioreq_t;
-
 #define THIS_FILE static char *this_file= __FILE__;
 
-#if CRAMPED
-
-/* Minimum panic info. */
-#define ip_panic(print_list)  inet_panic(this_file, __LINE__)
-_PROTOTYPE( void inet_panic, (char *file, int line) );
-
-#else /* !CRAMPED */
-
-/* Maximum panic info. */
-#define ip_panic(print_list)  \
-       (panic0(this_file, __LINE__), printf print_list, inet_panic())
 _PROTOTYPE( void panic0, (char *file, int line) );
-_PROTOTYPE( void inet_panic, (void) );
+_PROTOTYPE( void inet_panic, (void) ) _NORETURN; 
 
-#endif /* !CRAMPED */
+#define ip_panic(print_list)  \
+       (panic0(this_file, __LINE__), printf print_list, panic())
+#define panic() inet_panic()
 
 #if DEBUG
 #define ip_warning(print_list)  \
@@ -86,6 +89,9 @@ _PROTOTYPE( void inet_panic, (void) );
                printf("\ninet stacktrace: "), \
                stacktrace() \
        )
+#else
+#define ip_warning(print_list) ((void) 0)
+#endif
 
 #define DBLOCK(level, code) \
        do { if ((level) & DEBUG) { where(); code; } } while(0)
@@ -93,21 +99,19 @@ _PROTOTYPE( void inet_panic, (void) );
        do { if (((level) & DEBUG) && (condition)) \
                { where(); code; } } while(0)
 
-#else /* !DEBUG */
-#define ip_warning(print_list) 0
-#define DBLOCK(level, code)    0
-#define DIFBLOCK(level, condition, code)       0
-#endif
+#if _ANSI
+#define ARGS(x) x
+#else /* _ANSI */
+#define ARGS(x) ()
+#endif /* _ANSI */
 
-#define ARGS(x) _ARGS(x)
-
-extern char version[];
 extern int this_proc;
+extern char version[];
 
 void stacktrace ARGS(( void ));
 
 #endif /* INET__INET_H */
 
 /*
- * $PchId: inet.h,v 1.8 1996/05/07 21:05:04 philip Exp $
+ * $PchId: inet.h,v 1.16 2005/06/28 14:27:54 philip Exp $
  */
index c2554f341ab36ee03e0d6908228d145fff301403..735232e79a28efa7d84fc631209eeec131494e7b 100644 (file)
@@ -9,42 +9,41 @@ Modified:     Apr 07, 2001 by Kees J. Bot
 Copyright 1995 Philip Homburg
 */
 
-#define _MINIX 1
+#define _MINIX_SOURCE 1
+#define _POSIX_SOURCE 1
 
 #include <stdlib.h>
+#include <unistd.h>
 #include <fcntl.h>
 #include <string.h>
 #include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <minix/config.h>
 #include <minix/type.h>
 #include <minix/syslib.h>
-#include <minix/utils.h>
-#include <unistd.h>
 #include "inet_config.h"
 
-#define CRAMPED (_EM_WSIZE==2) /* 64K code and data is quite cramped. */
-#if CRAMPED
-#endif
-
 struct eth_conf eth_conf[IP_PORT_MAX];
 struct psip_conf psip_conf[IP_PORT_MAX];
 struct ip_conf ip_conf[IP_PORT_MAX];
+struct tcp_conf tcp_conf[IP_PORT_MAX];
+struct udp_conf udp_conf[IP_PORT_MAX];
 dev_t ip_dev;
 
 int eth_conf_nr;
-#if ENABLE_PSIP
 int psip_conf_nr;
-#endif
 int ip_conf_nr;
+int tcp_conf_nr;
+int udp_conf_nr;
+
+int ip_forward_directed_bcast= 0;      /* Default is off */
 
 static u8_t iftype[IP_PORT_MAX];       /* Interface in use as? */
 static int ifdefault= -1;              /* Default network interface. */
 
 static void fatal(char *label)
 {
-       printf("init: %s: Error %d\n", label, errno);
+       printf("init: %s: %s\n", label, strerror(errno));
        exit(1);
 }
 
@@ -206,7 +205,7 @@ static unsigned number(char *str, unsigned max)
 
 void read_conf(void)
 {
-       int i, j, ifno, type, port;
+       int i, j, ifno, type, port, enable;
        struct eth_conf *ecp;
        struct psip_conf *pcp;
        struct ip_conf *icp;
@@ -226,13 +225,25 @@ void read_conf(void)
                        type= NETTYPE_ETH;
                        port= eth_conf_nr;
                        token(1);
-                       ecp->ec_task= alloc(strlen(word)+1);
-                       strcpy(ecp->ec_task, word);
-                       token(1);
-                       ecp->ec_port= number(word, IP_PORT_MAX-1);
+                       if (strcmp(word, "vlan") == 0) {
+                               token(1);
+                               ecp->ec_vlan= number(word, (1<<12)-1);
+                               token(1);
+                               if (strncmp(word, "eth", 3) != 0) {
+                                       printf(
+                               "inet: VLAN eth%d can't be built on %s\n",
+                                               ifno, word);
+                                       exit(1);
+                               }
+                               ecp->ec_port= number(word+3, IP_PORT_MAX-1);
+                       } else {
+                               ecp->ec_task= alloc(strlen(word)+1);
+                               strcpy(ecp->ec_task, word);
+                               token(1);
+                               ecp->ec_port= number(word, IP_PORT_MAX-1);
+                       }
                        ecp++;
                        eth_conf_nr++;
-#if ENABLE_PSIP
                } else
                if (strncmp(word, "psip", 4) == 0) {
                        pcp->pc_ifno= ifno= number(word+4, IP_PORT_MAX-1);
@@ -240,7 +251,6 @@ void read_conf(void)
                        port= psip_conf_nr;
                        pcp++;
                        psip_conf_nr++;
-#endif
                } else {
                        printf("inet: Unknown device '%s'\n", word);
                        error();
@@ -249,27 +259,58 @@ void read_conf(void)
                icp->ic_ifno= ifno;
                icp->ic_devtype= type;
                icp->ic_port= port;
+               tcp_conf[tcp_conf_nr].tc_port= ip_conf_nr;
+               udp_conf[udp_conf_nr].uc_port= ip_conf_nr;
+
+               enable= 7;      /* 1 = IP, 2 = TCP, 4 = UDP */
 
                token(0);
                if (word[0] == '{') {
                        token(0);
-                       if (strcmp(word, "default") == 0) {
-                               if (ifdefault != -1) {
-                                       printf(
-                       "inet: ip%d and ip%d can't both be default\n",
-                                               ifdefault, ifno);
-                                       error();
+                       while (word[0] != '}') {
+                               if (strcmp(word, "default") == 0) {
+                                       if (ifdefault != -1) {
+                                               printf(
+                               "inet: ip%d and ip%d can't both be default\n",
+                                                       ifdefault, ifno);
+                                               error();
+                                       }
+                                       ifdefault= ifno;
+                                       token(0);
+                               } else
+                               if (strcmp(word, "no") == 0) {
+                                       token(1);
+                                       if (strcmp(word, "ip") == 0) {
+                                               enable= 0;
+                                       } else
+                                       if (strcmp(word, "tcp") == 0) {
+                                               enable &= ~2;
+                                       } else
+                                       if (strcmp(word, "udp") == 0) {
+                                               enable &= ~4;
+                                       } else {
+                                               printf(
+                                               "inet: Can't do 'no %s'\n",
+                                                       word);
+                                               exit(1);
+                                       }
+                                       token(0);
+                               } else {
+                                       printf("inet: Unknown option '%s'\n",
+                                               word);
+                                       exit(1);
                                }
-                               ifdefault= ifno;
-                               token(0);
+                               if (word[0] == ';') token(0);
+                               else
+                               if (word[0] != '}') error();
                        }
-                       if (word[0] == ';') token(0);
-                       if (word[0] != '}') error();
                        token(0);
                }
                if (word[0] != ';' && word[0] != 0) error();
-               icp++;
-               ip_conf_nr++;
+
+               if (enable & 1) icp++, ip_conf_nr++;
+               if (enable & 2) tcp_conf_nr++;
+               if (enable & 4) udp_conf_nr++;
        }
 
        if (ifdefault == -1) {
@@ -277,6 +318,27 @@ void read_conf(void)
                exit(1);
        }
 
+       /* Translate VLAN network references to port numbers. */
+       for (i= 0; i < eth_conf_nr; i++) {
+               ecp= &eth_conf[i];
+               if (eth_is_vlan(ecp)) {
+                       for (j= 0; j < eth_conf_nr; j++) {
+                               if (eth_conf[j].ec_ifno == ecp->ec_port
+                                       && !eth_is_vlan(&eth_conf[j])
+                               ) {
+                                       ecp->ec_port= j;
+                                       break;
+                               }
+                       }
+                       if (j == eth_conf_nr) {
+                               printf(
+                               "inet: VLAN eth%d can't be built on eth%d\n",
+                                       ecp->ec_ifno, ecp->ec_port);
+                               exit(1);
+                       }
+               }
+       }
+
        /* Set umask 0 so we can creat mode 666 devices. */
        (void) umask(0);
 
@@ -300,5 +362,5 @@ void *alloc(size_t size)
 }
 
 /*
- * $PchId: inet_config.c,v 1.6 1998/10/23 20:15:27 philip Exp $
+ * $PchId: inet_config.c,v 1.10 2003/08/21 09:26:02 philip Exp $
  */
index 3f0ba6e7fb1d87758dbf5b64d67af6eca2ddb4fd..b4d6fce4867f196167292384d19121723350f2d5 100644 (file)
@@ -12,28 +12,26 @@ Copyright 1995 Philip Homburg
 #ifndef INET__INET_CONFIG_H
 #define INET__INET_CONFIG_H
 
-#define ENABLE_ARP     1
-#define ENABLE_IP      1
-#define ENABLE_PSIP    1
-#define ENABLE_TCP     1
-#define ENABLE_UDP     1
-
 /* Inet configuration file. */
 #define PATH_INET_CONF "/etc/inet.conf"
 
-#define IP_PORT_MAX  (1*sizeof(char*)) /* Up to this many network devices */
+#define IP_PORT_MAX    32      /* Up to this many network devices */
 extern int eth_conf_nr;                /* Number of ethernets */
 extern int psip_conf_nr;       /* Number of Pseudo IP networks */
-extern int ip_conf_nr;         /* Number of configured TCP/IP layers */
+extern int ip_conf_nr;         /* Number of configured IP layers */
+extern int tcp_conf_nr;                /* Number of configured TCP layers */
+extern int udp_conf_nr;                /* Number of configured UDP layers */
 
 extern dev_t ip_dev;           /* Device number of /dev/ip */
 
 struct eth_conf
 {
-       char *ec_task;          /* Kernel ethernet task name */
-       u8_t ec_port;           /* Task port */
+       char *ec_task;          /* Kernel ethernet task name if nonnull */
+       u8_t ec_port;           /* Task port (!vlan) or Ethernet port (vlan) */
        u8_t ec_ifno;           /* Interface number of /dev/eth* */
+       u16_t ec_vlan;          /* VLAN number of this net if task == NULL */
 };
+#define eth_is_vlan(ecp)       ((ecp)->ec_task == NULL)
 
 struct psip_conf
 {
@@ -47,6 +45,16 @@ struct ip_conf
        u8_t ic_ifno;           /* Interface number of /dev/ip*, tcp*, udp* */
 };
 
+struct tcp_conf
+{
+       u8_t tc_port;           /* IP port number */
+};
+
+struct udp_conf
+{
+       u8_t uc_port;           /* IP port number */
+};
+
 /* Types of networks. */
 #define NETTYPE_ETH    1
 #define NETTYPE_PSIP   2
@@ -64,12 +72,17 @@ struct ip_conf
 extern struct eth_conf eth_conf[IP_PORT_MAX];
 extern struct psip_conf psip_conf[IP_PORT_MAX];
 extern struct ip_conf ip_conf[IP_PORT_MAX];
+extern struct tcp_conf tcp_conf[IP_PORT_MAX];
+extern struct udp_conf udp_conf[IP_PORT_MAX];
 void read_conf(void);
 extern char *sbrk(int);
 void *alloc(size_t size);
 
+/* Options */
+extern int ip_forward_directed_bcast;
+
 #endif /* INET__INET_CONFIG_H */
 
 /*
- * $PchId: inet_config.h,v 1.6 1998/10/23 20:14:28 philip Exp $
+ * $PchId: inet_config.h,v 1.10 2003/08/21 09:24:33 philip Exp $
  */
diff --git a/servers/inet/minix3/queryparam.c b/servers/inet/minix3/queryparam.c
new file mode 100644 (file)
index 0000000..d8b37c1
--- /dev/null
@@ -0,0 +1,151 @@
+/*     queryparam() - allow program parameters to be queried
+ *                                                     Author: Kees J. Bot
+ *                                                             21 Apr 1994
+ */
+#define nil 0
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <minix3/queryparam.h>
+
+#if EXAMPLE    /* Usage sketch; "..." is elision, so EXAMPLE must stay off. */
+struct stat st[2];
+
+struct export_param_list ex_st_list[]= {
+       QP_VARIABLE(st),
+       QP_ARRAY(st),
+       QP_FIELD(st_dev, struct stat),
+       QP_FIELD(st_ino, struct stat),
+       ...
+       QP_END()
+};
+
+struct buf { block_t b_blocknr; ... } *buf;
+size_t nr_bufs;
+
+struct export_param_list ex_buf_list[]= {
+       QP_VECTOR(buf, buf, nr_bufs),   /* Pointer plus element count. */
+       QP_FIELD(b_blocknr, struct buf),
+       ...
+       QP_END()
+};
+
+struct export_params ex_st= { ex_st_list, 0 }; /* Pass these to qp_export(). */
+struct export_params ex_buf= { ex_buf_list, 0 };
+#endif
+
+#define between(a, c, z)    ((unsigned) ((c) - (a)) <= (unsigned) ((z) - (a)))
+
+static int isvar(int c)                /* Is c a letter, a digit or '_'? */
+{
+       if (c == '_' || between('0', c, '9')) return 1;
+       return between('A', c, 'Z') || between('a', c, 'z');
+}
+
+static struct export_params *params;
+
+void qp_export(struct export_params *ex_params)
+{
+       /* Add a set of exported parameters, unless it is already listed. */
+       struct export_params *ep= params;
+       while (ep != nil && ep != ex_params) ep= ep->next; /* Last: nil next. */
+       if (ep != nil || ex_params->next != nil) return;
+       ex_params->next= params;
+       params= ex_params;
+}
+
+int queryparam(int qgetc(void), void **poffset, size_t *psize)
+{
+       /* Parse one query read through qgetc(), up to a ',' or a 0, into the
+        * address (*poffset) and size (*psize) of the object it names; both
+        * are zeroed on failure.  Returns nonzero if a ',' ended the query. */
+       char *prefix;
+       struct export_params *ep;
+       struct export_param_list *epl;
+       size_t offset= 0, size= -1, n;
+       static size_t retval;   /* Result of a '&' or '$' query lives here. */
+       int c, firstc;
+
+       firstc= c= (*qgetc)();
+       if (c == '&' || c == '$') c= (*qgetc)();
+       if (!isvar(c)) goto fail;
+
+       if ((ep= params) == nil) goto fail;
+       epl= ep->list;
+
+       while (c != 0 && c != ',') {
+               prefix= "x";
+               n= 0;
+
+               for (;;) {
+                       while (epl->name == nil) {      /* End of this set. */
+                               if ((ep= ep->next) == nil) goto fail;
+                               epl= ep->list;
+                       }
+                       if (strncmp(prefix, epl->name, n) == 0) {
+                               prefix= epl->name;
+                               while (prefix[n] != 0 && c == prefix[n]) {
+                                       n++;
+                                       c= (*qgetc)();
+                               }
+                       }
+                       if (prefix[n] == 0 && (!isvar(c) || prefix[0] == '[')) {
+                               break;          /* Got a match. */
+                       }
+                       epl++;
+               }
+
+               if (prefix[0] == '[') {
+                       /* Array reference: "[idx]" or "[idx:cnt]", cnt cut to fit. */
+                       size_t idx= 0, cnt= 1, max= size / epl->size;
+
+                       while (between('0', c, '9')) {
+                               idx= idx * 10 + (c - '0');
+                               if (idx > max) goto fail;
+                               c= (*qgetc)();
+                       }
+                       if (c == ':') {
+                               cnt= 0;
+                               while (between('0', (c= (*qgetc)()), '9')) {
+                                       cnt= cnt * 10 + (c - '0');
+                               }
+                       }
+                       if (c != ']') goto fail;
+                       /* idx <= max here, so this cannot wrap like idx + cnt. */
+                       if (cnt > max - idx) cnt= max - idx;
+                       offset+= idx * epl->size;
+                       size= cnt * epl->size;
+                       c= (*qgetc)();
+               } else
+               if (epl->size == -1) {
+                       /* Vector: pointer here, element count in the next entry. */
+                       offset= (size_t) * (void **) epl->offset;
+                       size= (* (size_t *) epl[1].offset) * epl[1].size;
+               } else {
+                       /* Variable or struct field. */
+                       offset+= (size_t) epl->offset;
+                       if ((size_t) epl->offset > size) goto fail;
+                       size-= (size_t) epl->offset;
+                       if (size < epl->size) goto fail;
+                       size= epl->size;
+               }
+       }
+       if (firstc == '&' || firstc == '$') {   /* Address or size wanted. */
+               retval= firstc == '&' ? offset : size;
+               offset= (size_t) &retval;
+               size= sizeof(retval);
+       }
+       if (c != 0 && c != ',') goto fail;
+       *poffset= (void *) offset;
+       *psize= size;
+       return c != 0;
+fail:
+       while (c != 0 && c != ',') c= (*qgetc)();       /* Skip the rest. */
+       *poffset= nil;
+       *psize= 0;
+       return c != 0;
+}
+
+/*
+ * $PchId: queryparam.c,v 1.1 2005/06/28 14:30:56 philip Exp $
+ */
diff --git a/servers/inet/minix3/queryparam.h b/servers/inet/minix3/queryparam.h
new file mode 100644 (file)
index 0000000..7415fab
--- /dev/null
@@ -0,0 +1,45 @@
+/*     queryparam.h - query program parameters         Author: Kees J. Bot
+ *                                                             22 Apr 1994
+ */
+#ifndef _MINIX__QUERYPARAM_H
+#define _MINIX__QUERYPARAM_H
+
+#include <ansi.h>
+
+typedef size_t _mnx_size_t;
+
+struct export_param_list {
+       char    *name;          /* "variable", "[", ".field", or NULL at the end. */
+       void    *offset;        /* Address of a variable or field offset. */
+       size_t  size;           /* Size of the resulting object, -1: a vector. */
+};
+
+struct export_params {
+       struct export_param_list *list; /* List of exported parameters. */
+       struct export_params     *next; /* Next set; qp_export() fills it in. */
+};
+
+#ifdef __STDC__
+#define qp_stringize(var)      #var
+#define qp_dotstringize(var)   "." #var
+#else
+#define qp_stringize(var)      "var"
+#define qp_dotstringize(var)   ".var"
+#endif
+#define QP_VARIABLE(var)       { qp_stringize(var), &(var), sizeof(var) }
+#define QP_ARRAY(var)          { "[", 0, sizeof((var)[0]) }
+#define QP_VECTOR(var,ptr,len) { qp_stringize(var), &(ptr), -1 },\
+                               { "[", &(len), sizeof(*(ptr)) }
+#define QP_FIELD(field, type)  { qp_dotstringize(field), \
+                                       (void *)offsetof(type, field), \
+                                       sizeof(((type *)0)->field) }
+#define QP_END()               { 0, 0, 0 }
+
+void qp_export _ARGS((struct export_params *_ex_params));
+int queryparam _ARGS((int (*_qgetc) _ARGS((void)), void **_paddress,
+                                                       _mnx_size_t *_psize));
+_mnx_size_t paramvalue _ARGS((char **_value, void *_address,
+                                                       _mnx_size_t _size));
+#endif /* _MINIX__QUERYPARAM_H */
+
+/* $PchId: queryparam.h,v 1.1 2005/06/28 14:31:26 philip Exp $ */
index f8bd24a430bfe21b54fa3dfc545dacc850f6e74a..bdc7d54c728d03290bc9316a79d44b4467732055 100644 (file)
@@ -18,12 +18,10 @@ Copyright 1995 Philip Homburg
 #include "generic/eth_int.h"
 #include "generic/sr.h"
 
-#include <minix/syslib.h>
-#define _MINIX
-#include <unistd.h>
-
 THIS_FILE
 
+static int recv_debug= 0;
+
 FORWARD _PROTOTYPE( void setup_read, (eth_port_t *eth_port) );
 FORWARD _PROTOTYPE( void read_int, (eth_port_t *eth_port, int count) );
 FORWARD _PROTOTYPE( void write_int, (eth_port_t *eth_port) );
@@ -33,26 +31,30 @@ FORWARD _PROTOTYPE( eth_port_t *find_port, (message *m) );
 
 PUBLIC void osdep_eth_init()
 {
-       int i, r, tasknr;
+       int i, r, tasknr, rport;
        struct eth_conf *ecp;
-       eth_port_t *eth_port;
-       message mess, repl_mess;
+       eth_port_t *eth_port, *rep;
+       message mess;
 
-       for (i= 0, eth_port= eth_port_table, ecp= eth_conf;
-               i<eth_conf_nr; i++, eth_port++, ecp++)
+       /* First initialize normal ethernet interfaces */
+       for (i= 0, ecp= eth_conf, eth_port= eth_port_table;
+               i<eth_conf_nr; i++, ecp++, eth_port++)
        {
-#if DEAD_CODE
-               r = sys_getprocnr(&tasknr, ecp->ec_task, strlen(ecp->ec_task));
-#endif
+               if (eth_is_vlan(ecp))
+                       continue;
+#ifdef __minix_vmd
+               r= sys_findproc(ecp->ec_task, &tasknr, 0);
+#else /* Minix 3 */
                r = findproc(ecp->ec_task, &tasknr);
+#endif 
                if (r != OK)
                {
-                       ip_panic(( "unable to find task %s: %d\n",
-                               ecp->ec_task, r ));
+                       printf("eth%d: unable to find task %s: %d\n",
+                               i, ecp->ec_task, r);
+                       continue;
                }
 
-
-               eth_port->etp_osdep.etp_port= ecp->ec_port;
+               eth_port->etp_osdep.etp_port= ecp->ec_port;
                eth_port->etp_osdep.etp_task= tasknr;
                ev_init(&eth_port->etp_osdep.etp_recvev);
 
@@ -64,11 +66,9 @@ PUBLIC void osdep_eth_init()
                r= send(eth_port->etp_osdep.etp_task, &mess);
                if (r<0)
                {
-#if !CRAMPED
                        printf(
                "osdep_eth_init: unable to send to ethernet task, error= %d\n",
                                r);
-#endif
                        continue;
                }
 
@@ -77,29 +77,79 @@ PUBLIC void osdep_eth_init()
 
                if (mess.m3_i1 == ENXIO)
                {
-#if !CRAMPED
                        printf(
                "osdep_eth_init: no ethernet device at task=%d,port=%d\n",
-                               eth_port->etp_osdep.etp_task,
+                               eth_port->etp_osdep.etp_task, 
                                eth_port->etp_osdep.etp_port);
-#endif
                        continue;
                }
-               if (mess.m3_i1 != eth_port->etp_osdep.etp_port)
-                       ip_panic(("osdep_eth_init: DL_INIT error or wrong port: %d\n",
+               if (mess.m3_i1 < 0)
+                       ip_panic(("osdep_eth_init: DL_INIT returned error %d\n",
                                mess.m3_i1));
+                       
+               if (mess.m3_i1 != eth_port->etp_osdep.etp_port)
+               {
+                       ip_panic((
+       "osdep_eth_init: got reply for wrong port (got %d, expected %d)\n",
+                               mess.m3_i1, eth_port->etp_osdep.etp_port));
+               }
 
                eth_port->etp_ethaddr= *(ether_addr_t *)mess.m3_ca1;
 
                sr_add_minor(if2minor(ecp->ec_ifno, ETH_DEV_OFF),
                        i, eth_open, eth_close, eth_read, 
-                       eth_write, eth_ioctl, eth_cancel);
+                       eth_write, eth_ioctl, eth_cancel, eth_select);
 
                eth_port->etp_flags |= EPF_ENABLED;
+               eth_port->etp_vlan= 0;
+               eth_port->etp_vlan_port= NULL;
                eth_port->etp_wr_pack= 0;
                eth_port->etp_rd_pack= 0;
                setup_read (eth_port);
-               eth_port++;
+       }
+
+       /* And now come the VLANs: each one rides on an enabled, non-VLAN port. */
+       for (i= 0, ecp= eth_conf, eth_port= eth_port_table;
+               i<eth_conf_nr; i++, ecp++, eth_port++)
+       {
+               if (!eth_is_vlan(ecp))
+                       continue;
+
+               eth_port->etp_osdep.etp_port= ecp->ec_port;
+               eth_port->etp_osdep.etp_task= ANY;      /* no DL_INIT is sent for a VLAN */
+               ev_init(&eth_port->etp_osdep.etp_recvev);
+
+               rport= eth_port->etp_osdep.etp_port;    /* index into eth_port_table */
+               assert(rport >= 0 && rport < eth_conf_nr);
+               rep= &eth_port_table[rport];
+               if (!(rep->etp_flags & EPF_ENABLED))    /* test the bit, not !flags */
+               {
+                       printf(
+                       "eth%d: underlying ethernet device %d not enabled\n",
+                               i, rport);
+                       continue;
+               }
+               if (rep->etp_vlan != 0)         /* no VLAN on top of a VLAN */
+               {
+                       printf(
+                       "eth%d: underlying ethernet device %d is a VLAN\n",
+                               i, rport);
+                       continue;
+               }
+
+               eth_port->etp_ethaddr= rep->etp_ethaddr;        /* share its address */
+
+               sr_add_minor(if2minor(ecp->ec_ifno, ETH_DEV_OFF),
+                       i, eth_open, eth_close, eth_read,
+                       eth_write, eth_ioctl, eth_cancel, eth_select);
+
+               eth_port->etp_flags |= EPF_ENABLED;
+               eth_port->etp_vlan= ecp->ec_vlan;
+               eth_port->etp_vlan_port= rep;
+               assert(eth_port->etp_vlan != 0);
+               eth_port->etp_wr_pack= 0;
+               eth_port->etp_rd_pack= 0;
+               eth_reg_vlan(rep, eth_port);
        }
 }
 
@@ -116,6 +166,9 @@ acc_t *pack;
        int multicast, r;
        ev_arg_t ev_arg;
 
+       assert(!no_ethWritePort);
+       assert(!eth_port->etp_vlan);
+
        assert(eth_port->etp_wr_pack == NULL);
        eth_port->etp_wr_pack= pack;
 
@@ -173,7 +226,8 @@ acc_t *pack;
                        ip_panic(("unable to receive"));
 
                loc_port= eth_port;
-               if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT)
+               if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT ||
+                       loc_port->etp_osdep.etp_task != block_msg.m_source)
                {
                        loc_port= find_port(&block_msg);
                }
@@ -187,6 +241,12 @@ acc_t *pack;
                }
                if (block_msg.DL_STAT & DL_PACK_RECV)
                {
+                       if (recv_debug)
+                       {
+                               printf(
+                       "eth_write_port(block_msg): eth%d got DL_PACK_RECV\n",
+                                       loc_port-eth_port_table);
+                       }
                        loc_port->etp_osdep.etp_recvrepl= block_msg;
                        ev_arg.ev_ptr= loc_port;
                        ev_enqueue(&loc_port->etp_osdep.etp_recvev,
@@ -202,12 +262,18 @@ acc_t *pack;
                ip_panic(("unable to receive"));
 
        assert(mess1.m_type == DL_TASK_REPLY &&
-               mess1.DL_PORT == mess1.DL_PORT &&
+               mess1.DL_PORT == eth_port->etp_osdep.etp_port &&
                mess1.DL_PROC == this_proc);
        assert((mess1.DL_STAT >> 16) == OK);
 
        if (mess1.DL_STAT & DL_PACK_RECV)
        {
+               if (recv_debug)
+               {
+                       printf(
+                       "eth_write_port(mess1): eth%d got DL_PACK_RECV\n",
+                               mess1.DL_PORT);
+               }
                eth_port->etp_osdep.etp_recvrepl= mess1;
                ev_arg.ev_ptr= eth_port;
                ev_enqueue(&eth_port->etp_osdep.etp_recvev, eth_recvev,
@@ -220,7 +286,7 @@ acc_t *pack;
        }
 
        /* If the port is in promiscuous mode or the packet is
-        * broadcasted/multicasted, enqueue the reply packet.
+        * broad- or multicast, enqueue the reply packet.
         */
        eth_dst_ptr= (u8_t *)ptr2acc_data(pack);
        multicast= (*eth_dst_ptr & 1);  /* low order bit indicates multicast */
@@ -268,18 +334,25 @@ message *m;
        if (stat & DL_PACK_SEND)
                write_int(loc_port);
        if (stat & DL_PACK_RECV)
+       {
+               if (recv_debug)
+               {
+                       printf("eth_rec: eth%d got DL_PACK_RECV\n",
+                               m->DL_PORT);
+               }
                read_int(loc_port, m->DL_COUNT);
+       }
 }
 
-#ifndef notdef
 PUBLIC int eth_get_stat(eth_port, eth_stat)
 eth_port_t *eth_port;
 eth_stat_t *eth_stat;
 {
-       acc_t *acc;
        int result;
        message mess, mlocked;
 
+       assert(!eth_port->etp_vlan);
+
        mess.m_type= DL_GETSTAT;
        mess.DL_PORT= eth_port->etp_osdep.etp_port;
        mess.DL_PROC= this_proc;
@@ -314,9 +387,7 @@ assert (result == 0);
        }
        return OK;
 }
-#endif
 
-#ifndef notdef
 PUBLIC void eth_set_rec_conf (eth_port, flags)
 eth_port_t *eth_port;
 u32_t flags;
@@ -325,6 +396,8 @@ u32_t flags;
        unsigned dl_flags;
        message mess, repl_mess;
 
+       assert(!eth_port->etp_vlan);
+
        dl_flags= DL_NOMODE;
        if (flags & NWEO_EN_BROAD)
                dl_flags |= DL_BROAD_REQ;
@@ -341,10 +414,10 @@ u32_t flags;
        do
        {
                result= send (eth_port->etp_osdep.etp_task, &mess);
-               if (result == ELOCKED)
-               /* Ethernet task is sending to this task, I hope */
+               if (result == ELOCKED)  /* etp_task is sending to this task,
+                                          I hope */
                {
-                       if (receive (eth_port->etp_osdep.etp_task,
+                       if (receive (eth_port->etp_osdep.etp_task, 
                                &repl_mess)< 0)
                        {
                                ip_panic(("unable to receive"));
@@ -368,7 +441,6 @@ u32_t flags;
        }
        eth_port->etp_osdep.etp_recvconf= flags;
 }
-#endif
 
 PRIVATE void write_int(eth_port)
 eth_port_t *eth_port;
@@ -383,7 +455,13 @@ eth_port_t *eth_port;
        eth_dst_ptr= (u8_t *)ptr2acc_data(pack);
        multicast= (*eth_dst_ptr & 1);  /* low order bit indicates multicast */
        if (multicast || (eth_port->etp_osdep.etp_recvconf & NWEO_EN_PROMISC))
+       {
+               assert(!no_ethWritePort);
+               no_ethWritePort= 1;
                eth_arrive(eth_port, pack, bf_bufsize(pack));
+               assert(no_ethWritePort);
+               no_ethWritePort= 0;
+       }
        else
                bf_afree(pack);
 
@@ -402,7 +480,11 @@ int count;
        cut_pack= bf_cut(pack, 0, count);
        bf_afree(pack);
 
+       assert(!no_ethWritePort);
+       no_ethWritePort= 1;
        eth_arrive(eth_port, cut_pack, count);
+       assert(no_ethWritePort);
+       no_ethWritePort= 0;
        
        eth_port->etp_flags &= ~(EPF_READ_IP|EPF_READ_SP);
        setup_read(eth_port);
@@ -418,6 +500,7 @@ eth_port_t *eth_port;
        ev_arg_t ev_arg;
        int i, r;
 
+       assert(!eth_port->etp_vlan);
        assert(!(eth_port->etp_flags & (EPF_READ_IP|EPF_READ_SP)));
 
        do
@@ -425,7 +508,7 @@ eth_port_t *eth_port;
                assert (!eth_port->etp_rd_pack);
 
                iovec= eth_port->etp_osdep.etp_rd_iovec;
-               pack= bf_memreq (ETH_MAX_PACK_SIZE);
+               pack= bf_memreq (ETH_MAX_PACK_SIZE_TAGGED);
 
                for (i=0, pack_ptr= pack; i<RD_IOVEC && pack_ptr;
                        i++, pack_ptr= pack_ptr->acc_next)
@@ -443,6 +526,11 @@ eth_port_t *eth_port;
 
                for (;;)
                {
+                       if (recv_debug)
+                       {
+                               printf("eth%d: sending DL_READV\n",
+                                       mess1.DL_PORT);
+                       }
                        r= send (eth_port->etp_osdep.etp_task, &mess1);
                        if (r != ELOCKED)
                                break;
@@ -453,7 +541,9 @@ eth_port_t *eth_port;
                                ip_panic(("unable to receive"));
 
                        loc_port= eth_port;
-                       if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT)
+                       if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT ||
+                               loc_port->etp_osdep.etp_task !=
+                               block_msg.m_source)
                        {
                                loc_port= find_port(&block_msg);
                        }
@@ -468,6 +558,12 @@ eth_port_t *eth_port;
                        }
                        if (block_msg.DL_STAT & DL_PACK_RECV)
                        {
+                               if (recv_debug)
+                               {
+                                       printf(
+                       "setup_read(block_msg): eth%d got DL_PACK_RECV\n",
+                                               block_msg.DL_PORT);
+                               }
                                assert(loc_port != eth_port);
                                loc_port->etp_osdep.etp_recvrepl= block_msg;
                                ev_arg.ev_ptr= loc_port;
@@ -490,11 +586,21 @@ eth_port_t *eth_port;
 
                if (mess1.DL_STAT & DL_PACK_RECV)
                {
+                       if (recv_debug)
+                       {
+                               printf(
+                       "setup_read(mess1): eth%d: got DL_PACK_RECV\n",
+                                       mess1.DL_PORT);
+                       }
                        /* packet received */
                        pack_ptr= bf_cut(pack, 0, mess1.DL_COUNT);
                        bf_afree(pack);
 
+                       assert(!no_ethWritePort);
+                       no_ethWritePort= 1;
                        eth_arrive(eth_port, pack_ptr, mess1.DL_COUNT);
+                       assert(no_ethWritePort);
+                       no_ethWritePort= 0;
                }
                else
                {
@@ -525,11 +631,17 @@ ev_arg_t ev_arg;
        m_ptr= &eth_port->etp_osdep.etp_recvrepl;
 
        assert(m_ptr->m_type == DL_TASK_REPLY);
-       assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT);
+       assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT &&
+               eth_port->etp_osdep.etp_task == m_ptr->m_source);
 
        assert(m_ptr->DL_STAT & DL_PACK_RECV);
        m_ptr->DL_STAT &= ~DL_PACK_RECV;
 
+       if (recv_debug)
+       {
+               printf("eth_recvev: eth%d got DL_PACK_RECV\n", m_ptr->DL_PORT);
+       }
+
        read_int(eth_port, m_ptr->DL_COUNT);
 }
 
@@ -545,7 +657,8 @@ ev_arg_t ev_arg;
        m_ptr= &eth_port->etp_osdep.etp_sendrepl;
 
        assert (m_ptr->m_type == DL_TASK_REPLY);
-       assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT);
+       assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT &&
+               eth_port->etp_osdep.etp_task == m_ptr->m_source);
 
        assert(m_ptr->DL_STAT & DL_PACK_SEND);
        m_ptr->DL_STAT &= ~DL_PACK_SEND;
@@ -562,7 +675,8 @@ message *m;
 
        for (i=0, loc_port= eth_port_table; i<eth_conf_nr; i++, loc_port++)
        {
-               if (loc_port->etp_osdep.etp_port == m->DL_PORT)
+               if (loc_port->etp_osdep.etp_port == m->DL_PORT &&
+                       loc_port->etp_osdep.etp_task == m->m_source)
                        break;
        }
        assert (i<eth_conf_nr);
@@ -570,5 +684,5 @@ message *m;
 }
 
 /*
- * $PchId: mnx_eth.c,v 1.8 1995/11/21 06:41:57 philip Exp $
+ * $PchId: mnx_eth.c,v 1.16 2005/06/28 14:24:37 philip Exp $
  */
index 836edf36488dbaf72acd66eb91eba44a67d30ef4..863156402884ba895fc4e94b82c2d0f2b7b613bb 100644 (file)
@@ -54,5 +54,5 @@ mq_t *mq;
 }
 
 /*
- * $PchId: mq.c,v 1.6 1996/05/07 21:10:16 philip Exp $
+ * $PchId: mq.c,v 1.7 1998/10/23 20:10:47 philip Exp $
  */
index 11caca691ca77c51412d95da8c307aa7bb0c9a13..f6742f85f691ab5382ec0d485436957f742882bd 100644 (file)
@@ -29,5 +29,5 @@ typedef struct osdep_eth_port
 #endif /* INET__OSDEP_ETH_H */
 
 /*
- * $PchId: osdep_eth.h,v 1.5 1995/11/21 06:41:28 philip Exp $
+ * $PchId: osdep_eth.h,v 1.6 2001/04/20 06:39:54 philip Exp $
  */
diff --git a/servers/inet/qp.c b/servers/inet/qp.c
new file mode 100644 (file)
index 0000000..7144022
--- /dev/null
@@ -0,0 +1,174 @@
+/*
+inet/qp.c
+
+Query parameters
+
+Created:       June 1995 by Philip Homburg <philip@f-mnx.phicoh.com>
+*/
+
+#include "inet.h"
+
+#include <sys/svrctl.h>
+#ifdef __minix_vmd
+#include <minix/queryparam.h>
+#else /* Minix 3 */
+#include <minix3/queryparam.h>
+#endif
+
+#include "generic/buf.h"
+#include "generic/clock.h"
+#include "generic/event.h"
+#include "generic/type.h"
+#include "generic/sr.h"
+
+#include "generic/tcp_int.h"
+#include "generic/udp_int.h"
+#include "mq.h"
+#include "qp.h"
+#include "sr_int.h"
+
+FORWARD int get_userdata ARGS(( int proc, vir_bytes vaddr, vir_bytes vlen,
+       void *buffer ));
+FORWARD int put_userdata ARGS(( int proc, vir_bytes vaddr, vir_bytes vlen,
+       void *buffer ));
+FORWARD int iqp_getc ARGS(( void ));
+FORWARD void iqp_putc ARGS(( int c ));
+
+PRIVATE struct export_param_list inet_ex_list[]=
+{      /* Variables of inet that are handed to qp_export(). */
+       QP_VARIABLE(sr_fd_table),
+       QP_VARIABLE(ip_dev),
+       QP_VARIABLE(tcp_fd_table),
+       QP_VARIABLE(tcp_conn_table),
+       QP_VARIABLE(tcp_cancel_f),
+       QP_VECTOR(udp_port_table, udp_port_table, ip_conf_nr), /* length is ip_conf_nr */
+       QP_VARIABLE(udp_fd_table),
+       QP_END()
+};
+
+PRIVATE struct export_params inet_ex_params= { inet_ex_list, NULL };   /* single set, no next */
+
+PRIVATE struct queryvars {     /* State of one query, shared with iqp_getc/iqp_putc. */
+       int proc;                       /* Process whose memory is read and written. */
+       struct svrqueryparam qpar;      /* Request descriptor copied in from that process. */
+       char parbuf[256], valbuf[256];  /* Staging: names coming in, values going out. */
+       char *param, *value;            /* Read position in parbuf, write position in valbuf. */
+       int r;                          /* Result of the latest copy; non-OK stops further I/O. */
+} *qvars;                      /* Set by qp_query() before it calls queryparam(). */
+
+PUBLIC void qp_init()
+{
+       qp_export(&inet_ex_params);     /* Register inet's exported parameter list. */
+}
+
+PUBLIC int qp_query(proc, argp)
+int proc;              /* Process that issued the query. */
+vir_bytes argp;        /* Address in 'proc' of its struct svrqueryparam. */
+{
+       /* Return values, sizes, or addresses of variables exported by inet, hex encoded. */
+
+       struct queryvars qv;
+       void *addr;
+       size_t n, size;
+       int byte;
+       int more;
+       static char hex[]= "0123456789ABCDEF";
+
+       qv.r= get_userdata(proc, argp, sizeof(qv.qpar), &qv.qpar);     /* Fetch the request. */
+
+       /* Export these to iqp_getc() and iqp_putc(). */
+       qvars= &qv;
+       qv.proc= proc;
+       qv.param= qv.parbuf + sizeof(qv.parbuf);       /* At the end: forces the first refill. */
+       qv.value= qv.valbuf;
+
+       do {
+               more= queryparam(iqp_getc, &addr, &size);
+               for (n= 0; n < size; n++) {    /* Two hex digits per byte. */
+                       byte= ((u8_t *) addr)[n];
+                       iqp_putc(hex[byte >> 4]);
+                       iqp_putc(hex[byte & 0x0F]);
+               }
+               iqp_putc(more ? ',' : 0);      /* ',' between answers, NUL after the last. */
+       } while (more);
+       return qv.r;   /* Status of the most recent copy. */
+}
+
+
+PRIVATE int iqp_getc()
+{
+       /* Return one character of the names to search for, or 0 when done or after an error. */
+       struct queryvars *qv= qvars;
+       size_t n;
+
+       if (qv->r != OK || qv->qpar.psize == 0) return 0;
+       if (qv->param == qv->parbuf + sizeof(qv->parbuf)) {
+               /* Need to fill the parameter buffer; copy at most what is left. */
+               n= sizeof(qv->parbuf);
+               if (qv->qpar.psize < n) n= qv->qpar.psize;
+               qv->r= get_userdata(qv->proc, (vir_bytes) qv->qpar.param, n,
+                                                               qv->parbuf);
+               if (qv->r != OK) return 0;
+               qv->qpar.param+= n;    /* Next refill continues after this chunk. */
+               qv->param= qv->parbuf;
+       }
+       qv->qpar.psize--;
+       return (u8_t) *qv->param++;    /* As unsigned, so the result is never negative. */
+}
+
+
+PRIVATE void iqp_putc(c)
+int c;
+{
+       /* Send one character back to the user; dropped after an error or when vsize is used up. */
+       struct queryvars *qv= qvars;
+       size_t n;
+
+       if (qv->r != OK || qv->qpar.vsize == 0) return;
+       *qv->value++= c;
+       qv->qpar.vsize--;
+       if (qv->value == qv->valbuf + sizeof(qv->valbuf)
+                                       || c == 0 || qv->qpar.vsize == 0) {
+               /* Buffer full, NUL written, or no room left: copy the value buffer to user space. */
+               n= qv->value - qv->valbuf;
+               qv->r= put_userdata(qv->proc, (vir_bytes) qv->qpar.value, n,
+                                                               qv->valbuf);
+               qv->qpar.value+= n;    /* Next flush lands after this chunk. */
+               qv->value= qv->valbuf;
+       }
+}
+
+PRIVATE int get_userdata(proc, vaddr, vlen, buffer)
+int proc;
+vir_bytes vaddr;
+vir_bytes vlen;
+void *buffer;
+{      /* Copy vlen bytes from address vaddr in process proc into buffer. */
+#ifdef __minix_vmd
+       return sys_copy(proc, SEG_D, (phys_bytes)vaddr, this_proc, SEG_D,
+               (phys_bytes)buffer, (phys_bytes)vlen);
+#else /* Minix 3 */
+       return sys_vircopy(proc, D, vaddr, SELF, D, (vir_bytes)buffer, vlen);
+#endif
+}
+
+
+PRIVATE int put_userdata(proc, vaddr, vlen, buffer)
+int proc;
+vir_bytes vaddr;
+vir_bytes vlen;
+void *buffer;
+{      /* Copy vlen bytes from buffer to address vaddr in process proc. */
+#ifdef __minix_vmd
+       return sys_copy(this_proc, SEG_D, (phys_bytes)buffer,
+               proc, SEG_D, (phys_bytes)vaddr, (phys_bytes)vlen);
+#else /* Minix 3 */
+       return sys_vircopy(SELF, D, (vir_bytes)buffer, proc, D, vaddr, vlen);
+#endif
+}
+
+
+
+/*
+ * $PchId: qp.c,v 1.7 2005/06/28 14:25:25 philip Exp $
+ */
diff --git a/servers/inet/qp.h b/servers/inet/qp.h
new file mode 100644 (file)
index 0000000..f4e6716
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+inet/qp.h
+
+Handle queryparams requests
+
+Created:       June 1995 by Philip Homburg <philip@f-mnx.phicoh.com>
+
+Copyright 1995 Philip Homburg
+*/
+
+#ifndef INET__QP_H
+#define INET__QP_H
+
+void qp_init ARGS(( void ));   /* Register inet's exported parameters. */
+int qp_query ARGS(( int proc, vir_bytes argp ));       /* Answer a query from 'proc'. */
+
+#endif /* INET__QP_H */
+
+/*
+ * $PchId: qp.h,v 1.4 2005/01/29 18:08:06 philip Exp $
+ */
diff --git a/servers/inet/sha2.c b/servers/inet/sha2.c
new file mode 100644 (file)
index 0000000..1e14d9f
--- /dev/null
@@ -0,0 +1,1095 @@
+/*     $FreeBSD: src/sys/crypto/sha2/sha2.c,v 1.2.2.2 2002/03/05 08:36:47 ume Exp $    */
+/*     $KAME: sha2.c,v 1.8 2001/11/08 01:07:52 itojun Exp $    */
+
+/*
+ * sha2.c
+ *
+ * Version 1.0.0beta1
+ *
+ * Written by Aaron D. Gifford <me@aarongifford.com>
+ *
+ * Copyright 2000 Aaron D. Gifford.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+
+#include <sys/types.h>
+/* #include <sys/time.h> */
+/* #include <sys/systm.h> */
+/* #include <machine/endian.h> */
+#include "sha2.h"
+
+/*
+ * ASSERT NOTE:
+ * Some sanity checking code is included using assert().  On my FreeBSD
+ * system, this additional code can be removed by compiling with NDEBUG
+ * defined.  Check your own systems manpage on assert() to see how to
+ * compile WITHOUT the sanity checking code on your system.
+ *
+ * UNROLLED TRANSFORM LOOP NOTE:
+ * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform
+ * loop version for the hash transform rounds (defined using macros
+ * later in this file).  Either define on the command line, for example:
+ *
+ *   cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c
+ *
+ * or define below:
+ *
+ *   #define SHA2_UNROLL_TRANSFORM
+ *
+ */
+
+#if defined(__bsdi__) || defined(__FreeBSD__)
+#define assert(x)
+#endif
+
+
+/*** SHA-256/384/512 Machine Architecture Definitions *****************/
+/*
+ * SHA2_BYTE_ORDER NOTE:
+ *
+ * Please make sure that your system defines SHA2_BYTE_ORDER.  If your
+ * architecture is little-endian, make sure it also defines
+ * SHA2_LITTLE_ENDIAN and that the two (SHA2_BYTE_ORDER and SHA2_LITTLE_ENDIAN) are
+ * equivalent.
+ *
+ * If your system does not define the above, then you can do so by
+ * hand like this:
+ *
+ *   #define SHA2_LITTLE_ENDIAN 1234
+ *   #define SHA2_BIG_ENDIAN    4321
+ *
+ * And for little-endian machines, add:
+ *
+ *   #define SHA2_BYTE_ORDER SHA2_LITTLE_ENDIAN 
+ *
+ * Or for big-endian machines:
+ *
+ *   #define SHA2_BYTE_ORDER SHA2_BIG_ENDIAN
+ *
+ * The FreeBSD machine this was written on defines BYTE_ORDER
+ * appropriately by including <sys/types.h> (which in turn includes
+ * <machine/endian.h> where the appropriate definitions are actually
+ * made).
+ */
+#if !defined(SHA2_BYTE_ORDER) || (SHA2_BYTE_ORDER != SHA2_LITTLE_ENDIAN && SHA2_BYTE_ORDER != SHA2_BIG_ENDIAN)
+#error Define SHA2_BYTE_ORDER to be equal to either SHA2_LITTLE_ENDIAN or SHA2_BIG_ENDIAN
+#endif
+
+/*
+ * Define the following sha2_* types to types of the correct length on
+ * the native architecture.  Most BSD systems and Linux define u_intXX_t
+ * types.  Machines with very recent ANSI C headers can use the
+ * uintXX_t definitions from inttypes.h by defining SHA2_USE_INTTYPES_H
+ * during compile or in the sha2.h header file.
+ *
+ * Machines that support neither u_intXX_t nor inttypes.h's uintXX_t
+ * will need to define these three typedefs below (and the appropriate
+ * ones in sha2.h too) by hand according to their system architecture.
+ *
+ * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t
+ * types and pointing out recent ANSI C support for uintXX_t in inttypes.h.
+ */
+#if 0 /*def SHA2_USE_INTTYPES_H*/
+
+typedef uint8_t  sha2_byte;    /* Exactly 1 byte */
+typedef uint32_t sha2_word32;  /* Exactly 4 bytes */
+typedef uint64_t sha2_word64;  /* Exactly 8 bytes */
+
+#else /* SHA2_USE_INTTYPES_H */
+
+typedef u_int8_t  sha2_byte;   /* Exactly 1 byte */
+typedef u_int32_t sha2_word32; /* Exactly 4 bytes */
+typedef u_int64_t sha2_word64; /* Exactly 8 bytes */
+
+#endif /* SHA2_USE_INTTYPES_H */
+
+
+/*** SHA-256/384/512 Various Length Definitions ***********************/
+/* NOTE: Most of these are in sha2.h */
+#define SHA256_SHORT_BLOCK_LENGTH      (SHA256_BLOCK_LENGTH - 8)
+#define SHA384_SHORT_BLOCK_LENGTH      (SHA384_BLOCK_LENGTH - 16)
+#define SHA512_SHORT_BLOCK_LENGTH      (SHA512_BLOCK_LENGTH - 16)
+
+
+/*** ENDIAN REVERSAL MACROS *******************************************/
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN      /* Byte swaps exist only in little-endian builds. */
+#define REVERSE32(w,x) { \
+       sha2_word32 tmp = (w); \
+       tmp = (tmp >> 16) | (tmp << 16); /* swap the 16-bit halves */ \
+       (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \
+}
+#define REVERSE64(w,x) { \
+       sha2_word64 tmp = (w); \
+       tmp = (tmp >> 32) | (tmp << 32); /* swap the 32-bit halves */ \
+       tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \
+             ((tmp & 0x00ff00ff00ff00ffULL) << 8); \
+       (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \
+             ((tmp & 0x0000ffff0000ffffULL) << 16); \
+}
+#if MINIX_64BIT        /* Redefine with ex64hi()/ex64lo()/make64() instead of 64-bit shifts. */
+#undef REVERSE64
+#define REVERSE64(w,x) { \
+       u32_t hi, lo; /* lo gets the reversed high word, hi the reversed low word */ \
+       REVERSE32(ex64hi((w)), lo); \
+       REVERSE32(ex64lo((w)), hi); \
+       (x) = make64(lo, hi); \
+}
+#endif /* MINIX_64BIT */
+#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+
+/*
+ * Macro for incrementally adding the unsigned 64-bit integer n to the
+ * unsigned 128-bit integer (represented using a two-element array of
+ * 64-bit words, w[0] receiving n and w[1] the carry); n is evaluated twice.
+ */
+#define ADDINC128(w,n) { \
+       (w)[0] += (sha2_word64)(n); \
+       if ((w)[0] < (n)) { /* sum is smaller than the addend: it wrapped */ \
+               (w)[1]++; \
+       } \
+}
+
+/*** THE SIX LOGICAL FUNCTIONS ****************************************/
+/*
+ * Bit shifting and rotation (used by the six SHA-XYZ logical functions):
+ *
+ *   NOTE:  The naming of R and S appears backwards here (R is a SHIFT and
+ *   S is a ROTATION) because the SHA-256/384/512 description document
+ *   (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this
+ *   same "backwards" definition.
+ */
+/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
+#define R(b,x)                 ((x) >> (b))
+/* 32-bit Rotate-right (used in SHA-256): */
+#define S32(b,x)       (((x) >> (b)) | ((x) << (32 - (b))))
+/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */
+#define S64(b,x)       (((x) >> (b)) | ((x) << (64 - (b))))
+
+/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */
+#define Ch(x,y,z)      (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z)     (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+/* Four of six logical functions used in SHA-256: */
+#define Sigma0_256(x)  (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+#define Sigma1_256(x)  (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+#define sigma0_256(x)  (S32(7,  (x)) ^ S32(18, (x)) ^ R(3 ,   (x)))
+#define sigma1_256(x)  (S32(17, (x)) ^ S32(19, (x)) ^ R(10,   (x)))
+
+/* Four of six logical functions used in SHA-384 and SHA-512: */
+#define Sigma0_512(x)  (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x)))
+#define Sigma1_512(x)  (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x)))
+#define sigma0_512(x)  (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7,   (x)))
+#define sigma1_512(x)  (S64(19, (x)) ^ S64(61, (x)) ^ R( 6,   (x)))
+
+/*** INTERNAL FUNCTION PROTOTYPES *************************************/
+/* NOTE: These should not be accessed directly from outside this
+ * library -- they are intended for private internal visibility/use
+ * only.
+ */
+void SHA512_Last(SHA512_CTX*);
+void SHA256_Transform(SHA256_CTX*, const sha2_word32*);
+void SHA512_Transform(SHA512_CTX*, const sha2_word64*);
+
+
+/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/
+/* Hash constant words K for SHA-256: */
+const static sha2_word32 K256[64] = {
+       0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
+       0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
+       0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
+       0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
+       0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+       0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
+       0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
+       0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
+       0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
+       0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+       0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
+       0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
+       0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
+       0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
+       0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+       0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+};
+
+/* Initial hash value H for SHA-256: */
+const static sha2_word32 sha256_initial_hash_value[8] = {
+       0x6a09e667UL,
+       0xbb67ae85UL,
+       0x3c6ef372UL,
+       0xa54ff53aUL,
+       0x510e527fUL,
+       0x9b05688cUL,
+       0x1f83d9abUL,
+       0x5be0cd19UL
+};
+
+#if !NO_64BIT
+/* Hash constant words K for SHA-384 and SHA-512.  SHA512_Transform adds
+ * K512[j] in round j, for j = 0..79: */
+static const sha2_word64 K512[80] = {
+       0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+       0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+       0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+       0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+       0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+       0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+       0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+       0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+       0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+       0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+       0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+       0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+       0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+       0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+       0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+       0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+       0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+       0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+       0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+       0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+       0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+       0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+       0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+       0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+       0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+       0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+       0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+       0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+       0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+       0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+       0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+       0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+       0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+       0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+       0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+       0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+       0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+       0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+       0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+       0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/* Initial hash value H for SHA-384; SHA384_Init copies these eight
+ * words into context->state: */
+static const sha2_word64 sha384_initial_hash_value[8] = {
+       0xcbbb9d5dc1059ed8ULL,
+       0x629a292a367cd507ULL,
+       0x9159015a3070dd17ULL,
+       0x152fecd8f70e5939ULL,
+       0x67332667ffc00b31ULL,
+       0x8eb44a8768581511ULL,
+       0xdb0c2e0d64f98fa7ULL,
+       0x47b5481dbefa4fa4ULL
+};
+
+/* Initial hash value H for SHA-512; SHA512_Init copies these eight
+ * words into context->state: */
+static const sha2_word64 sha512_initial_hash_value[8] = {
+       0x6a09e667f3bcc908ULL,
+       0xbb67ae8584caa73bULL,
+       0x3c6ef372fe94f82bULL,
+       0xa54ff53a5f1d36f1ULL,
+       0x510e527fade682d1ULL,
+       0x9b05688c2b3e6c1fULL,
+       0x1f83d9abfb41bd6bULL,
+       0x5be0cd19137e2179ULL
+};
+#endif /* !NO_64BIT */
+
+/*
+ * Lookup table used by the SHA256/384/512_End() functions to turn each
+ * 4-bit nibble of the digest into a lowercase hexadecimal character.
+ * Declared as a const array so the table itself cannot be repointed.
+ */
+static const char sha2_hex_digits[] = "0123456789abcdef";
+
+
+/*** SHA-256: *********************************************************/
+/*
+ * Prepare a SHA-256 context for a new message: empty block buffer,
+ * initial hash value in state[], zero bit count.  A null context is
+ * silently ignored.
+ */
+void SHA256_Init(SHA256_CTX* context) {
+       if (!context) {
+               return;
+       }
+       /* Clear the block buffer, then load the initial hash value. */
+       bzero(context->buffer, SHA256_BLOCK_LENGTH);
+       bcopy(sha256_initial_hash_value, context->state, SHA256_DIGEST_LENGTH);
+       /* No message bits have been consumed yet. */
+#if MINIX_64BIT
+       context->bitcount= cvu64(0);
+#else /* !MINIX_64BIT */
+       context->bitcount = 0;
+#endif /* MINIX_64BIT */
+}
+
+#ifdef SHA2_UNROLL_TRANSFORM
+
+/* Unrolled SHA-256 round macros: */
+/*
+ * Each macro expands to several statements and uses locals of the
+ * unrolled SHA256_Transform (data, W256, T1, s0, s1, j) by name, so it
+ * is only usable as a complete statement inside that function.  The
+ * caller rotates the a..h argument order between invocations instead
+ * of moving values between the working variables.
+ */
+
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+
+/* Rounds 0..15, little-endian build: the next input word is passed
+ * through REVERSE32 into W256[j] before it is used. */
+#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h)      \
+       REVERSE32(*data++, W256[j]); \
+       T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \
+             K256[j] + W256[j]; \
+       (d) += T1; \
+       (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
+       j++
+
+
+#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+
+/* Rounds 0..15, other byte orders: the input word is copied into
+ * W256[j] unchanged. */
+#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h)      \
+       T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \
+            K256[j] + (W256[j] = *data++); \
+       (d) += T1; \
+       (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
+       j++
+
+#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+
+/* Rounds 16 and up: W256 is used as a 16-word circular buffer (indices
+ * are masked with 0x0f); the slot for round j is updated in place from
+ * the slots at j+1, j+9 and j+14 and then fed into the round. */
+#define ROUND256(a,b,c,d,e,f,g,h)      \
+       s0 = W256[(j+1)&0x0f]; \
+       s0 = sigma0_256(s0); \
+       s1 = W256[(j+14)&0x0f]; \
+       s1 = sigma1_256(s1); \
+       T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \
+            (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \
+       (d) += T1; \
+       (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
+       j++
+
+/*
+ * Unrolled SHA-256 compression function: folds the 16 words at data
+ * into context->state.  context->buffer is used as the message
+ * schedule (W256) and is overwritten; the callers in this file pass
+ * either context->buffer itself or the caller's input as data.
+ */
+void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
+       sha2_word32     a, b, c, d, e, f, g, h, s0, s1;
+       sha2_word32     T1, *W256;
+       int             j;
+
+       /* The block buffer doubles as the 16-word message schedule. */
+       W256 = (sha2_word32*)context->buffer;
+
+       /* Initialize registers with the prev. intermediate value */
+       a = context->state[0];
+       b = context->state[1];
+       c = context->state[2];
+       d = context->state[3];
+       e = context->state[4];
+       f = context->state[5];
+       g = context->state[6];
+       h = context->state[7];
+
+       j = 0;
+       do {
+               /* Rounds 0 to 15 (unrolled): */
+               /* Each macro increments j; eight per pass, so two passes. */
+               ROUND256_0_TO_15(a,b,c,d,e,f,g,h);
+               ROUND256_0_TO_15(h,a,b,c,d,e,f,g);
+               ROUND256_0_TO_15(g,h,a,b,c,d,e,f);
+               ROUND256_0_TO_15(f,g,h,a,b,c,d,e);
+               ROUND256_0_TO_15(e,f,g,h,a,b,c,d);
+               ROUND256_0_TO_15(d,e,f,g,h,a,b,c);
+               ROUND256_0_TO_15(c,d,e,f,g,h,a,b);
+               ROUND256_0_TO_15(b,c,d,e,f,g,h,a);
+       } while (j < 16);
+
+       /* Now for the remaining rounds to 64: */
+       do {
+               ROUND256(a,b,c,d,e,f,g,h);
+               ROUND256(h,a,b,c,d,e,f,g);
+               ROUND256(g,h,a,b,c,d,e,f);
+               ROUND256(f,g,h,a,b,c,d,e);
+               ROUND256(e,f,g,h,a,b,c,d);
+               ROUND256(d,e,f,g,h,a,b,c);
+               ROUND256(c,d,e,f,g,h,a,b);
+               ROUND256(b,c,d,e,f,g,h,a);
+       } while (j < 64);
+
+       /* Compute the current intermediate hash value */
+       context->state[0] += a;
+       context->state[1] += b;
+       context->state[2] += c;
+       context->state[3] += d;
+       context->state[4] += e;
+       context->state[5] += f;
+       context->state[6] += g;
+       context->state[7] += h;
+
+       /* Clean up */
+       /* Plain stores to locals that are about to go out of scope. */
+       a = b = c = d = e = f = g = h = T1 = 0;
+}
+
+#else /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * Loop-based SHA-256 compression function: folds the 16 words at data
+ * into context->state.  context->buffer serves as a 16-word circular
+ * message schedule and is overwritten in the process.
+ */
+void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
+       sha2_word32     a, b, c, d, e, f, g, h;
+       sha2_word32     lo_sig, hi_sig;         /* sigma0 / sigma1 terms */
+       sha2_word32     sum1, sum2;
+       sha2_word32     *sched;
+       int             t;
+
+       /* The block buffer doubles as the message schedule. */
+       sched = (sha2_word32*)context->buffer;
+
+       /* Load the working variables from the running hash value. */
+       a = context->state[0];
+       b = context->state[1];
+       c = context->state[2];
+       d = context->state[3];
+       e = context->state[4];
+       f = context->state[5];
+       g = context->state[6];
+       h = context->state[7];
+
+       /* Rounds 0..15 take their schedule word straight from the input. */
+       for (t = 0; t < 16; t++) {
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+               /* Byte-swap the input word into the schedule first. */
+               REVERSE32(*data++,sched[t]);
+               sum1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[t] + sched[t];
+#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+               /* Input word is stored in the schedule as-is. */
+               sum1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[t] + (sched[t] = *data++);
+#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+               sum2 = Sigma0_256(a) + Maj(a, b, c);
+               h = g;
+               g = f;
+               f = e;
+               e = d + sum1;
+               d = c;
+               c = b;
+               b = a;
+               a = sum1 + sum2;
+       }
+
+       /* Rounds 16..63 extend the schedule in place, modulo 16 slots. */
+       for (; t < 64; t++) {
+               lo_sig = sigma0_256(sched[(t+1)&0x0f]);
+               hi_sig = sigma1_256(sched[(t+14)&0x0f]);
+               sched[t&0x0f] += hi_sig + sched[(t+9)&0x0f] + lo_sig;
+
+               sum1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[t] +
+                    sched[t&0x0f];
+               sum2 = Sigma0_256(a) + Maj(a, b, c);
+               h = g;
+               g = f;
+               f = e;
+               e = d + sum1;
+               d = c;
+               c = b;
+               b = a;
+               a = sum1 + sum2;
+       }
+
+       /* Add the working variables back into the running hash value. */
+       context->state[0] += a;
+       context->state[1] += b;
+       context->state[2] += c;
+       context->state[3] += d;
+       context->state[4] += e;
+       context->state[5] += f;
+       context->state[6] += g;
+       context->state[7] += h;
+
+       /* Clear the working variables. */
+       a = b = c = d = e = f = g = h = sum1 = sum2 = 0;
+}
+
+#endif /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * Feed len bytes at data into a SHA-256 computation.  Bytes that do
+ * not complete a block are kept in context->buffer; complete blocks
+ * are hashed immediately.  context->bitcount grows by 8 per byte.
+ * A zero len returns at once without looking at the pointers.
+ */
+void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
+       unsigned int    room, filled;
+
+       /* An empty update is valid and changes nothing. */
+       if (len == 0) {
+               return;
+       }
+
+       /* With data to hash, both pointers must be non-null. */
+       assert(context != (SHA256_CTX*)0 && data != (sha2_byte*)0);
+
+       /* Bytes already waiting in the block buffer. */
+#if MINIX_64BIT
+       filled= rem64u(context->bitcount, SHA256_BLOCK_LENGTH*8)/8;
+#else /* !MINIX_64BIT */
+       filled = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH;
+#endif /* MINIX_64BIT */
+       if (filled > 0) {
+               /* Space left before the buffered block is complete. */
+               room = SHA256_BLOCK_LENGTH - filled;
+
+               if (len < room) {
+                       /* Still short of a full block: append and stop. */
+                       bcopy(data, &context->buffer[filled], len);
+#if MINIX_64BIT
+                       context->bitcount= add64u(context->bitcount, len << 3);
+#else /* !MINIX_64BIT */
+                       context->bitcount += len << 3;
+#endif /* MINIX_64BIT */
+                       filled = room = 0;
+                       return;
+               }
+
+               /* Complete the buffered block and hash it. */
+               bcopy(data, &context->buffer[filled], room);
+#if MINIX_64BIT
+               context->bitcount= add64u(context->bitcount,
+                       room << 3);
+#else /* !MINIX_64BIT */
+               context->bitcount += room << 3;
+#endif /* MINIX_64BIT */
+               data += room;
+               len -= room;
+               SHA256_Transform(context, (sha2_word32*)context->buffer);
+       }
+       /* Hash whole blocks directly from the caller's data. */
+       while (len >= SHA256_BLOCK_LENGTH) {
+               SHA256_Transform(context, (const sha2_word32*)data);
+#if MINIX_64BIT
+               context->bitcount= add64u(context->bitcount,
+                       SHA256_BLOCK_LENGTH << 3);
+#else /* !MINIX_64BIT */
+               context->bitcount += SHA256_BLOCK_LENGTH << 3;
+#endif /* MINIX_64BIT */
+               data += SHA256_BLOCK_LENGTH;
+               len -= SHA256_BLOCK_LENGTH;
+       }
+       if (len > 0) {
+               /* Keep the trailing partial block for the next call. */
+               bcopy(data, context->buffer, len);
+#if MINIX_64BIT
+               context->bitcount= add64u(context->bitcount, len << 3);
+#else /* !MINIX_64BIT */
+               context->bitcount += len << 3;
+#endif /* MINIX_64BIT */
+       }
+       /* Clear the bookkeeping locals. */
+       filled = room = 0;
+}
+
+/*
+ * Finish a SHA-256 computation.  When digest is non-null the buffered
+ * data is padded, the message bit count is appended, the remaining
+ * transform(s) are run and SHA256_DIGEST_LENGTH bytes of hash are
+ * written to digest[].  When digest is null no hash is produced.
+ * In both cases the entire context is wiped before returning, so it
+ * must go through SHA256_Init() again before reuse.
+ */
+void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) {
+       sha2_word32     *d = (sha2_word32*)digest;
+       unsigned int    usedspace;
+
+       /* Sanity check: */
+       assert(context != (SHA256_CTX*)0);
+
+       /* If no digest buffer is passed, we don't bother doing this: */
+       if (digest != (sha2_byte*)0) {
+               /* Must be computed before bitcount is byte-swapped below. */
+#if MINIX_64BIT
+               usedspace= rem64u(context->bitcount, SHA256_BLOCK_LENGTH*8)/8;
+#else /* !MINIX_64BIT */
+               usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH;
+#endif /* MINIX_64BIT */
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+               /* Convert FROM host byte order */
+               REVERSE64(context->bitcount,context->bitcount);
+#endif
+               if (usedspace > 0) {
+                       /* Begin padding with a 1 bit: */
+                       context->buffer[usedspace++] = 0x80;
+
+                       if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) {
+                               /* Set-up for the last transform: */
+                               bzero(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace);
+                       } else {
+                               /* No room left for the length field:
+                                * pad out this block and hash it. */
+                               if (usedspace < SHA256_BLOCK_LENGTH) {
+                                       bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace);
+                               }
+                               /* Do second-to-last transform: */
+                               SHA256_Transform(context, (sha2_word32*)context->buffer);
+
+                               /* And set-up for the last transform: */
+                               bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
+                       }
+               } else {
+                       /* Set-up for the last transform: */
+                       bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
+
+                       /* Begin padding with a 1 bit: */
+                       *context->buffer = 0x80;
+               }
+               /* Set the bit count: */
+               *(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount;
+
+               /* Final transform: */
+               SHA256_Transform(context, (sha2_word32*)context->buffer);
+
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+               {
+                       /* Byte-swap each state word and store it in digest */
+                       int     j;
+                       for (j = 0; j < 8; j++) {
+                               REVERSE32(context->state[j],context->state[j]);
+                               *d++ = context->state[j];
+                       }
+               }
+#else
+               bcopy(context->state, d, SHA256_DIGEST_LENGTH);
+#endif
+       }
+
+       /* Clean up state data.  sizeof(*context) covers the whole
+        * structure; sizeof(context) would only be the pointer size. */
+       bzero(context, sizeof(*context));
+}
+
+/*
+ * Finish a SHA-256 computation and write the digest to buffer as
+ * 2 * SHA256_DIGEST_LENGTH lowercase hexadecimal characters followed
+ * by a terminating NUL.  Returns buffer (the start of that string).
+ * When buffer is null the context is wiped and null is returned.
+ */
+char *SHA256_End(SHA256_CTX* context, char buffer[]) {
+       sha2_byte       digest[SHA256_DIGEST_LENGTH], *d = digest;
+       char            *out = buffer;
+       int             i;
+
+       /* Sanity check: */
+       assert(context != (SHA256_CTX*)0);
+
+       if (buffer != (char*)0) {
+               SHA256_Final(digest, context);
+
+               /* Two hex characters per digest byte, high nibble first.
+                * A separate cursor keeps buffer at the string start. */
+               for (i = 0; i < SHA256_DIGEST_LENGTH; i++) {
+                       *out++ = sha2_hex_digits[(*d & 0xf0) >> 4];
+                       *out++ = sha2_hex_digits[*d & 0x0f];
+                       d++;
+               }
+               *out = (char)0;
+       } else {
+               /* Wipe the whole structure, not just pointer-size bytes. */
+               bzero(context, sizeof(*context));
+       }
+       /* Do not leave the binary digest on the stack. */
+       bzero(digest, SHA256_DIGEST_LENGTH);
+       return buffer;
+}
+
+/*
+ * One-shot helper: runs Init, Update and End over len bytes at data
+ * using a context on the stack, and returns whatever SHA256_End()
+ * returns for the digest buffer.
+ */
+char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) {
+       SHA256_CTX      ctx;
+       char            *result;
+
+       SHA256_Init(&ctx);
+       SHA256_Update(&ctx, data, len);
+       result = SHA256_End(&ctx, digest);
+       return result;
+}
+
+#if !NO_64BIT
+
+/*** SHA-512: *********************************************************/
+/*
+ * Prepare a SHA-512 context for a new message: empty block buffer,
+ * initial hash value in state[], both bit-count words zero.  A null
+ * context is silently ignored.
+ */
+void SHA512_Init(SHA512_CTX* context) {
+       if (!context) {
+               return;
+       }
+       /* Clear the block buffer, then load the initial hash value. */
+       bzero(context->buffer, SHA512_BLOCK_LENGTH);
+       bcopy(sha512_initial_hash_value, context->state, SHA512_DIGEST_LENGTH);
+       /* No message bits have been consumed yet. */
+       context->bitcount[1] = 0;
+       context->bitcount[0] = 0;
+}
+
+#ifdef SHA2_UNROLL_TRANSFORM
+
+/* Unrolled SHA-512 round macros: */
+/*
+ * Each macro expands to several statements and uses locals of the
+ * unrolled SHA512_Transform (data, W512, T1, s0, s1, j) by name, so it
+ * is only usable as a complete statement inside that function.
+ */
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+
+/* Rounds 0..15, little-endian build: the next input word is passed
+ * through REVERSE64 into W512[j] before it is used.  The last three
+ * steps are joined with comma operators rather than semicolons. */
+#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h)      \
+       REVERSE64(*data++, W512[j]); \
+       T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \
+             K512[j] + W512[j]; \
+       (d) += T1, \
+       (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)), \
+       j++
+
+
+#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+
+/* Rounds 0..15, other byte orders: the input word is copied into
+ * W512[j] unchanged. */
+#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h)      \
+       T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \
+             K512[j] + (W512[j] = *data++); \
+       (d) += T1; \
+       (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \
+       j++
+
+#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+
+/* Rounds 16 and up: W512 is used as a 16-word circular buffer (indices
+ * are masked with 0x0f); the slot for round j is updated in place from
+ * the slots at j+1, j+9 and j+14 and then fed into the round. */
+#define ROUND512(a,b,c,d,e,f,g,h)      \
+       s0 = W512[(j+1)&0x0f]; \
+       s0 = sigma0_512(s0); \
+       s1 = W512[(j+14)&0x0f]; \
+       s1 = sigma1_512(s1); \
+       T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + \
+             (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \
+       (d) += T1; \
+       (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \
+       j++
+
+/*
+ * Unrolled SHA-512 compression function: folds the 16 words at data
+ * into context->state.  context->buffer is used as the message
+ * schedule (W512) and is overwritten.
+ */
+void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) {
+       sha2_word64     a, b, c, d, e, f, g, h, s0, s1;
+       /* W512: the block buffer doubles as the 16-word message schedule. */
+       sha2_word64     T1, *W512 = (sha2_word64*)context->buffer;
+       int             j;
+
+       /* Initialize registers with the prev. intermediate value */
+       a = context->state[0];
+       b = context->state[1];
+       c = context->state[2];
+       d = context->state[3];
+       e = context->state[4];
+       f = context->state[5];
+       g = context->state[6];
+       h = context->state[7];
+
+       j = 0;
+       do {
+               /* Rounds 0 to 15; each macro increments j, eight per pass. */
+               ROUND512_0_TO_15(a,b,c,d,e,f,g,h);
+               ROUND512_0_TO_15(h,a,b,c,d,e,f,g);
+               ROUND512_0_TO_15(g,h,a,b,c,d,e,f);
+               ROUND512_0_TO_15(f,g,h,a,b,c,d,e);
+               ROUND512_0_TO_15(e,f,g,h,a,b,c,d);
+               ROUND512_0_TO_15(d,e,f,g,h,a,b,c);
+               ROUND512_0_TO_15(c,d,e,f,g,h,a,b);
+               ROUND512_0_TO_15(b,c,d,e,f,g,h,a);
+       } while (j < 16);
+
+       /* Now for the remaining rounds up to 79: */
+       do {
+               ROUND512(a,b,c,d,e,f,g,h);
+               ROUND512(h,a,b,c,d,e,f,g);
+               ROUND512(g,h,a,b,c,d,e,f);
+               ROUND512(f,g,h,a,b,c,d,e);
+               ROUND512(e,f,g,h,a,b,c,d);
+               ROUND512(d,e,f,g,h,a,b,c);
+               ROUND512(c,d,e,f,g,h,a,b);
+               ROUND512(b,c,d,e,f,g,h,a);
+       } while (j < 80);
+
+       /* Compute the current intermediate hash value */
+       context->state[0] += a;
+       context->state[1] += b;
+       context->state[2] += c;
+       context->state[3] += d;
+       context->state[4] += e;
+       context->state[5] += f;
+       context->state[6] += g;
+       context->state[7] += h;
+
+       /* Clean up */
+       /* Plain stores to locals that are about to go out of scope. */
+       a = b = c = d = e = f = g = h = T1 = 0;
+}
+
+#else /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * Loop-based SHA-512 compression function: folds the 16 words at data
+ * into context->state.  context->buffer serves as a 16-word circular
+ * message schedule and is overwritten in the process.
+ */
+void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) {
+       sha2_word64     a, b, c, d, e, f, g, h;
+       sha2_word64     lo_sig, hi_sig;         /* sigma0 / sigma1 terms */
+       sha2_word64     sum1, sum2;
+       sha2_word64     *sched;
+       int             t;
+
+       /* The block buffer doubles as the message schedule. */
+       sched = (sha2_word64*)context->buffer;
+
+       /* Load the working variables from the running hash value. */
+       a = context->state[0];
+       b = context->state[1];
+       c = context->state[2];
+       d = context->state[3];
+       e = context->state[4];
+       f = context->state[5];
+       g = context->state[6];
+       h = context->state[7];
+
+       /* Rounds 0..15 take their schedule word straight from the input. */
+       for (t = 0; t < 16; t++) {
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+               /* Byte-swap the input word into the schedule first. */
+               REVERSE64(*data++, sched[t]);
+               sum1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[t] + sched[t];
+#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+               /* Input word is stored in the schedule as-is. */
+               sum1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[t] + (sched[t] = *data++);
+#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */
+               sum2 = Sigma0_512(a) + Maj(a, b, c);
+               h = g;
+               g = f;
+               f = e;
+               e = d + sum1;
+               d = c;
+               c = b;
+               b = a;
+               a = sum1 + sum2;
+       }
+
+       /* Rounds 16..79 extend the schedule in place, modulo 16 slots. */
+       for (; t < 80; t++) {
+               lo_sig = sigma0_512(sched[(t+1)&0x0f]);
+               hi_sig = sigma1_512(sched[(t+14)&0x0f]);
+               sched[t&0x0f] += hi_sig + sched[(t+9)&0x0f] + lo_sig;
+
+               sum1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[t] +
+                    sched[t&0x0f];
+               sum2 = Sigma0_512(a) + Maj(a, b, c);
+               h = g;
+               g = f;
+               f = e;
+               e = d + sum1;
+               d = c;
+               c = b;
+               b = a;
+               a = sum1 + sum2;
+       }
+
+       /* Add the working variables back into the running hash value. */
+       context->state[0] += a;
+       context->state[1] += b;
+       context->state[2] += c;
+       context->state[3] += d;
+       context->state[4] += e;
+       context->state[5] += f;
+       context->state[6] += g;
+       context->state[7] += h;
+
+       /* Clear the working variables. */
+       a = b = c = d = e = f = g = h = sum1 = sum2 = 0;
+}
+
+#endif /* SHA2_UNROLL_TRANSFORM */
+
+/*
+ * Feed len bytes at data into a SHA-512 computation.  Bytes that do
+ * not complete a block are kept in context->buffer; complete blocks
+ * are hashed immediately.  The bit count is advanced with ADDINC128.
+ * A zero len returns at once without looking at the pointers.
+ */
+void SHA512_Update(SHA512_CTX* context, const sha2_byte *data, size_t len) {
+       unsigned int    room, filled;
+
+       /* An empty update is valid and changes nothing. */
+       if (len == 0) {
+               return;
+       }
+
+       /* With data to hash, both pointers must be non-null. */
+       assert(context != (SHA512_CTX*)0 && data != (sha2_byte*)0);
+
+       /* Bytes already waiting in the block buffer. */
+       filled = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH;
+       if (filled > 0) {
+               /* Space left before the buffered block is complete. */
+               room = SHA512_BLOCK_LENGTH - filled;
+
+               if (len < room) {
+                       /* Still short of a full block: append and stop. */
+                       bcopy(data, &context->buffer[filled], len);
+                       ADDINC128(context->bitcount, len << 3);
+                       filled = room = 0;
+                       return;
+               }
+
+               /* Complete the buffered block and hash it. */
+               bcopy(data, &context->buffer[filled], room);
+               ADDINC128(context->bitcount, room << 3);
+               data += room;
+               len -= room;
+               SHA512_Transform(context, (sha2_word64*)context->buffer);
+       }
+       /* Hash whole blocks directly from the caller's data. */
+       while (len >= SHA512_BLOCK_LENGTH) {
+               SHA512_Transform(context, (const sha2_word64*)data);
+               ADDINC128(context->bitcount, SHA512_BLOCK_LENGTH << 3);
+               data += SHA512_BLOCK_LENGTH;
+               len -= SHA512_BLOCK_LENGTH;
+       }
+       if (len > 0) {
+               /* Keep the trailing partial block for the next call. */
+               bcopy(data, context->buffer, len);
+               ADDINC128(context->bitcount, len << 3);
+       }
+       /* Clear the bookkeeping locals. */
+       filled = room = 0;
+}
+
+/*
+ * Shared tail of SHA512_Final() and SHA384_Final(): appends the 0x80
+ * padding byte and zero fill to the buffered data, stores the two
+ * bit-count words at offset SHA512_SHORT_BLOCK_LENGTH of the block
+ * buffer and runs the last transform (plus one extra transform when
+ * the padding does not fit in the current block).  The result is left
+ * in context->state; the context is not wiped here.
+ */
+void SHA512_Last(SHA512_CTX* context) {
+       unsigned int    usedspace;
+
+       /* Taken before bitcount is byte-swapped below. */
+       usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH;
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+       /* Convert FROM host byte order */
+       REVERSE64(context->bitcount[0],context->bitcount[0]);
+       REVERSE64(context->bitcount[1],context->bitcount[1]);
+#endif
+       if (usedspace > 0) {
+               /* Begin padding with a 1 bit: */
+               context->buffer[usedspace++] = 0x80;
+
+               if (usedspace <= SHA512_SHORT_BLOCK_LENGTH) {
+                       /* Set-up for the last transform: */
+                       bzero(&context->buffer[usedspace], SHA512_SHORT_BLOCK_LENGTH - usedspace);
+               } else {
+                       /* No room left for the length field in this block. */
+                       if (usedspace < SHA512_BLOCK_LENGTH) {
+                               bzero(&context->buffer[usedspace], SHA512_BLOCK_LENGTH - usedspace);
+                       }
+                       /* Do second-to-last transform: */
+                       SHA512_Transform(context, (sha2_word64*)context->buffer);
+
+                       /* And set-up for the last transform: */
+                       /* NOTE(review): the SHA-256 counterpart clears only
+                        * its SHORT_BLOCK_LENGTH here; this clears
+                        * SHA512_BLOCK_LENGTH - 2 bytes.  Presumably harmless
+                        * because the length words stored below overwrite
+                        * the tail of the block -- confirm. */
+                       bzero(context->buffer, SHA512_BLOCK_LENGTH - 2);
+               }
+       } else {
+               /* Prepare for final transform: */
+               bzero(context->buffer, SHA512_SHORT_BLOCK_LENGTH);
+
+               /* Begin padding with a 1 bit: */
+               *context->buffer = 0x80;
+       }
+       /* Store the length of input data (in bits): */
+       /* bitcount[1] is written first, bitcount[0] eight bytes later. */
+       *(sha2_word64*)&context->buffer[SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1];
+       *(sha2_word64*)&context->buffer[SHA512_SHORT_BLOCK_LENGTH+8] = context->bitcount[0];
+
+       /* Final transform: */
+       SHA512_Transform(context, (sha2_word64*)context->buffer);
+}
+
+/*
+ * Finish a SHA-512 computation.  When digest is non-null,
+ * SHA512_Last() completes the hash and SHA512_DIGEST_LENGTH bytes are
+ * written to digest[]; when digest is null no hash is produced.  In
+ * both cases the entire context is wiped before returning, so it must
+ * go through SHA512_Init() again before reuse.
+ */
+void SHA512_Final(sha2_byte digest[], SHA512_CTX* context) {
+       sha2_word64     *d = (sha2_word64*)digest;
+
+       /* Sanity check: */
+       assert(context != (SHA512_CTX*)0);
+
+       /* If no digest buffer is passed, we don't bother doing this: */
+       if (digest != (sha2_byte*)0) {
+               SHA512_Last(context);
+
+               /* Save the hash data for output: */
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+               {
+                       /* Byte-swap each state word and store it in digest */
+                       int     j;
+                       for (j = 0; j < 8; j++) {
+                               REVERSE64(context->state[j],context->state[j]);
+                               *d++ = context->state[j];
+                       }
+               }
+#else
+               bcopy(context->state, d, SHA512_DIGEST_LENGTH);
+#endif
+       }
+
+       /* Zero out state data.  sizeof(*context) covers the whole
+        * structure; sizeof(context) would only be the pointer size. */
+       bzero(context, sizeof(*context));
+}
+
+/*
+ * Finish a SHA-512 computation and write the digest to buffer as
+ * 2 * SHA512_DIGEST_LENGTH lowercase hexadecimal characters followed
+ * by a terminating NUL.  Returns buffer (the start of that string).
+ * When buffer is null the context is wiped and null is returned.
+ */
+char *SHA512_End(SHA512_CTX* context, char buffer[]) {
+       sha2_byte       digest[SHA512_DIGEST_LENGTH], *d = digest;
+       char            *out = buffer;
+       int             i;
+
+       /* Sanity check: */
+       assert(context != (SHA512_CTX*)0);
+
+       if (buffer != (char*)0) {
+               SHA512_Final(digest, context);
+
+               /* Two hex characters per digest byte, high nibble first.
+                * A separate cursor keeps buffer at the string start. */
+               for (i = 0; i < SHA512_DIGEST_LENGTH; i++) {
+                       *out++ = sha2_hex_digits[(*d & 0xf0) >> 4];
+                       *out++ = sha2_hex_digits[*d & 0x0f];
+                       d++;
+               }
+               *out = (char)0;
+       } else {
+               /* Wipe the whole structure, not just pointer-size bytes. */
+               bzero(context, sizeof(*context));
+       }
+       /* Do not leave the binary digest on the stack. */
+       bzero(digest, SHA512_DIGEST_LENGTH);
+       return buffer;
+}
+
+/*
+ * One-shot helper: runs Init, Update and End over len bytes at data
+ * using a context on the stack, and returns whatever SHA512_End()
+ * returns for the digest buffer.
+ */
+char* SHA512_Data(const sha2_byte* data, size_t len, char digest[SHA512_DIGEST_STRING_LENGTH]) {
+       SHA512_CTX      ctx;
+       char            *result;
+
+       SHA512_Init(&ctx);
+       SHA512_Update(&ctx, data, len);
+       result = SHA512_End(&ctx, digest);
+       return result;
+}
+
+
+/*** SHA-384: *********************************************************/
+/*
+ * Prepare a SHA-384 context for a new message: empty block buffer,
+ * SHA-384 initial hash value in state[] (SHA512_DIGEST_LENGTH bytes
+ * are copied), both bit-count words zero.  A null context is silently
+ * ignored.
+ */
+void SHA384_Init(SHA384_CTX* context) {
+       if (!context) {
+               return;
+       }
+       /* Clear the block buffer, then load the initial hash value. */
+       bzero(context->buffer, SHA384_BLOCK_LENGTH);
+       bcopy(sha384_initial_hash_value, context->state, SHA512_DIGEST_LENGTH);
+       /* No message bits have been consumed yet. */
+       context->bitcount[1] = 0;
+       context->bitcount[0] = 0;
+}
+
+/* SHA-384 data input: hands the context, cast to SHA512_CTX*, to
+ * SHA512_Update() unchanged. */
+void SHA384_Update(SHA384_CTX* context, const sha2_byte* data, size_t len) {
+       SHA512_CTX      *as512 = (SHA512_CTX*)context;
+
+       SHA512_Update(as512, data, len);
+}
+
+/*
+ * Finish a SHA-384 computation.  When digest is non-null,
+ * SHA512_Last() completes the hash and the first SHA384_DIGEST_LENGTH
+ * bytes of the state (six 64-bit words) are written to digest[]; when
+ * digest is null no hash is produced.  In both cases the entire
+ * context is wiped before returning, so it must go through
+ * SHA384_Init() again before reuse.
+ */
+void SHA384_Final(sha2_byte digest[], SHA384_CTX* context) {
+       sha2_word64     *d = (sha2_word64*)digest;
+
+       /* Sanity check: */
+       assert(context != (SHA384_CTX*)0);
+
+       /* If no digest buffer is passed, we don't bother doing this: */
+       if (digest != (sha2_byte*)0) {
+               SHA512_Last((SHA512_CTX*)context);
+
+               /* Save the hash data for output: */
+#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN
+               {
+                       /* Byte-swap the first six state words into digest */
+                       int     j;
+                       for (j = 0; j < 6; j++) {
+                               REVERSE64(context->state[j],context->state[j]);
+                               *d++ = context->state[j];
+                       }
+               }
+#else
+               bcopy(context->state, d, SHA384_DIGEST_LENGTH);
+#endif
+       }
+
+       /* Zero out state data.  sizeof(*context) covers the whole
+        * structure; sizeof(context) would only be the pointer size. */
+       bzero(context, sizeof(*context));
+}
+
+/*
+ * Finish a SHA-384 computation and write the digest to buffer as
+ * 2 * SHA384_DIGEST_LENGTH lowercase hexadecimal characters followed
+ * by a terminating NUL.  Returns buffer (the start of that string).
+ * When buffer is null the context is wiped and null is returned.
+ */
+char *SHA384_End(SHA384_CTX* context, char buffer[]) {
+       sha2_byte       digest[SHA384_DIGEST_LENGTH], *d = digest;
+       char            *out = buffer;
+       int             i;
+
+       /* Sanity check: */
+       assert(context != (SHA384_CTX*)0);
+
+       if (buffer != (char*)0) {
+               SHA384_Final(digest, context);
+
+               /* Two hex characters per digest byte, high nibble first.
+                * A separate cursor keeps buffer at the string start. */
+               for (i = 0; i < SHA384_DIGEST_LENGTH; i++) {
+                       *out++ = sha2_hex_digits[(*d & 0xf0) >> 4];
+                       *out++ = sha2_hex_digits[*d & 0x0f];
+                       d++;
+               }
+               *out = (char)0;
+       } else {
+               /* Wipe the whole structure, not just pointer-size bytes. */
+               bzero(context, sizeof(*context));
+       }
+       /* Do not leave the binary digest on the stack. */
+       bzero(digest, SHA384_DIGEST_LENGTH);
+       return buffer;
+}
+
+/*
+ * One-shot helper: runs Init, Update and End over len bytes at data
+ * using a context on the stack, and returns whatever SHA384_End()
+ * returns for the digest buffer.
+ */
+char* SHA384_Data(const sha2_byte* data, size_t len, char digest[SHA384_DIGEST_STRING_LENGTH]) {
+       SHA384_CTX      ctx;
+       char            *result;
+
+       SHA384_Init(&ctx);
+       SHA384_Update(&ctx, data, len);
+       result = SHA384_End(&ctx, digest);
+       return result;
+}
+
+#endif /* !NO_64BIT */
+
+/*
+ * $PchId: sha2.c,v 1.1 2005/06/28 14:29:23 philip Exp $
+ */
diff --git a/servers/inet/sha2.h b/servers/inet/sha2.h
new file mode 100644 (file)
index 0000000..85f8a2a
--- /dev/null
@@ -0,0 +1,168 @@
+/*     $FreeBSD: src/sys/crypto/sha2/sha2.h,v 1.1.2.1 2001/07/03 11:01:36 ume Exp $    */
+/*     $KAME: sha2.h,v 1.3 2001/03/12 08:27:48 itojun Exp $    */
+
+/*
+ * sha2.h
+ *
+ * Version 1.0.0beta1
+ *
+ * Written by Aaron D. Gifford <me@aarongifford.com>
+ *
+ * Copyright 2000 Aaron D. Gifford.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef __SHA2_H__
+#define __SHA2_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*** SHA-256/384/512 Various Length Definitions ***********************/
+/*
+ * The *_BLOCK_LENGTH values size the byte buffers in the context
+ * structures below; the *_DIGEST_LENGTH values size the byte arrays the
+ * Final routines fill in.  Each *_DIGEST_STRING_LENGTH leaves room for two
+ * characters per digest byte plus one terminating character.
+ */
+#define SHA256_BLOCK_LENGTH            64
+#define SHA256_DIGEST_LENGTH           32
+#define SHA256_DIGEST_STRING_LENGTH    (SHA256_DIGEST_LENGTH * 2 + 1)
+#define SHA384_BLOCK_LENGTH            128
+#define SHA384_DIGEST_LENGTH           48
+#define SHA384_DIGEST_STRING_LENGTH    (SHA384_DIGEST_LENGTH * 2 + 1)
+#define SHA512_BLOCK_LENGTH            128
+#define SHA512_DIGEST_LENGTH           64
+#define SHA512_DIGEST_STRING_LENGTH    (SHA512_DIGEST_LENGTH * 2 + 1)
+
+#ifdef __minix
+/*
+ * Minix glue: pull in the headers the SHA-2 code relies on and provide
+ * the BSD-style type names, byte-order constants and memory helpers it
+ * expects.
+ */
+#include <assert.h>
+#include <string.h>
+#include <sys/types.h>
+#include <minix/u64.h>
+
+typedef u8_t u_int8_t; /* 1-byte  (8-bits)  */
+typedef u32_t u_int32_t;       /* 4-bytes (32-bits) */
+typedef u64_t u_int64_t;       /* 8-bytes (64-bits) */
+
+#ifndef __P
+#define __P(x) x
+#endif
+
+/* NOTE(review): sha2.c closes a region with "#endif !NO_64BIT", so
+ * defining NO_64BIT presumably compiles that region out -- confirm. */
+#define NO_64BIT       1
+#define MINIX_64BIT    1
+
+/* The byte order is fixed to little endian here.  The two reference
+ * constants must be the byte-swapped images of one another. */
+#define SHA2_BYTE_ORDER                0x04030201
+#define SHA2_LITTLE_ENDIAN     0x04030201
+#define SHA2_BIG_ENDIAN                0x01020304
+
+/* BSD helpers expressed with <string.h>; note that bcopy() takes the
+ * source first whereas memmove() takes the destination first. */
+#define bcopy(s,d,l)   (memmove((d),(s),(l)))
+#define bzero(d,l)     (memset((d),'\0',(l)))
+#endif
+
+/*** SHA-256/384/512 Context Structures *******************************/
+/* NOTE: If your architecture does not define either u_intXX_t types or
+ * uintXX_t (from inttypes.h), you may need to define things by hand
+ * for your system:
+ */
+#if 0
+typedef unsigned char u_int8_t;                /* 1-byte  (8-bits)  */
+typedef unsigned int u_int32_t;                /* 4-bytes (32-bits) */
+typedef unsigned long long u_int64_t;  /* 8-bytes (64-bits) */
+#endif
+/*
+ * Most BSD systems already define u_intXX_t types, as does Linux.
+ * Some systems, however, like Compaq's Tru64 Unix instead can use
+ * uintXX_t types defined by very recent ANSI C standards and included
+ * in the file:
+ *
+ *   #include <inttypes.h>
+ *
+ * If you choose to use <inttypes.h> then please define: 
+ *
+ *   #define SHA2_USE_INTTYPES_H
+ *
+ * Or on the command line during compile:
+ *
+ *   cc -DSHA2_USE_INTTYPES_H ...
+ */
+/* The <inttypes.h> flavour of the structures is disabled by the "#if 0";
+ * only the u_intXX_t flavour in the #else branch is ever compiled. */
+#if 0 /*def SHA2_USE_INTTYPES_H*/
+
+typedef struct _SHA256_CTX {
+       uint32_t        state[8];
+       uint64_t        bitcount;
+       uint8_t buffer[SHA256_BLOCK_LENGTH];
+} SHA256_CTX;
+typedef struct _SHA512_CTX {
+       uint64_t        state[8];
+       uint64_t        bitcount[2];
+       uint8_t buffer[SHA512_BLOCK_LENGTH];
+} SHA512_CTX;
+
+#else /* SHA2_USE_INTTYPES_H */
+
+/*
+ * Per-computation context.  buffer is one block (SHA256_BLOCK_LENGTH
+ * bytes) in size.
+ * NOTE(review): state presumably holds the running hash words and
+ * bitcount the message length in bits -- confirm against sha2.c.
+ */
+typedef struct _SHA256_CTX {
+       u_int32_t       state[8];
+       u_int64_t       bitcount;
+       u_int8_t        buffer[SHA256_BLOCK_LENGTH];
+} SHA256_CTX;
+/*
+ * Same layout with 64-bit state words, a two-word bitcount and a
+ * SHA512_BLOCK_LENGTH byte buffer.  SHA384_Final() copies the first six
+ * state words out as the SHA-384 digest.
+ */
+typedef struct _SHA512_CTX {
+       u_int64_t       state[8];
+       u_int64_t       bitcount[2];
+       u_int8_t        buffer[SHA512_BLOCK_LENGTH];
+} SHA512_CTX;
+
+#endif /* SHA2_USE_INTTYPES_H */
+
+/* SHA-384 reuses the SHA-512 context; SHA384_Final() casts its context to
+ * SHA512_CTX* when calling SHA512_Last(). */
+typedef SHA512_CTX SHA384_CTX;
+
+
+/*** SHA-256/384/512 Function Prototypes ******************************/
+/*
+ * Each algorithm exposes the same five entry points.  As implemented for
+ * SHA-384 in sha2.c:
+ *   _Data   runs _Init, _Update and _End on a local context;
+ *   _End    runs _Final and writes the digest as a NUL-terminated hex
+ *           string into the supplied character buffer;
+ *   _Final  stores the raw digest bytes and zeroes the context.
+ * NOTE(review): the SHA-256 and SHA-512 variants presumably follow the
+ * same pattern -- confirm against sha2.c.
+ */
+
+void SHA256_Init __P((SHA256_CTX *));
+void SHA256_Update __P((SHA256_CTX*, const u_int8_t*, size_t));
+void SHA256_Final __P((u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*));
+char* SHA256_End __P((SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]));
+char* SHA256_Data __P((const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]));
+
+void SHA384_Init __P((SHA384_CTX*));
+void SHA384_Update __P((SHA384_CTX*, const u_int8_t*, size_t));
+void SHA384_Final __P((u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*));
+char* SHA384_End __P((SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH]));
+char* SHA384_Data __P((const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]));
+
+void SHA512_Init __P((SHA512_CTX*));
+void SHA512_Update __P((SHA512_CTX*, const u_int8_t*, size_t));
+void SHA512_Final __P((u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*));
+char* SHA512_End __P((SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]));
+char* SHA512_Data __P((const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]));
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __SHA2_H__ */
+
+
+/*
+ * $PchId: sha2.h,v 1.1 2005/06/28 14:29:33 philip Exp $
+ */
index 8f9e3da20e601e68f65640926d473e481f98ec89..4fdc0257c67fca7eed26195ce90ef9f9e281b785 100644 (file)
 
 #include "inet.h"
 
+#ifndef __minix_vmd /* Minix 3 */
+#include <sys/select.h>
+#endif
+#include <sys/svrctl.h>
 #include <minix/callnr.h>
 
 #include "mq.h"
+#include "qp.h"
 #include "proto.h"
 #include "generic/type.h"
 
 #include "generic/assert.h"
 #include "generic/buf.h"
+#include "generic/event.h"
 #include "generic/sr.h"
+#include "sr_int.h"
+
+#ifndef __minix_vmd /* Minix 3 */
+#define DEV_CANCEL NW_CANCEL
+#define DEVICE_REPLY REVIVE
+#define DEV_IOCTL3 DEV_IOCTL
+#define NDEV_BUFFER ADDRESS
+#define NDEV_COUNT COUNT
+#define NDEV_IOCTL REQUEST
+#define NDEV_MINOR DEVICE
+#define NDEV_PROC PROC_NR
+#endif
 
 THIS_FILE
 
-#define FD_NR                  (16*IP_PORT_MAX)
+PUBLIC sr_fd_t sr_fd_table[FD_NR];
 
-typedef struct sr_fd
-{
-       int srf_flags;
-       int srf_fd;
-       int srf_port;
-       sr_open_t srf_open;
-       sr_close_t srf_close;
-       sr_write_t srf_write;
-       sr_read_t srf_read;
-       sr_ioctl_t srf_ioctl;
-       sr_cancel_t srf_cancel;
-       mq_t *srf_ioctl_q, *srf_ioctl_q_tail;
-       mq_t *srf_read_q, *srf_read_q_tail;
-       mq_t *srf_write_q, *srf_write_q_tail;
-} sr_fd_t;
-
-#define SFF_FLAGS      0x0F
-#      define SFF_FREE         0x00
-#      define SFF_MINOR        0x01
-#      define SFF_INUSE        0x02
-#      define SFF_BUSY         0x3C
-#              define SFF_IOCTL_IP     0x04
-#              define SFF_READ_IP      0x08
-#              define SFF_WRITE_IP     0x10
-#      define SFF_PENDING_REQ  0x30
-#      define SFF_SUSPENDED    0x1C0
-#              define SFF_IOCTL_SUSP   0x40
-#              define SFF_READ_SUSP    0x80
-#              define SFF_WRITE_SUSP   0x100
+PRIVATE mq_t *repl_queue, *repl_queue_tail;
+#ifdef __minix_vmd
+PRIVATE cpvec_t cpvec[CPVEC_NR];
+#else /* Minix 3 */
+PRIVATE struct vir_cp_req vir_cp_req[CPVEC_NR];
+#endif
 
 FORWARD _PROTOTYPE ( int sr_open, (message *m) );
 FORWARD _PROTOTYPE ( void sr_close, (message *m) );
 FORWARD _PROTOTYPE ( int sr_rwio, (mq_t *m) );
+FORWARD _PROTOTYPE ( int sr_restart_read, (sr_fd_t *fdp) );
+FORWARD _PROTOTYPE ( int sr_restart_write, (sr_fd_t *fdp) );
+FORWARD _PROTOTYPE ( int sr_restart_ioctl, (sr_fd_t *fdp) );
 FORWARD _PROTOTYPE ( int sr_cancel, (message *m) );
+#ifndef __minix_vmd /* Minix 3 */
+FORWARD _PROTOTYPE ( int sr_select, (message *m) );
+#endif
 FORWARD _PROTOTYPE ( void sr_reply, (mq_t *m, int reply, int can_enqueue) );
 FORWARD _PROTOTYPE ( sr_fd_t *sr_getchannel, (int minor));
 FORWARD _PROTOTYPE ( acc_t *sr_get_userdata, (int fd, vir_bytes offset,
                                        vir_bytes count, int for_ioctl) );
 FORWARD _PROTOTYPE ( int sr_put_userdata, (int fd, vir_bytes offset,
                                                acc_t *data, int for_ioctl) );
+#ifdef __minix_vmd 
+#define sr_select_res 0
+#else /* Minix 3 */
+FORWARD _PROTOTYPE (void sr_select_res, (int fd, unsigned ops) );
+#endif
 FORWARD _PROTOTYPE ( int sr_repl_queue, (int proc, int ref, int operation) );
 FORWARD _PROTOTYPE ( int walk_queue, (sr_fd_t *sr_fd, mq_t *q_head, 
                        mq_t **q_tail_ptr, int type, int proc_nr, int ref) );
 FORWARD _PROTOTYPE ( void process_req_q, (mq_t *mq, mq_t *tail, 
                                                        mq_t **tail_ptr) );
+FORWARD _PROTOTYPE ( void sr_event, (event_t *evp, ev_arg_t arg) );
 FORWARD _PROTOTYPE ( int cp_u2b, (int proc, char *src, acc_t **var_acc_ptr,
                                                                 int size) );
 FORWARD _PROTOTYPE ( int cp_b2u, (acc_t *acc_ptr, int proc, char *dest) );
 
-PRIVATE sr_fd_t sr_fd_table[FD_NR];
-PRIVATE mq_t *repl_queue, *repl_queue_tail;
-PRIVATE struct vir_cp_req vir_cp_req[CPVEC_NR];
-
 PUBLIC void sr_init()
 {
-#if ZERO
        int i;
 
        for (i=0; i<FD_NR; i++)
+       {
                sr_fd_table[i].srf_flags= SFF_FREE;
+               ev_init(&sr_fd_table[i].srf_ioctl_ev);
+               ev_init(&sr_fd_table[i].srf_read_ev);
+               ev_init(&sr_fd_table[i].srf_write_ev);
+       }
        repl_queue= NULL;
-#endif
 }
 
 PUBLIC void sr_rec(m)
@@ -127,9 +133,15 @@ mq_t *m;
 
        if (repl_queue)
        {
-               if (m->mq_mess.m_type == NW_CANCEL)
+               if (m->mq_mess.m_type == DEV_CANCEL)
                {
-                       result= sr_repl_queue(m->mq_mess.PROC_NR, 0,  0);
+#ifdef __minix_vmd
+                       result= sr_repl_queue(m->mq_mess.NDEV_PROC,
+                               m->mq_mess.NDEV_REF, 
+                               m->mq_mess.NDEV_OPERATION);
+#else /* Minix 3 */
+                       result= sr_repl_queue(m->mq_mess.PROC_NR, 0, 0);
+#endif
                        if (result)
                        {
                                mq_free(m);
@@ -155,24 +167,33 @@ mq_t *m;
                break;
        case DEV_READ:
        case DEV_WRITE:
-       case DEV_IOCTL:
+       case DEV_IOCTL3:
                result= sr_rwio(m);
                assert(result == OK || result == SUSPEND);
                send_reply= (result == SUSPEND);
                free_mess= 0;
                break;
-       case CANCEL:
+       case DEV_CANCEL:
                result= sr_cancel(&m->mq_mess);
                assert(result == OK || result == EINTR);
                send_reply= (result == EINTR);
                free_mess= 1;
+#ifdef __minix_vmd
+               m->mq_mess.m_type= m->mq_mess.NDEV_OPERATION;
+#else /* Minix 3 */
                m->mq_mess.m_type= 0;
+#endif
+               break;
+#ifndef __minix_vmd /* Minix 3 */
+       case DEV_SELECT:
+               result= sr_select(&m->mq_mess);
+               send_reply= 1;
+               free_mess= 1;
                break;
-#if !CRAMPED
+#endif
        default:
                ip_panic(("unknown message, from %d, type %d",
                                m->mq_mess.m_source, m->mq_mess.m_type));
-#endif
        }
        if (send_reply)
        {
@@ -183,7 +204,7 @@ mq_t *m;
 }
 
 PUBLIC void sr_add_minor(minor, port, openf, closef, readf, writef,
-       ioctlf, cancelf)
+       ioctlf, cancelf, selectf)
 int minor;
 int port;
 sr_open_t openf;
@@ -192,6 +213,7 @@ sr_read_t readf;
 sr_write_t writef;
 sr_ioctl_t ioctlf;
 sr_cancel_t cancelf;
+sr_select_t selectf;
 {
        sr_fd_t *sr_fd;
 
@@ -209,6 +231,7 @@ sr_cancel_t cancelf;
        sr_fd->srf_read= readf;
        sr_fd->srf_ioctl= ioctlf;
        sr_fd->srf_cancel= cancelf;
+       sr_fd->srf_select= selectf;
 }
 
 PRIVATE int sr_open(m)
@@ -216,7 +239,7 @@ message *m;
 {
        sr_fd_t *sr_fd;
 
-       int minor= m->DEVICE;
+       int minor= m->NDEV_MINOR;
        int i, fd;
 
        if (minor<0 || minor>FD_NR)
@@ -241,7 +264,7 @@ message *m;
        *sr_fd= sr_fd_table[minor];
        sr_fd->srf_flags= SFF_INUSE;
        fd= (*sr_fd->srf_open)(sr_fd->srf_port, i, sr_get_userdata,
-               sr_put_userdata, 0);
+               sr_put_userdata, 0 /* no put_pkt */, sr_select_res);
        if (fd<0)
        {
                sr_fd->srf_flags= SFF_FREE;
@@ -257,10 +280,11 @@ message *m;
 {
        sr_fd_t *sr_fd;
 
-       sr_fd= sr_getchannel(m->DEVICE);
+       sr_fd= sr_getchannel(m->NDEV_MINOR);
        assert (sr_fd);
 
-       assert (!(sr_fd->srf_flags & SFF_BUSY));
+       if (sr_fd->srf_flags & SFF_BUSY)
+               ip_panic(("close on busy channel"));
 
        assert (!(sr_fd->srf_flags & SFF_MINOR));
        (*sr_fd->srf_close)(sr_fd->srf_fd);
@@ -277,7 +301,7 @@ mq_t *m;
        ioreq_t request;
        size_t size;
 
-       sr_fd= sr_getchannel(m->mq_mess.DEVICE);
+       sr_fd= sr_getchannel(m->mq_mess.NDEV_MINOR);
        assert (sr_fd);
 
        switch(m->mq_mess.m_type)
@@ -294,16 +318,14 @@ mq_t *m;
                ip_flag= SFF_WRITE_IP;
                susp_flag= SFF_WRITE_SUSP;
                break;
-       case DEV_IOCTL:
+       case DEV_IOCTL3:
                q_head_ptr= &sr_fd->srf_ioctl_q;
                q_tail_ptr= &sr_fd->srf_ioctl_q_tail;
                ip_flag= SFF_IOCTL_IP;
                susp_flag= SFF_IOCTL_SUSP;
                break;
-#if !CRAMPED
        default:
                ip_panic(("illegal case entry"));
-#endif
        }
 
        if (sr_fd->srf_flags & ip_flag)
@@ -324,15 +346,26 @@ mq_t *m;
        {
        case DEV_READ:
                r= (*sr_fd->srf_read)(sr_fd->srf_fd, 
-                       m->mq_mess.COUNT);
+                       m->mq_mess.NDEV_COUNT);
                break;
        case DEV_WRITE:
                r= (*sr_fd->srf_write)(sr_fd->srf_fd, 
-                       m->mq_mess.COUNT);
+                       m->mq_mess.NDEV_COUNT);
                break;
-       case DEV_IOCTL:
-               request= m->mq_mess.REQUEST;
-#ifdef _IOCPARM_MASK
+       case DEV_IOCTL3:
+               request= m->mq_mess.NDEV_IOCTL;
+
+               /* There should be a better way to do this... */
+               if (request == NWIOQUERYPARAM)
+               {
+                       r= qp_query(m->mq_mess.NDEV_PROC,
+                               (vir_bytes)m->mq_mess.NDEV_BUFFER);
+                       r= sr_put_userdata(sr_fd-sr_fd_table, r, NULL, 1);
+                       assert(r == OK);
+                       return OK;
+               }
+
+               /* And now, we continue with our regular program. */
                size= (request >> 16) & _IOCPARM_MASK;
                if (size>MAX_IOCTL_S)
                {
@@ -342,13 +375,10 @@ mq_t *m;
                        assert(r == OK);
                        return OK;
                }
-#endif
                r= (*sr_fd->srf_ioctl)(sr_fd->srf_fd, request);
                break;
-#if !CRAMPED
        default:
                ip_panic(("illegal case entry"));
-#endif
        }
 
        assert(r == OK || r == SUSPEND || 
@@ -358,21 +388,106 @@ mq_t *m;
        return r;
 }
 
+/*
+ * Re-issue the read request at the head of the channel's read queue; the
+ * queue must not be empty.
+ *
+ * If a read is already marked in progress (SFF_READ_IP) the handler is
+ * not called and SUSPEND is returned.  Otherwise SFF_READ_IP is set and
+ * the channel's srf_read handler is invoked with the byte count taken
+ * from the queued message.  A SUSPEND result also sets SFF_READ_SUSP.
+ *
+ * Returns the handler's result, which is asserted to be OK or SUSPEND.
+ */
+PRIVATE int sr_restart_read(sr_fd)
+sr_fd_t *sr_fd;
+{
+       mq_t *head;
+       int status;
+
+       head= sr_fd->srf_read_q;
+       assert(head);
+
+       /* A read is still being worked on: leave it alone. */
+       if ((sr_fd->srf_flags & SFF_READ_IP) != 0)
+       {
+               assert(sr_fd->srf_flags & SFF_READ_SUSP);
+               return SUSPEND;
+       }
+
+       sr_fd->srf_flags |= SFF_READ_IP;
+       status= (*sr_fd->srf_read)(sr_fd->srf_fd, head->mq_mess.NDEV_COUNT);
+
+       /* On an unexpected result, print it before the assertion fires. */
+       assert(status == OK || status == SUSPEND ||
+               (printf("r= %d\n", status), 0));
+       if (status == SUSPEND)
+       {
+               sr_fd->srf_flags |= SFF_READ_SUSP;
+       }
+       return status;
+}
+
+/*
+ * Re-issue the write request at the head of the channel's write queue;
+ * the queue must not be empty.
+ *
+ * If a write is already marked in progress (SFF_WRITE_IP) the handler is
+ * not called and SUSPEND is returned.  Otherwise SFF_WRITE_IP is set and
+ * the channel's srf_write handler is invoked with the byte count taken
+ * from the queued message.  A SUSPEND result also sets SFF_WRITE_SUSP.
+ *
+ * Returns the handler's result, which is asserted to be OK or SUSPEND.
+ */
+PRIVATE int sr_restart_write(sr_fd)
+sr_fd_t *sr_fd;
+{
+       mq_t *head;
+       int status;
+
+       head= sr_fd->srf_write_q;
+       assert(head);
+
+       /* A write is still being worked on: leave it alone. */
+       if ((sr_fd->srf_flags & SFF_WRITE_IP) != 0)
+       {
+               assert(sr_fd->srf_flags & SFF_WRITE_SUSP);
+               return SUSPEND;
+       }
+
+       sr_fd->srf_flags |= SFF_WRITE_IP;
+       status= (*sr_fd->srf_write)(sr_fd->srf_fd, head->mq_mess.NDEV_COUNT);
+
+       /* On an unexpected result, print it before the assertion fires. */
+       assert(status == OK || status == SUSPEND ||
+               (printf("r= %d\n", status), 0));
+       if (status == SUSPEND)
+       {
+               sr_fd->srf_flags |= SFF_WRITE_SUSP;
+       }
+       return status;
+}
+
+/*
+ * Re-issue the ioctl request at the head of the channel's ioctl queue;
+ * the queue must not be empty.
+ *
+ * If an ioctl is already marked in progress (SFF_IOCTL_IP) the handler is
+ * not called and SUSPEND is returned.  Otherwise SFF_IOCTL_IP is set and
+ * the channel's srf_ioctl handler is invoked with the request code taken
+ * from the queued message.  A SUSPEND result also sets SFF_IOCTL_SUSP.
+ *
+ * Returns the handler's result, which is asserted to be OK or SUSPEND.
+ */
+PRIVATE int sr_restart_ioctl(sr_fd)
+sr_fd_t *sr_fd;
+{
+       mq_t *mp;
+       int r;
+
+       mp= sr_fd->srf_ioctl_q;
+       assert(mp);
+
+       if (sr_fd->srf_flags & SFF_IOCTL_IP)
+       {
+               assert(sr_fd->srf_flags & SFF_IOCTL_SUSP);
+               return SUSPEND;
+       }
+       sr_fd->srf_flags |= SFF_IOCTL_IP;
+
+       /* Pass the ioctl request code (NDEV_IOCTL), as sr_rwio() does,
+        * rather than the byte-count field used by read and write. */
+       r= (*sr_fd->srf_ioctl)(sr_fd->srf_fd,
+               mp->mq_mess.NDEV_IOCTL);
+
+       /* On an unexpected result, print it before the assertion fires. */
+       assert(r == OK || r == SUSPEND ||
+               (printf("r= %d\n", r), 0));
+       if (r == SUSPEND)
+               sr_fd->srf_flags |= SFF_IOCTL_SUSP;
+       return r;
+}
+
 PRIVATE int sr_cancel(m)
 message *m;
 {
        sr_fd_t *sr_fd;
-       int i, result;
-       mq_t *q_ptr, *q_ptr_prv;
+       int result;
        int proc_nr, ref, operation;
 
         result=EINTR;
-       proc_nr=  m->PROC_NR;
+       proc_nr=  m->NDEV_PROC;
+#ifdef __minix_vmd
+       ref=  m->NDEV_REF;
+       operation= m->NDEV_OPERATION;
+#else /* Minix 3 */
        ref=  0;
        operation= 0;
-       sr_fd= sr_getchannel(m->DEVICE);
+#endif
+       sr_fd= sr_getchannel(m->NDEV_MINOR);
        assert (sr_fd);
 
+#ifdef __minix_vmd
+       if (operation == CANCEL_ANY || operation == DEV_IOCTL3)
+#endif
        {
                result= walk_queue(sr_fd, sr_fd->srf_ioctl_q, 
                        &sr_fd->srf_ioctl_q_tail, SR_CANCEL_IOCTL,
@@ -380,6 +495,9 @@ message *m;
                if (result != EAGAIN)
                        return result;
        }
+#ifdef __minix_vmd
+       if (operation == CANCEL_ANY || operation == DEV_READ)
+#endif
        {
                result= walk_queue(sr_fd, sr_fd->srf_read_q, 
                        &sr_fd->srf_read_q_tail, SR_CANCEL_READ,
@@ -387,6 +505,9 @@ message *m;
                if (result != EAGAIN)
                        return result;
        }
+#ifdef __minix_vmd
+       if (operation == CANCEL_ANY || operation == DEV_WRITE)
+#endif
        {
                result= walk_queue(sr_fd, sr_fd->srf_write_q, 
                        &sr_fd->srf_write_q_tail, SR_CANCEL_WRITE,
@@ -394,14 +515,56 @@ message *m;
                if (result != EAGAIN)
                        return result;
        }
-#if !CRAMPED
+#ifdef __minix_vmd
        ip_panic((
-"request not found: from %d, type %d, MINOR= %d, PROC= %d, REF= %d OPERATION= %d",
-               m->m_source, m->m_type, m->DEVICE,
-               m->PROC_NR, 0, 0));
+"request not found: from %d, type %d, MINOR= %d, PROC= %d, REF= %d OPERATION= %ld",
+               m->m_source, m->m_type, m->NDEV_MINOR,
+               m->NDEV_PROC, m->NDEV_REF, m->NDEV_OPERATION));
+#else /* Minix 3 */
+       ip_panic((
+"request not found: from %d, type %d, MINOR= %d, PROC= %d",
+               m->m_source, m->m_type, m->NDEV_MINOR,
+               m->NDEV_PROC));
 #endif
 }
 
+#ifndef __minix_vmd /* Minix 3 */
+/*
+ * Handle a select request on the channel named by the message's minor
+ * device number.
+ *
+ * The sender is remembered in srf_select_proc; sr_select_res() later
+ * notifies that process.  The SEL_* bits found in m->PROC_NR are
+ * translated to SR_SELECT_* bits (SR_SELECT_POLL is added when SEL_NOTIFY
+ * is absent) and handed to the channel's srf_select handler.
+ *
+ * Returns a negative handler result unchanged; otherwise the handler's
+ * SR_SELECT_* result translated back to a SEL_* mask.
+ */
+PRIVATE int sr_select(m)
+message *m;
+{
+       sr_fd_t *sr_fd;
+       int r;
+       unsigned m_ops, i_ops;
+
+       sr_fd= sr_getchannel(m->NDEV_MINOR);
+       assert (sr_fd);
+
+       sr_fd->srf_select_proc= m->m_source;
+
+       /* For select requests the PROC_NR field carries the SEL_* mask. */
+       m_ops= m->PROC_NR;
+       i_ops= 0;
+       if (m_ops & SEL_RD) i_ops |= SR_SELECT_READ;
+       if (m_ops & SEL_WR) i_ops |= SR_SELECT_WRITE;
+       if (m_ops & SEL_ERR) i_ops |= SR_SELECT_EXCEPTION;
+       if (!(m_ops & SEL_NOTIFY)) i_ops |= SR_SELECT_POLL;
+
+       printf("should select 0%o on fd %d\n", i_ops, m->NDEV_MINOR);
+       r= (*sr_fd->srf_select)(sr_fd->srf_fd,  i_ops);
+       if (r < 0)
+               return r;
+       m_ops= 0;
+       if (r & SR_SELECT_READ) m_ops |= SEL_RD;
+       if (r & SR_SELECT_WRITE) m_ops |= SEL_WR;
+       if (r & SR_SELECT_EXCEPTION) m_ops |= SEL_ERR;
+
+       return m_ops;
+}
+#endif
+
 PRIVATE int walk_queue(sr_fd, q_head, q_tail_ptr, type, proc_nr, ref)
 sr_fd_t *sr_fd;
 mq_t *q_head, **q_tail_ptr;
@@ -415,8 +578,12 @@ int ref;
        for(q_ptr_prv= NULL, q_ptr= q_head; q_ptr; 
                q_ptr_prv= q_ptr, q_ptr= q_ptr->mq_next)
        {
-               if (q_ptr->mq_mess.PROC_NR != proc_nr)
+               if (q_ptr->mq_mess.NDEV_PROC != proc_nr)
                        continue;
+#ifdef __minix_vmd
+               if (q_ptr->mq_mess.NDEV_REF != ref)
+                       continue;
+#endif
                if (!q_ptr_prv)
                {
                        result= (*sr_fd->srf_cancel)(sr_fd->srf_fd, type);
@@ -456,21 +623,31 @@ int can_enqueue;
        int result, proc, ref,operation;
        message reply, *mp;
 
-       proc= mq->mq_mess.PROC_NR;
+       proc= mq->mq_mess.NDEV_PROC;
+#ifdef __minix_vmd
+       ref= mq->mq_mess.NDEV_REF;
+#else /* Minix 3 */
        ref= 0;
+#endif
        operation= mq->mq_mess.m_type;
+       assert(operation != DEV_CANCEL);
 
        if (can_enqueue)
                mp= &mq->mq_mess;
        else
                mp= &reply;
 
-       mp->m_type= REVIVE;
+       mp->m_type= DEVICE_REPLY;
        mp->REP_PROC_NR= proc;
        mp->REP_STATUS= status;
+#ifdef __minix_vmd
+       mp->REP_REF= ref;
+       mp->REP_OPERATION= operation;
+#endif
        result= send(mq->mq_mess.m_source, mp);
        if (result == ELOCKED && can_enqueue)
        {
+               mq->mq_next= NULL;
                if (repl_queue)
                        repl_queue_tail->mq_next= mq;
                else
@@ -491,26 +668,28 @@ vir_bytes count;
 int for_ioctl;
 {
        sr_fd_t *loc_fd;
-       mq_t **head_ptr, **tail_ptr, *m, *tail, *mq;
+       mq_t **head_ptr, *m, *mq;
        int ip_flag, susp_flag;
        int result;
        int suspended;
        char *src;
        acc_t *acc;
+       event_t *evp;
+       ev_arg_t arg;
 
        loc_fd= &sr_fd_table[fd];
 
        if (for_ioctl)
        {
                head_ptr= &loc_fd->srf_ioctl_q;
-               tail_ptr= &loc_fd->srf_ioctl_q_tail;
+               evp= &loc_fd->srf_ioctl_ev;
                ip_flag= SFF_IOCTL_IP;
                susp_flag= SFF_IOCTL_SUSP;
        }
        else
        {
                head_ptr= &loc_fd->srf_write_q;
-               tail_ptr= &loc_fd->srf_write_q_tail;
+               evp= &loc_fd->srf_write_ev;
                ip_flag= SFF_WRITE_IP;
                susp_flag= SFF_WRITE_SUSP;
        }
@@ -520,27 +699,26 @@ assert (loc_fd->srf_flags & ip_flag);
        if (!count)
        {
                m= *head_ptr;
-               *head_ptr= NULL;
-               tail= *tail_ptr;
-assert(m);
                mq= m->mq_next;
+               *head_ptr= mq;
                result= (int)offset;
                sr_reply (m, result, 1);
                suspended= (loc_fd->srf_flags & susp_flag);
                loc_fd->srf_flags &= ~(ip_flag|susp_flag);
                if (suspended)
                {
-                       process_req_q(mq, tail, tail_ptr);
-               }
-               else
-               {
-assert(!mq);
+                       if (mq)
+                       {
+ { where(); printf("sr_get_userdata: enqueuing event\n"); }
+                               arg.ev_ptr= loc_fd;
+                               ev_enqueue(evp, sr_event, arg);
+                       }
                }
                return NULL;
        }
 
-       src= (*head_ptr)->mq_mess.ADDRESS + offset;
-       result= cp_u2b ((*head_ptr)->mq_mess.PROC_NR, src, &acc, count);
+       src= (*head_ptr)->mq_mess.NDEV_BUFFER + offset;
+       result= cp_u2b ((*head_ptr)->mq_mess.NDEV_PROC, src, &acc, count);
 
        return result<0 ? NULL : acc;
 }
@@ -552,25 +730,27 @@ acc_t *data;
 int for_ioctl;
 {
        sr_fd_t *loc_fd;
-       mq_t **head_ptr, **tail_ptr, *m, *tail, *mq;
+       mq_t **head_ptr, *m, *mq;
        int ip_flag, susp_flag;
        int result;
        int suspended;
        char *dst;
+       event_t *evp;
+       ev_arg_t arg;
 
        loc_fd= &sr_fd_table[fd];
 
        if (for_ioctl)
        {
                head_ptr= &loc_fd->srf_ioctl_q;
-               tail_ptr= &loc_fd->srf_ioctl_q_tail;
+               evp= &loc_fd->srf_ioctl_ev;
                ip_flag= SFF_IOCTL_IP;
                susp_flag= SFF_IOCTL_SUSP;
        }
        else
        {
                head_ptr= &loc_fd->srf_read_q;
-               tail_ptr= &loc_fd->srf_read_q_tail;
+               evp= &loc_fd->srf_read_ev;
                ip_flag= SFF_READ_IP;
                susp_flag= SFF_READ_SUSP;
        }
@@ -580,30 +760,55 @@ int for_ioctl;
        if (!data)
        {
                m= *head_ptr;
-               assert(m);
-
-               *head_ptr= NULL;
-               tail= *tail_ptr;
                mq= m->mq_next;
+               *head_ptr= mq;
                result= (int)offset;
                sr_reply (m, result, 1);
                suspended= (loc_fd->srf_flags & susp_flag);
                loc_fd->srf_flags &= ~(ip_flag|susp_flag);
                if (suspended)
                {
-                       process_req_q(mq, tail, tail_ptr);
-               }
-               else
-               {
-                       assert(!mq);
+                       if (mq)
+                       {
+ { where(); printf("sr_put_userdata: enqueuing event\n"); }
+                               arg.ev_ptr= loc_fd;
+                               ev_enqueue(evp, sr_event, arg);
+                       }
                }
                return OK;
        }
 
-       dst= (*head_ptr)->mq_mess.ADDRESS + offset;
-       return cp_b2u (data, (*head_ptr)->mq_mess.PROC_NR, dst);
+       dst= (*head_ptr)->mq_mess.NDEV_BUFFER + offset;
+       return cp_b2u (data, (*head_ptr)->mq_mess.NDEV_PROC, dst);
 }
 
+#ifndef __minix_vmd /* Minix 3 */
+/*
+ * Report select results for channel fd: translate the SR_SELECT_* bits in
+ * ops to SEL_* bits, fill in a DEV_SELECTED notification carrying fd and
+ * those bits, and notify the process recorded in srf_select_proc.
+ */
+PRIVATE void sr_select_res(fd, ops)
+int fd;
+unsigned ops;
+{
+       message note;
+       sr_fd_t *chan;
+       unsigned sel_ops;
+
+       sel_ops= 0;
+       if (ops & SR_SELECT_READ)
+               sel_ops |= SEL_RD;
+       if (ops & SR_SELECT_WRITE)
+               sel_ops |= SEL_WR;
+       if (ops & SR_SELECT_EXCEPTION)
+               sel_ops |= SEL_ERR;
+
+       note.NOTIFY_TYPE= DEV_SELECTED;
+       note.NOTIFY_ARG= fd;
+       note.NOTIFY_FLAGS= sel_ops;
+
+       chan= &sr_fd_table[fd];
+       printf("sr_select_res: notifying caller %d with ops 0%o\n",
+               chan->srf_select_proc, sel_ops);
+
+       notify(chan->srf_select_proc, &note);
+}
+#endif
+
 PRIVATE void process_req_q(mq, tail, tail_ptr)
 mq_t *mq, *tail, **tail_ptr;
 {
@@ -631,6 +836,47 @@ mq_t *mq, *tail, **tail_ptr;
        return;
 }
 
+/*
+ * Event handler for the per-channel events that sr_get_userdata() and
+ * sr_put_userdata() enqueue when a suspended request completes while
+ * further requests are still queued.
+ *
+ * arg.ev_ptr is the channel; evp tells which of its three events fired.
+ * Queued requests of that kind are restarted one after another until the
+ * queue is empty or a restart returns SUSPEND.  An event that belongs to
+ * none of the channel's three queues is a fatal error.
+ */
+PRIVATE void sr_event(evp, arg)
+event_t *evp;
+ev_arg_t arg;
+{
+       sr_fd_t *sr_fd;
+
+       sr_fd= arg.ev_ptr;
+       if (evp == &sr_fd->srf_write_ev)
+       {
+               while(sr_fd->srf_write_q)
+               {
+                       if (sr_restart_write(sr_fd) == SUSPEND)
+                               return;
+               }
+               return;
+       }
+       if (evp == &sr_fd->srf_read_ev)
+       {
+               while(sr_fd->srf_read_q)
+               {
+                       if (sr_restart_read(sr_fd) == SUSPEND)
+                               return;
+               }
+               return;
+       }
+       if (evp == &sr_fd->srf_ioctl_ev)
+       {
+               while(sr_fd->srf_ioctl_q)
+               {
+                       if (sr_restart_ioctl(sr_fd) == SUSPEND)
+                               return;
+               }
+               return;
+       }
+       ip_panic(("sr_event: unknown event\n"));
+}
+
 PRIVATE int cp_u2b (proc, src, var_acc_ptr, size)
 int proc;
 char *src;
@@ -650,6 +896,11 @@ int size;
        {
                size= (vir_bytes)acc->acc_length;
 
+#ifdef __minix_vmd
+               cpvec[i].cpv_src= (vir_bytes)src;
+               cpvec[i].cpv_dst= (vir_bytes)ptr2acc_data(acc);
+               cpvec[i].cpv_size= size;
+#else /* Minix 3 */
                vir_cp_req[i].count= size;
                vir_cp_req[i].src.proc_nr = proc;
                vir_cp_req[i].src.segment = D;
@@ -657,6 +908,7 @@ int size;
                vir_cp_req[i].dst.proc_nr = this_proc;
                vir_cp_req[i].dst.segment = D;
                vir_cp_req[i].dst.offset = (vir_bytes) ptr2acc_data(acc);
+#endif
 
                src += size;
                acc= acc->acc_next;
@@ -664,9 +916,17 @@ int size;
 
                if (i == CPVEC_NR || acc == NULL)
                {
+#ifdef __minix_vmd
+                       mess.m_type= SYS_VCOPY;
+                       mess.m1_i1= proc;
+                       mess.m1_i2= this_proc;
+                       mess.m1_i3= i;
+                       mess.m1_p1= (char *)cpvec;
+#else /* Minix 3 */
                        mess.m_type= SYS_VIRVCOPY;
-                       mess.VCP_VEC_SIZE = i;
-                       mess.VCP_VEC_ADDR = (char *) vir_cp_req;
+                       mess.VCP_VEC_SIZE= i;
+                       mess.VCP_VEC_ADDR= (char *)vir_cp_req;
+#endif
                        if (sendrec(SYSTASK, &mess) <0)
                                ip_panic(("unable to sendrec"));
                        if (mess.m_type <0)
@@ -699,6 +959,11 @@ char *dest;
 
                if (size)
                {
+#ifdef __minix_vmd
+                       cpvec[i].cpv_src= (vir_bytes)ptr2acc_data(acc);
+                       cpvec[i].cpv_dst= (vir_bytes)dest;
+                       cpvec[i].cpv_size= size;
+#else /* Minix 3 */
                        vir_cp_req[i].src.proc_nr = this_proc;
                        vir_cp_req[i].src.segment = D;
                        vir_cp_req[i].src.offset= (vir_bytes)ptr2acc_data(acc);
@@ -706,6 +971,7 @@ char *dest;
                        vir_cp_req[i].dst.segment = D;
                        vir_cp_req[i].dst.offset= (vir_bytes)dest;
                        vir_cp_req[i].count= size;
+#endif
                        i++;
                }
 
@@ -714,9 +980,17 @@ char *dest;
 
                if (i == CPVEC_NR || acc == NULL)
                {
+#ifdef __minix_vmd
+                       mess.m_type= SYS_VCOPY;
+                       mess.m1_i1= this_proc;
+                       mess.m1_i2= proc;
+                       mess.m1_i3= i;
+                       mess.m1_p1= (char *)cpvec;
+#else /* Minix 3 */
                        mess.m_type= SYS_VIRVCOPY;
-                       mess.VCP_VEC_SIZE = i;
-                       mess.VCP_VEC_ADDR = (char *) vir_cp_req;
+                       mess.VCP_VEC_SIZE= i;
+                       mess.VCP_VEC_ADDR= (char *) vir_cp_req;
+#endif
                        if (sendrec(SYSTASK, &mess) <0)
                                ip_panic(("unable to sendrec"));
                        if (mess.m_type <0)
@@ -743,15 +1017,20 @@ int operation;
 
        for (m= repl_queue; m;)
        {
+#ifdef __minix_vmd
+               if (m->mq_mess.REP_PROC_NR == proc && 
+                       m->mq_mess.REP_REF ==ref &&
+                       (m->mq_mess.REP_OPERATION == operation ||
+                               operation == CANCEL_ANY))
+#else /* Minix 3 */
                if (m->mq_mess.REP_PROC_NR == proc)
+#endif
                {
 assert(!m_cancel);
                        m_cancel= m;
                        m= m->mq_next;
                        continue;
                }
-assert(m->mq_mess.m_source != PM_PROC_NR);
-assert(m->mq_mess.m_type == REVIVE);
                result= send(m->mq_mess.m_source, &m->mq_mess);
                if (result != OK)
                        ip_panic(("unable to send: %d", result));
@@ -762,8 +1041,6 @@ assert(m->mq_mess.m_type == REVIVE);
        repl_queue= NULL;
        if (m_cancel)
        {
-assert(m_cancel->mq_mess.m_source != PM_PROC_NR);
-assert(m_cancel->mq_mess.m_type == REVIVE);
                result= send(m_cancel->mq_mess.m_source, &m_cancel->mq_mess);
                if (result != OK)
                        ip_panic(("unable to send: %d", result));
@@ -774,5 +1051,5 @@ assert(m_cancel->mq_mess.m_type == REVIVE);
 }
 
 /*
- * $PchId: sr.c,v 1.9 1996/05/07 21:11:14 philip Exp $
+ * $PchId: sr.c,v 1.17 2005/06/28 14:26:16 philip Exp $
  */
diff --git a/servers/inet/sr_int.h b/servers/inet/sr_int.h
new file mode 100644 (file)
index 0000000..6c4eed6
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+inet/sr_int.h
+
+SR internals
+
+Created:       Aug 2004 by Philip Homburg <philip@f-mnx.phicoh.com>
+*/
+
+#define FD_NR                  (16*IP_PORT_MAX)
+
+typedef struct sr_fd
+{
+       int srf_flags;
+       int srf_fd;
+       int srf_port;
+       int srf_select_proc;
+       sr_open_t srf_open;
+       sr_close_t srf_close;
+       sr_write_t srf_write;
+       sr_read_t srf_read;
+       sr_ioctl_t srf_ioctl;
+       sr_cancel_t srf_cancel;
+       sr_select_t srf_select;
+       mq_t *srf_ioctl_q, *srf_ioctl_q_tail;
+       mq_t *srf_read_q, *srf_read_q_tail;
+       mq_t *srf_write_q, *srf_write_q_tail;
+       event_t srf_ioctl_ev;
+       event_t srf_read_ev;
+       event_t srf_write_ev;
+} sr_fd_t;
+
+#      define SFF_FREE         0x00
+#      define SFF_MINOR        0x01
+#      define SFF_INUSE        0x02
+#define SFF_BUSY               0x1C
+#      define SFF_IOCTL_IP     0x04
+#      define SFF_READ_IP      0x08
+#      define SFF_WRITE_IP     0x10
+#define SFF_SUSPENDED  0x1C0
+#      define SFF_IOCTL_SUSP   0x40
+#      define SFF_READ_SUSP    0x80
+#      define SFF_WRITE_SUSP   0x100
+
+EXTERN sr_fd_t sr_fd_table[FD_NR];
+
+/*
+ * $PchId: sr_int.h,v 1.2 2005/06/28 14:28:17 philip Exp $
+ */
index 951133d22b320f3d840f67ea7dd471d5ba75f96b..9b7ba36e66a9de7a04d9e3270b7e44e5517f7006 100644 (file)
@@ -2,8 +2,10 @@
 version.c
 */
 
-char version[]= "inet 0.35K, last compiled on " __DATE__ " " __TIME__;
+#include "inet.h"
+
+char version[]= "inet 0.79, last compiled on " __DATE__ " " __TIME__;
 
 /*
- * $PchId: version.c,v 1.9 1996/12/17 08:01:39 philip Exp philip $
+ * $PchId: version.c,v 1.54 2005/06/28 14:35:01 philip Exp $
  */