./usr/include/minix/procfs.h minix-comp
./usr/include/minix/profile.h minix-comp
./usr/include/minix/queryparam.h minix-comp
+./usr/include/minix/rmib.h minix-comp
./usr/include/minix/rs.h minix-comp
./usr/include/minix/safecopies.h minix-comp
./usr/include/minix/sched.h minix-comp
./usr/tests/minix-posix/ddekit/ddekittest_driver minix-tests
./usr/tests/minix-posix/ddekit/system.conf minix-tests
./usr/tests/minix-posix/mod minix-tests pic
+./usr/tests/minix-posix/rmibtest minix-tests
+./usr/tests/minix-posix/rmibtest/rmibtest minix-tests
+./usr/tests/minix-posix/rmibtest/rmibtest.conf minix-tests
./usr/tests/minix-posix/run minix-tests
./usr/tests/minix-posix/t10a minix-tests
./usr/tests/minix-posix/t11a minix-tests
./usr/tests/minix-posix/testkyua minix-tests
./usr/tests/minix-posix/testmfs minix-tests
./usr/tests/minix-posix/testrelpol minix-tests
+./usr/tests/minix-posix/testrmib minix-tests
./usr/tests/minix-posix/testsh1 minix-tests
./usr/tests/minix-posix/testsh2 minix-tests
./usr/tests/minix-posix/testvm minix-tests
./usr/tests/minix-posix
./usr/tests/minix-posix/blocktest
./usr/tests/minix-posix/ddekit
+./usr/tests/minix-posix/rmibtest
# this one is for term(1)
/set type=dir uid=0 gid=5 mode=775
keymap.h log.h mmio.h mthread.h minlib.h \
netdriver.h optset.h padconf.h partition.h portio.h \
priv.h procfs.h profile.h queryparam.h \
- rs.h safecopies.h sched.h sef.h sffs.h \
+ rmib.h rs.h safecopies.h sched.h sef.h sffs.h \
sound.h spin.h sys_config.h sysctl.h sysinfo.h \
syslib.h sysutil.h timers.h type.h \
u64.h usb.h usb_ch9.h vbox.h \
/* Process event message from PM. */
#define PROC_EVENT (COMMON_RQ_BASE+3)
+/* MIB information request for the root node of a registered subtree. */
+#define COMMON_MIB_INFO (COMMON_RQ_BASE+4)
+
+/* MIB sysctl request on a registered subtree. */
+#define COMMON_MIB_CALL (COMMON_RQ_BASE+5)
+
/* Reply to process event message to PM. */
#define PROC_EVENT_REPLY (COMMON_RS_BASE+0)
+/* Reply to MIB information or sysctl request. */
+#define COMMON_MIB_REPLY (COMMON_RS_BASE+1)
+
/*===========================================================================*
* Messages for VM server *
*===========================================================================*/
#define IS_MIB_CALL(type) (((type) & ~0xff) == MIB_BASE)
#define MIB_SYSCTL (MIB_BASE + 0) /* sysctl(2) */
+#define MIB_REGISTER (MIB_BASE + 1) /* mount subtree */
+#define MIB_DEREGISTER (MIB_BASE + 2) /* unmount subtree */
-#define NR_MIB_CALLS 1 /* highest number from base plus one */
+#define NR_MIB_CALLS 3 /* highest number from base plus one */
/*===========================================================================*
* Internal codes used by several services *
#include <minix/sysutil.h>
#include <minix/timers.h>
#include <minix/type.h>
+#include <minix/ds.h>
#include <sys/param.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <assert.h>
#endif
} mess_lsys_kern_vsafecopy;
_ASSERT_MSG_SIZE(mess_lsys_kern_vsafecopy);
+typedef struct {
+ uint32_t root_id;
+ uint32_t flags;
+ unsigned int csize;
+ unsigned int clen;
+ unsigned int miblen;
+ int mib[CTL_SHORTNAME];
+ uint8_t padding[4];
+} mess_lsys_mib_register;
+_ASSERT_MSG_SIZE(mess_lsys_mib_register);
+
+typedef struct {
+ uint32_t req_id;
+ ssize_t status;
+ uint8_t padding[48];
+} mess_lsys_mib_reply;
+_ASSERT_MSG_SIZE(mess_lsys_mib_reply);
+
typedef struct {
int devind;
int port;
} mess_mib_lc_sysctl;
_ASSERT_MSG_SIZE(mess_mib_lc_sysctl);
+typedef struct {
+ uint32_t req_id;
+ uint32_t root_id;
+ cp_grant_id_t name_grant;
+ unsigned int name_len;
+ cp_grant_id_t oldp_grant;
+ size_t oldp_len;
+ cp_grant_id_t newp_grant;
+ size_t newp_len;
+ endpoint_t user_endpt;
+ uint32_t flags;
+ uint32_t root_ver;
+ uint32_t tree_ver;
+ uint8_t padding[8];
+} mess_mib_lsys_call;
+_ASSERT_MSG_SIZE(mess_mib_lsys_call);
+
+typedef struct {
+ uint32_t req_id;
+ uint32_t root_id;
+ cp_grant_id_t name_grant;
+ size_t name_size;
+ cp_grant_id_t desc_grant;
+ size_t desc_size;
+ uint8_t padding[32];
+} mess_mib_lsys_info;
+_ASSERT_MSG_SIZE(mess_mib_lsys_info);
+
typedef struct {
off_t offset;
void *addr;
mess_lsys_krn_sys_vdevio m_lsys_krn_sys_vdevio;
mess_lsys_krn_sys_vumap m_lsys_krn_sys_vumap;
mess_lsys_kern_vsafecopy m_lsys_kern_vsafecopy;
+ mess_lsys_mib_register m_lsys_mib_register;
+ mess_lsys_mib_reply m_lsys_mib_reply;
mess_lsys_pci_busc_get_bar m_lsys_pci_busc_get_bar;
mess_lsys_pm_getepinfo m_lsys_pm_getepinfo;
mess_lsys_pm_getprocnr m_lsys_pm_getprocnr;
mess_lsys_vm_update m_lsys_vm_update;
mess_lsys_vm_vmremap m_lsys_vm_vmremap;
mess_mib_lc_sysctl m_mib_lc_sysctl;
+ mess_mib_lsys_call m_mib_lsys_call;
+ mess_mib_lsys_info m_mib_lsys_info;
mess_mmap m_mmap;
mess_net_netdrv_dl_conf m_net_netdrv_dl_conf;
mess_net_netdrv_dl_getstat_s m_net_netdrv_dl_getstat_s;
--- /dev/null
+#ifndef _MINIX_RMIB_H
+#define _MINIX_RMIB_H
+
+/*
+ * This header file is for use by services that use the remote MIB (RMIB)
+ * functionality of libsys. RMIB allows services to mount and handle certain
+ * subtrees of the MIB service's sysctl tree.
+ */
+
+#include <sys/sysctl.h>
+
+/*
+ * This structure contains a number of less heavily used parameters for handler
+ * functions, mainly to provide extensibility while limiting argument clutter.
+ */
+struct rmib_call {
+ endpoint_t call_endpt; /* endpoint of the user process */
+ const int *call_name; /* remaining part of the name */
+ unsigned int call_namelen; /* length of the remaining name part */
+ unsigned int call_flags; /* RMIB_FLAG_ call flags */
+ uint32_t call_rootver; /* version of all nodes in subtree */
+ uint32_t call_treever; /* version of the entire MIB tree */
+};
+
+/*
+ * Call flags.
+ *
+ * TODO: this is effectively a flag used on the wire. This should be turned
+ * into a proper definition shared with the MIB service. As long as we have
+ * only one flag anyway, this is not exactly urgent though.
+ */
+#define RMIB_FLAG_AUTH 0x1 /* user has superuser privileges */
+
+struct rmib_node;
+struct rmib_oldp;
+struct rmib_newp;
+
+typedef ssize_t (*rmib_func_ptr)(struct rmib_call *, struct rmib_node *,
+ struct rmib_oldp *, struct rmib_newp *);
+
+/*
+ * The central structure for remote MIB nodes. This is essentially a somewhat
+ * cut-down version of the node structure used within the MIB service. See the
+ * source code of that service for several details that apply here as well.
+ * The 'rnode_' prefix makes it possible to include both this header file and
+ * the MIB service's internal header file at once--neat if useless.
+ */
+struct rmib_node {
+ uint32_t rnode_flags; /* CTLTYPE_ type and CTLFLAG_ flags */
+ size_t rnode_size; /* size of associated data */
+ union ixfer_rnode_val_u {
+ bool rvu_bool; /* immediate boolean */
+ int rvu_int; /* immediate integer */
+ u_quad_t rvu_quad; /* immediate quad */
+ uint32_t rvu_clen; /* number of actual children */
+ } rnode_val_u;
+ union pxfer_rnode_ptr_u {
+ void *rpu_data; /* struct or string data pointer */
+ struct rmib_node *rpu_cptr; /* child node array */
+ } rnode_ptr_u;
+ rmib_func_ptr rnode_func; /* handler function */
+ const char *rnode_name; /* node name string */
+ const char *rnode_desc; /* node description (may be NULL) */
+};
+#define rnode_bool rnode_val_u.rvu_bool
+#define rnode_int rnode_val_u.rvu_int
+#define rnode_quad rnode_val_u.rvu_quad
+#define rnode_clen rnode_val_u.rvu_clen
+#define rnode_data rnode_ptr_u.rpu_data
+#define rnode_cptr rnode_ptr_u.rpu_cptr
+
+/* Various macros to initialize nodes at compile time. */
+#define RMIB_NODE(f,t,n,d) { \
+ .rnode_flags = CTLTYPE_NODE | CTLFLAG_READONLY | \
+ CTLFLAG_PERMANENT | f, \
+ .rnode_size = __arraycount(t), \
+ .rnode_cptr = t, \
+ .rnode_name = n, \
+ .rnode_desc = d \
+}
+#define RMIB_FUNC(f,s,fp,n,d) { \
+ .rnode_flags = CTLFLAG_PERMANENT | f, \
+ .rnode_size = s, \
+ .rnode_func = fp, \
+ .rnode_name = n, \
+ .rnode_desc = d \
+}
+#define RMIB_BOOL(f,b,n,d) { \
+ .rnode_flags = CTLTYPE_BOOL | CTLFLAG_PERMANENT | \
+ CTLFLAG_IMMEDIATE | f, \
+ .rnode_size = sizeof(bool), \
+ .rnode_bool = b, \
+ .rnode_name = n, \
+ .rnode_desc = d \
+}
+#define RMIB_INT(f,i,n,d) { \
+ .rnode_flags = CTLTYPE_INT | CTLFLAG_PERMANENT | \
+ CTLFLAG_IMMEDIATE | f, \
+ .rnode_size = sizeof(int), \
+ .rnode_int = i, \
+ .rnode_name = n, \
+ .rnode_desc = d \
+}
+#define RMIB_QUAD(f,q,n,d) { \
+ .rnode_flags = CTLTYPE_QUAD | CTLFLAG_PERMANENT | \
+ CTLFLAG_IMMEDIATE | f, \
+ .rnode_size = sizeof(u_quad_t), \
+ .rnode_quad = q, \
+ .rnode_name = n, \
+ .rnode_desc = d \
+}
+#define _RMIB_DATA(f,s,p,n,d) { \
+ .rnode_flags = CTLFLAG_PERMANENT | f, \
+ .rnode_size = s, \
+ .rnode_data = __UNCONST(p), \
+ .rnode_name = n, \
+ .rnode_desc = d \
+}
+/*
+ * The following macros really require a pointer to the proper data type; weird
+ * casts may not trigger compiler warnings but do allow for memory corruption.
+ * The first three need to be passed a pointer to a bool, int, and u_quad_t,
+ * respectively. RMIB_STRING needs a pointer to a character array, so that
+ * sizeof(array) yields the proper size. Since RMIB_STRUCT may be given a
+ * pointer to either a structure or an array, it must also be given a size.
+ */
+#define RMIB_BOOLPTR(f,p,n,d) _RMIB_DATA(CTLTYPE_BOOL | f, sizeof(*p), p, n, d)
+#define RMIB_INTPTR(f,p,n,d) _RMIB_DATA(CTLTYPE_INT | f, sizeof(*p), p, n, d)
+#define RMIB_QUADPTR(f,p,n,d) _RMIB_DATA(CTLTYPE_QUAD | f, sizeof(*p), p, n, d)
+#define RMIB_STRING(f,p,n,d) \
+ _RMIB_DATA(CTLTYPE_STRING | f, sizeof(p), p, n, d)
+#define RMIB_STRUCT(f,s,p,n,d) _RMIB_DATA(CTLTYPE_STRUCT | f, s, p, n, d)
+
+/* Shortcut flag macros. */
+#define RMIB_RO CTLFLAG_READONLY /* shortcut for read-only nodes */
+#define RMIB_RW CTLFLAG_READWRITE /* shortcut for read-write nodes */
+
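+/*
+ * Usage sketch, in a comment only; the "example_" names below are made up
+ * and not part of this header.  The data macros above are meant to be used
+ * as array initializers, with each variable of the exact type implied by the
+ * macro.  Note how RMIB_STRING is given the character array itself, so that
+ * sizeof() yields the buffer size, whereas the other macros take a pointer:
+ *
+ *	static int example_int;
+ *	static char example_str[16];
+ *
+ *	static struct rmib_node example_children[] = {
+ *		RMIB_INTPTR(RMIB_RW, &example_int, "int", "Example integer"),
+ *		RMIB_STRING(RMIB_RW, example_str, "str", "Example string"),
+ *	};
+ */
+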
+/* Function prototypes. */
+int rmib_register(const int * name, unsigned int namelen, struct rmib_node *);
+int rmib_deregister(struct rmib_node *);
+void rmib_reset(void);
+void rmib_process(const message *, int);
+
+int rmib_inrange(struct rmib_oldp *, size_t);
+size_t rmib_getoldlen(struct rmib_oldp *);
+ssize_t rmib_copyout(struct rmib_oldp *, size_t, const void * __restrict,
+ size_t);
+int rmib_copyin(struct rmib_newp * __restrict, void * __restrict, size_t);
+ssize_t rmib_readwrite(struct rmib_call *, struct rmib_node *,
+ struct rmib_oldp *, struct rmib_newp *);
+
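+/*
+ * Overall usage sketch, in a comment only.  All "example_" names and the
+ * mount path below are hypothetical.  A service declares a static subtree,
+ * asks the MIB service to mount it at a particular path during its own
+ * initialization, and passes any messages that arrive from the MIB service
+ * to rmib_process():
+ *
+ *	static struct rmib_node example_table[] = {
+ *		RMIB_INT(RMIB_RO, 3, "version", "Example version number"),
+ *		RMIB_INT(RMIB_RW, 0, "level", "Example adjustable level"),
+ *	};
+ *	static struct rmib_node example_root =
+ *	    RMIB_NODE(0, example_table, "example", "Example subtree");
+ *	static const int example_mib[] = { CTL_NET, 100 };
+ *
+ *	At initialization time:
+ *
+ *	if (rmib_register(example_mib, __arraycount(example_mib),
+ *	    &example_root) != OK)
+ *		panic("unable to register example subtree");
+ *
+ *	In the message loop, after sef_receive_status(&m, &ipc_status):
+ *
+ *	if (m.m_source == MIB_PROC_NR) {
+ *		rmib_process(&m, ipc_status);
+ *		continue;
+ *	}
+ */
+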
+#endif /* !_MINIX_RMIB_H */
/* Identifiers for subnodes of MINIX_MIB. */
#define MIB_NODES 1
#define MIB_OBJECTS 2
+#define MIB_REMOTES 3
/* Identifiers for subnodes of MINIX_PROC. */
#define PROC_LIST 1
*offset_result = g.cp_u.cp_direct.cp_start + offset_in;
*e_granter = granter;
} else if(g.cp_flags & CPF_MAGIC) {
- /* Currently, it is hardcoded that only FS may do
- * magic grants.
+ /* Currently, it is hardcoded that only VFS and MIB may do
+ * magic grants. TODO: this should be a system.conf flag.
*/
- if(granter != VFS_PROC_NR) {
+ if(granter != VFS_PROC_NR && granter != MIB_PROC_NR) {
printf(
"verify_grant: magic grant verify failed: granter (%d) "
- "is not FS (%d)\n", granter, VFS_PROC_NR);
+ "not allowed\n", granter);
return EPERM;
}
optset.c \
panic.c \
proceventmask.c \
+ rmib.c \
safecopies.c \
sched_start.c \
sched_stop.c \
--- /dev/null
+/* Service support for remote MIB subtrees - by D.C. van Moolenbroek */
+/*
+ * In effect, this is a lightweight version of the MIB service's main and tree
+ * code. Some parts of the code have even been copied almost as is, even
+ * though the copy here operates on slightly different data structures in order
+ * to keep the implementation more lightweight. For clarification on many
+ * aspects of the source code here, see the source code of the MIB service.
+ *
+ * There is no way for this module to get to know about MIB service deaths
+ * without possibly interfering with the main code of the service this module
+ * is a part of. As a result, re-registration of mount points after a MIB
+ * service restart is not automatic. Instead, the main service code could
+ * implement re-registration by first calling rmib_reset() and then making the
+ * appropriate rmib_register() calls again. TODO: it would be nicer if this
+ * module implemented re-registration, but that requires saving the MIB path
+ * for each of the registered subtrees.
+ */
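+
+/*
+ * A minimal sketch of such re-registration, assuming that the service keeps
+ * the original mount path and subtree root around (the "example_" names are
+ * hypothetical) and has some way to detect that the MIB service has been
+ * restarted:
+ *
+ *	static void
+ *	example_remount(void)
+ *	{
+ *
+ *		rmib_reset();
+ *
+ *		if (rmib_register(example_mib, __arraycount(example_mib),
+ *		    &example_root) != OK)
+ *			printf("example: unable to re-register subtree\n");
+ *	}
+ */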
+
+#include <minix/drivers.h>
+#include <minix/sysctl.h>
+#include <minix/rmib.h>
+
+/* Structures for outgoing and incoming data, deliberately distinctly named. */
+struct rmib_oldp {
+ cp_grant_id_t oldp_grant;
+ size_t oldp_len;
+};
+
+struct rmib_newp {
+ cp_grant_id_t newp_grant;
+ size_t newp_len;
+};
+
+/*
+ * The maximum field size, in bytes, for which updates (i.e., writes) to the
+ * field do not require dynamic memory allocation. By policy, non-root users
+ * may not update fields exceeding this size at all. For strings, this size
+ * includes an extra byte for adding a null terminator if missing. As the name
+ * indicates, a buffer of this size is placed on the stack.
+ */
+#define RMIB_STACKBUF 257
+
+/*
+ * The maximum number of subtrees that this service can mount. This value can
+ * be increased without any problems, but it is already quite high in practice.
+ */
+#define RMIB_MAX_SUBTREES 16
+
+/*
+ * The array of subtree root nodes. Each root node's array index is the root
+ * identifier used in communication with the MIB service.
+ */
+static struct rmib_node *rnodes[RMIB_MAX_SUBTREES] = { NULL };
+
+/*
+ * Return TRUE or FALSE indicating whether the given offset is within the range
+ * of data that is to be copied out. This call can be used to test whether
+ * certain bits of data need to be prepared for copying at all.
+ */
+int
+rmib_inrange(struct rmib_oldp * oldp, size_t off)
+{
+
+ if (oldp == NULL)
+ return FALSE;
+
+ return (off < oldp->oldp_len);
+}
+
+/*
+ * Return the total length of the requested data. This should not be used
+ * directly except in highly unusual cases, such as particular node requests
+ * where the request semantics blatantly violate overall sysctl(2) semantics.
+ */
+size_t
+rmib_getoldlen(struct rmib_oldp * oldp)
+{
+
+ if (oldp == NULL)
+ return 0;
+
+ return oldp->oldp_len;
+}
+
+/*
+ * Copy out (partial) data to the user. The copy is automatically limited to
+ * the range of data requested by the user. Return the requested length on
+ * success (for the caller's convenience) or an error code on failure.
+ */
+ssize_t
+rmib_copyout(struct rmib_oldp * __restrict oldp, size_t off,
+ const void * __restrict buf, size_t size)
+{
+ size_t len;
+ int r;
+
+ len = size;
+ assert(len <= SSIZE_MAX);
+
+ if (oldp == NULL || off >= oldp->oldp_len)
+ return size; /* nothing to do */
+
+ if (len > oldp->oldp_len - off)
+ len = oldp->oldp_len - off;
+
+ if ((r = sys_safecopyto(MIB_PROC_NR, oldp->oldp_grant, off,
+ (vir_bytes)buf, len)) != OK)
+ return r;
+
+ return size;
+}
+
+/*
+ * Copy in data from the user. The given length must match exactly the length
+ * given by the user. Return OK or an error code.
+ */
+int
+rmib_copyin(struct rmib_newp * __restrict newp, void * __restrict buf,
+ size_t len)
+{
+
+ if (newp == NULL || len != newp->newp_len)
+ return EINVAL;
+
+ if (len == 0)
+ return OK;
+
+ return sys_safecopyfrom(MIB_PROC_NR, newp->newp_grant, 0,
+ (vir_bytes)buf, len);
+}
+
+/*
+ * Copy out a node to userland, using the exchange format for nodes (namely,
+ * a sysctlnode structure). Return the size of the object that is (or, if the
+ * node falls outside the requested data range, would be) copied out on
+ * success, or a negative error code on failure.
+ */
+static ssize_t
+rmib_copyout_node(struct rmib_call * call, struct rmib_oldp * oldp,
+ ssize_t off, unsigned int id, const struct rmib_node * rnode)
+{
+ struct sysctlnode scn;
+ int visible;
+
+ if (!rmib_inrange(oldp, off))
+ return sizeof(scn); /* nothing to do */
+
+ memset(&scn, 0, sizeof(scn));
+
+ /*
+ * The RMIB implementation does not overload flags, so it also need not
+ * hide any of them from the user.
+ */
+ scn.sysctl_flags = SYSCTL_VERSION | rnode->rnode_flags;
+ scn.sysctl_num = id;
+ strlcpy(scn.sysctl_name, rnode->rnode_name, sizeof(scn.sysctl_name));
+ scn.sysctl_ver = call->call_rootver;
+ scn.sysctl_size = rnode->rnode_size;
+
+ /* Some information is only visible if the user can access the node. */
+ visible = (!(rnode->rnode_flags & CTLFLAG_PRIVATE) ||
+ (call->call_flags & RMIB_FLAG_AUTH));
+
+ /*
+ * For immediate types, store the immediate value in the resulting
+ * structure, unless the caller is not authorized to obtain the value.
+ */
+ if ((rnode->rnode_flags & CTLFLAG_IMMEDIATE) && visible) {
+ switch (SYSCTL_TYPE(rnode->rnode_flags)) {
+ case CTLTYPE_BOOL:
+ scn.sysctl_bdata = rnode->rnode_bool;
+ break;
+ case CTLTYPE_INT:
+ scn.sysctl_idata = rnode->rnode_int;
+ break;
+ case CTLTYPE_QUAD:
+ scn.sysctl_qdata = rnode->rnode_quad;
+ break;
+ }
+ }
+
+ /* Special rules apply to parent nodes. */
+ if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_NODE) {
+ /* Report the node size the way NetBSD does, just in case. */
+ scn.sysctl_size = sizeof(scn);
+
+ /*
+ * For real parent nodes, report child information, but only if
+ * the node itself is accessible by the caller. For function-
+ * driven nodes, set a nonzero function address, for trace(1).
+ */
+ if (rnode->rnode_func == NULL && visible) {
+ scn.sysctl_csize = rnode->rnode_size;
+ scn.sysctl_clen = rnode->rnode_clen;
+ } else if (rnode->rnode_func != NULL)
+ scn.sysctl_func = SYSCTL_NODE_FN;
+ }
+
+ /* Copy out the resulting node. */
+ return rmib_copyout(oldp, off, &scn, sizeof(scn));
+}
+
+/*
+ * Given a query on a non-leaf (parent) node, provide the user with an array of
+ * this node's children.
+ */
+static ssize_t
+rmib_query(struct rmib_call * call, struct rmib_node * rparent,
+ struct rmib_oldp * oldp, struct rmib_newp * newp)
+{
+ struct sysctlnode scn;
+ struct rmib_node *rnode;
+ unsigned int id;
+ ssize_t r, off;
+
+ /* If the user passed in version numbers, check them. */
+ if (newp != NULL) {
+ if ((r = rmib_copyin(newp, &scn, sizeof(scn))) != OK)
+ return r;
+
+ if (SYSCTL_VERS(scn.sysctl_flags) != SYSCTL_VERSION)
+ return EINVAL;
+
+ /*
+ * If a node version number is given, it must match the version
+ * of the subtree or the version of the entire MIB tree.
+ */
+ if (scn.sysctl_ver != 0 &&
+ scn.sysctl_ver != call->call_rootver &&
+ scn.sysctl_ver != call->call_treever)
+ return EINVAL;
+ }
+
+ /* Enumerate the child nodes of the given parent node. */
+ off = 0;
+
+ for (id = 0; id < rparent->rnode_size; id++) {
+ rnode = &rparent->rnode_cptr[id];
+
+ if (rnode->rnode_flags == 0)
+ continue;
+
+ if ((r = rmib_copyout_node(call, oldp, off, id, rnode)) < 0)
+ return r;
+ off += r;
+ }
+
+ return off;
+}
+
+/*
+ * Copy out a node description to userland, using the exchange format for node
+ * descriptions (namely, a sysctldesc structure). Return the size of the
+ * object that is (or, if the description falls outside the requested data
+ * range, would be) copied out on success, or a negative error code on failure.
+ * The function may return 0 to indicate that nothing was copied out after all.
+ */
+static ssize_t
+rmib_copyout_desc(struct rmib_call * call, struct rmib_oldp * oldp,
+ ssize_t off, unsigned int id, const struct rmib_node * rnode)
+{
+ struct sysctldesc scd;
+ size_t len, size;
+ ssize_t r;
+
+ /* Descriptions of private nodes are considered private too. */
+ if ((rnode->rnode_flags & CTLFLAG_PRIVATE) &&
+ !(call->call_flags & RMIB_FLAG_AUTH))
+ return 0;
+
+ /*
+ * Unfortunately, we do not have a scratch buffer here. Instead, copy
+ * out the description structure and the actual description string
+ * separately. This is more costly, but remote subtrees are already
+ * not going to give the best performance ever. We do optimize for the
+ * case that there is no description, because that is relatively easy.
+ */
+ /* The description length includes the null terminator. */
+ if (rnode->rnode_desc != NULL)
+ len = strlen(rnode->rnode_desc) + 1;
+ else
+ len = 1;
+
+ memset(&scd, 0, sizeof(scd));
+ scd.descr_num = id;
+ scd.descr_ver = call->call_rootver;
+ scd.descr_len = len;
+
+ size = offsetof(struct sysctldesc, descr_str);
+
+ if (len == 1) {
+ scd.descr_str[0] = '\0'; /* superfluous */
+ size++;
+ }
+
+ /* Copy out the structure, possibly including a null terminator. */
+ if ((r = rmib_copyout(oldp, off, &scd, size)) < 0)
+ return r;
+
+ if (len > 1) {
+ /* Copy out the description itself. */
+ if ((r = rmib_copyout(oldp, off + size, rnode->rnode_desc,
+ len)) < 0)
+ return r;
+
+ size += len;
+ }
+
+ /*
+ * By aligning just the size, we may leave garbage between the entries
+ * copied out, which is fine because it is userland's own data.
+ */
+ return roundup2(size, sizeof(int32_t));
+}
+
+/*
+ * Retrieve node descriptions in bulk, or retrieve a particular node's
+ * description.
+ */
+static ssize_t
+rmib_describe(struct rmib_call * call, struct rmib_node * rparent,
+ struct rmib_oldp * oldp, struct rmib_newp * newp)
+{
+ struct sysctlnode scn;
+ struct rmib_node *rnode;
+ unsigned int id;
+ ssize_t r, off;
+
+ if (newp != NULL) {
+ if ((r = rmib_copyin(newp, &scn, sizeof(scn))) != OK)
+ return r;
+
+ if (SYSCTL_VERS(scn.sysctl_flags) != SYSCTL_VERSION)
+ return EINVAL;
+
+ /* Locate the child node. */
+ if ((unsigned int)scn.sysctl_num >= rparent->rnode_size)
+ return ENOENT;
+ rnode = &rparent->rnode_cptr[scn.sysctl_num];
+ if (rnode->rnode_flags == 0)
+ return ENOENT;
+
+ /* Descriptions of private nodes are considered private too. */
+ if ((rnode->rnode_flags & CTLFLAG_PRIVATE) &&
+ !(call->call_flags & RMIB_FLAG_AUTH))
+ return EPERM;
+
+ /*
+ * If a description pointer was given, this is a request to
+ * set the node's description. We do not allow this, nor would
+ * we be able to support it, since we cannot access the data.
+ */
+ if (scn.sysctl_desc != NULL)
+ return EPERM;
+
+ /*
+ * Copy out the requested node's description. At this point we
+ * should be sure that this call does not return zero.
+ */
+ return rmib_copyout_desc(call, oldp, 0, scn.sysctl_num, rnode);
+ }
+
+ /* Describe the child nodes of the given parent node. */
+ off = 0;
+
+ for (id = 0; id < rparent->rnode_size; id++) {
+ rnode = &rparent->rnode_cptr[id];
+
+ if (rnode->rnode_flags == 0)
+ continue;
+
+ if ((r = rmib_copyout_desc(call, oldp, off, id, rnode)) < 0)
+ return r;
+ off += r;
+ }
+
+ return off;
+}
+
+/*
+ * Return a pointer to the data associated with the given node, or NULL if the
+ * node has no associated data. Actual calls to this function should never
+ * result in NULL - as long as the proper rules are followed elsewhere.
+ */
+static void *
+rmib_getptr(struct rmib_node * rnode)
+{
+
+ switch (SYSCTL_TYPE(rnode->rnode_flags)) {
+ case CTLTYPE_BOOL:
+ if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
+ return &rnode->rnode_bool;
+ break;
+ case CTLTYPE_INT:
+ if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
+ return &rnode->rnode_int;
+ break;
+ case CTLTYPE_QUAD:
+ if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
+ return &rnode->rnode_quad;
+ break;
+ case CTLTYPE_STRING:
+ case CTLTYPE_STRUCT:
+ if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
+ return NULL;
+ break;
+ default:
+ return NULL;
+ }
+
+ return rnode->rnode_data;
+}
+
+/*
+ * Read current (old) data from a regular data node, if requested. Return the
+ * old data length.
+ */
+static ssize_t
+rmib_read(struct rmib_node * rnode, struct rmib_oldp * oldp)
+{
+ void *ptr;
+ size_t oldlen;
+ int r;
+
+ if ((ptr = rmib_getptr(rnode)) == NULL)
+ return EINVAL;
+
+ if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_STRING)
+ oldlen = strlen(rnode->rnode_data) + 1;
+ else
+ oldlen = rnode->rnode_size;
+
+ if (oldlen > SSIZE_MAX)
+ return EINVAL;
+
+ /* Copy out the current data, if requested at all. */
+ if (oldp != NULL && (r = rmib_copyout(oldp, 0, ptr, oldlen)) < 0)
+ return r;
+
+ /* Return the current length in any case. */
+ return (ssize_t)oldlen;
+}
+
+/*
+ * Write new data into a regular data node, if requested.
+ */
+static int
+rmib_write(struct rmib_call * call, struct rmib_node * rnode,
+ struct rmib_newp * newp)
+{
+ bool b[(sizeof(bool) == sizeof(char)) ? 1 : -1]; /* for sanitizing */
+ char *src, *dst, buf[RMIB_STACKBUF];
+ size_t newlen;
+ int r;
+
+ if (newp == NULL)
+ return OK; /* nothing to do */
+
+ /*
+ * When setting a new value, we cannot risk doing an in-place update:
+ * the copy from userland may fail halfway through, in which case an
+ * in-place update could leave the node value in a corrupted state.
+ * Thus, we must first fetch any new data into a temporary buffer.
+ */
+ newlen = newp->newp_len;
+
+ if ((dst = rmib_getptr(rnode)) == NULL)
+ return EINVAL;
+
+ switch (SYSCTL_TYPE(rnode->rnode_flags)) {
+ case CTLTYPE_BOOL:
+ case CTLTYPE_INT:
+ case CTLTYPE_QUAD:
+ case CTLTYPE_STRUCT:
+ /* Non-string types must have an exact size match. */
+ if (newlen != rnode->rnode_size)
+ return EINVAL;
+ break;
+ case CTLTYPE_STRING:
+ /*
+ * Strings must not exceed their buffer size. There is a
+ * second check further below, because we allow userland to
+ * give us an unterminated string. In that case we terminate
+ * it ourselves, but then the null terminator must fit as well.
+ */
+ if (newlen > rnode->rnode_size)
+ return EINVAL;
+ break;
+ default:
+ return EINVAL;
+ }
+
+ /*
+ * If we cannot fit the data in the small stack buffer, then allocate a
+ * temporary buffer. We add one extra byte so that we can add a null
+ * terminator at the end of strings in case userland did not supply
+ * one. Either way, we must free the temporary buffer later!
+ */
+ if (newlen + 1 > sizeof(buf)) {
+ /*
+ * For regular users, we do not want to perform dynamic memory
+ * allocation. Thus, for CTLFLAG_ANYWRITE nodes, only the
+ * superuser may set values exceeding the small buffer in size.
+ */
+ if (!(call->call_flags & RMIB_FLAG_AUTH))
+ return EPERM;
+
+ /* Do not return ENOMEM on allocation failure. */
+ if ((src = malloc(newlen + 1)) == NULL)
+ return EINVAL;
+ } else
+ src = buf;
+
+ /* Copy in the data. Note that the given new length may be zero. */
+ if ((r = rmib_copyin(newp, src, newlen)) == OK) {
+ /* Check and, if acceptable, store the new value. */
+ switch (SYSCTL_TYPE(rnode->rnode_flags)) {
+ case CTLTYPE_BOOL:
+ /* Sanitize booleans. See the MIB code for details. */
+ b[0] = (bool)src[0];
+ memcpy(dst, &b[0], sizeof(b[0]));
+ break;
+ case CTLTYPE_INT:
+ case CTLTYPE_QUAD:
+ case CTLTYPE_STRUCT:
+ memcpy(dst, src, rnode->rnode_size);
+ break;
+ case CTLTYPE_STRING:
+ if (newlen == rnode->rnode_size &&
+ src[newlen - 1] != '\0') {
+ /* Our null terminator does not fit! */
+ r = EINVAL;
+ break;
+ }
+ src[newlen] = '\0';
+ strlcpy(dst, src, rnode->rnode_size);
+ break;
+ default:
+ r = EINVAL;
+ }
+ }
+
+ if (src != buf)
+ free(src);
+
+ return r;
+}
+
+/*
+ * Read and/or write the value of a regular data node. A regular data node is
+ * a leaf node. Typically, a leaf node has no associated function, in which
+ * case this function will be used instead. In addition, this function may be
+ * used from handler functions as part of their functionality.
+ */
+ssize_t
+rmib_readwrite(struct rmib_call * call, struct rmib_node * rnode,
+ struct rmib_oldp * oldp, struct rmib_newp * newp)
+{
+ ssize_t len;
+ int r;
+
+ /* Copy out old data, if requested. Always get the old data length. */
+ if ((r = len = rmib_read(rnode, oldp)) < 0)
+ return r;
+
+ /* Copy in new data, if requested. */
+ if ((r = rmib_write(call, rnode, newp)) != OK)
+ return r;
+
+ /* Return the old data length. */
+ return len;
+}
+
+/*
+ * Handle a sysctl(2) call from a user process, relayed by the MIB service to
+ * us. If the call succeeds, return the old length. The MIB service will
+ * perform a check against the given old length and return ENOMEM to the caller
+ * when applicable, so we do not have to do that here. If the call fails,
+ * return a negative error code.
+ */
+static ssize_t
+rmib_call(const message * m_in)
+{
+ struct rmib_node *rnode, *rparent;
+ struct rmib_call call;
+ struct rmib_oldp oldp_data, *oldp;
+ struct rmib_newp newp_data, *newp;
+ unsigned int root_id, namelen;
+ int r, id, is_leaf, has_func, name[CTL_MAXNAME];
+
+ /*
+ * Look up the root of the subtree that is the subject of the call. If
+ * the call is for a subtree that is not registered, return ERESTART to
+ * indicate to the MIB service that it should deregister the subtree it
+ * thinks we have. This case may occur in practice if a deregistration
+ * request from us crosses a sysctl call request from the MIB service.
+ */
+ root_id = m_in->m_mib_lsys_call.root_id;
+ if (root_id >= __arraycount(rnodes) || rnodes[root_id] == NULL)
+ return ERESTART;
+ rnode = rnodes[root_id];
+
+ /*
+ * Set up all data structures that we need to use while processing the
+ * call. Start by copying in the remainder of the MIB name.
+ */
+ /* A zero name length is valid and should always yield EISDIR. */
+ namelen = m_in->m_mib_lsys_call.name_len;
+ if (namelen > __arraycount(name))
+ return EINVAL;
+
+ if (namelen > 0) {
+ r = sys_safecopyfrom(m_in->m_source,
+ m_in->m_mib_lsys_call.name_grant, 0, (vir_bytes)name,
+ sizeof(name[0]) * namelen);
+ if (r != OK)
+ return r;
+ }
+
+ oldp_data.oldp_grant = m_in->m_mib_lsys_call.oldp_grant;
+ oldp_data.oldp_len = m_in->m_mib_lsys_call.oldp_len;
+ oldp = (GRANT_VALID(oldp_data.oldp_grant)) ? &oldp_data : NULL;
+
+ newp_data.newp_grant = m_in->m_mib_lsys_call.newp_grant;
+ newp_data.newp_len = m_in->m_mib_lsys_call.newp_len;
+ newp = (GRANT_VALID(newp_data.newp_grant)) ? &newp_data : NULL;
+
+ call.call_endpt = m_in->m_mib_lsys_call.user_endpt;
+ call.call_name = name;
+ call.call_namelen = namelen;
+ call.call_flags = m_in->m_mib_lsys_call.flags;
+ call.call_rootver = m_in->m_mib_lsys_call.root_ver;
+ call.call_treever = m_in->m_mib_lsys_call.tree_ver;
+
+ /*
+ * Dispatch the call.
+ */
+ for (rparent = rnode; call.call_namelen > 0; rparent = rnode) {
+ id = call.call_name[0];
+ call.call_name++;
+ call.call_namelen--;
+
+ assert(SYSCTL_TYPE(rparent->rnode_flags) == CTLTYPE_NODE);
+
+ /* Check for meta-identifiers. */
+ if (id < 0) {
+ /*
+ * A meta-identifier must always be the last name
+ * component.
+ */
+ if (call.call_namelen > 0)
+ return EINVAL;
+
+ switch (id) {
+ case CTL_QUERY:
+ return rmib_query(&call, rparent, oldp, newp);
+ case CTL_DESCRIBE:
+ return rmib_describe(&call, rparent, oldp,
+ newp);
+ case CTL_CREATE:
+ case CTL_DESTROY:
+ /* We support fully static subtrees only. */
+ return EPERM;
+ default:
+ return EOPNOTSUPP;
+ }
+ }
+
+ /* Locate the child node. */
+ if ((unsigned int)id >= rparent->rnode_size)
+ return ENOENT;
+ rnode = &rparent->rnode_cptr[id];
+ if (rnode->rnode_flags == 0)
+ return ENOENT;
+
+ /* Check if access is permitted at this level. */
+ if ((rnode->rnode_flags & CTLFLAG_PRIVATE) &&
+ !(call.call_flags & RMIB_FLAG_AUTH))
+ return EPERM;
+
+ /*
+ * Is this a leaf node, and/or is this node handled by a
+ * function? If either is true, resolution ends at this level.
+ */
+ is_leaf = (SYSCTL_TYPE(rnode->rnode_flags) != CTLTYPE_NODE);
+ has_func = (rnode->rnode_func != NULL);
+
+ /*
+ * The name may be longer only if the node is not a leaf. That
+ * also applies to leaves with functions, so check this first.
+ */
+ if (is_leaf && call.call_namelen > 0)
+ return ENOTDIR;
+
+ /*
+ * If resolution indeed ends here, and the user supplied new
+ * data, check if writing is allowed.
+ */
+ if ((is_leaf || has_func) && newp != NULL) {
+ if (!(rnode->rnode_flags & CTLFLAG_READWRITE))
+ return EPERM;
+
+ if (!(rnode->rnode_flags & CTLFLAG_ANYWRITE) &&
+ !(call.call_flags & RMIB_FLAG_AUTH))
+ return EPERM;
+ }
+
+ /* If this node has a handler function, let it do the work. */
+ if (has_func)
+ return rnode->rnode_func(&call, rnode, oldp, newp);
+
+ /* For regular data leaf nodes, handle generic access. */
+ if (is_leaf)
+ return rmib_readwrite(&call, rnode, oldp, newp);
+
+ /* No function and not a leaf? Descend further. */
+ }
+
+ /* If we get here, the name refers to a node array. */
+ return EISDIR;
+}
+
+/*
+ * Initialize the given node and recursively all its node-type children,
+ * assigning the proper child length value to each of them.
+ */
+static void
+rmib_init(struct rmib_node * rnode)
+{
+ struct rmib_node *rchild;
+ unsigned int id;
+
+ rchild = rnode->rnode_cptr;
+
+ for (id = 0; id < rnode->rnode_size; id++, rchild++) {
+ if (rchild->rnode_flags == 0)
+ continue;
+
+ rnode->rnode_clen++;
+
+ if (SYSCTL_TYPE(rchild->rnode_flags) == CTLTYPE_NODE)
+ rmib_init(rchild); /* recurse */
+ }
+}
+
+/*
+ * Register a MIB subtree. Initialize the subtree, add it to the local set,
+ * and send a registration request for it to the MIB service.
+ */
+int
+rmib_register(const int * name, unsigned int namelen, struct rmib_node * rnode)
+{
+ message m;
+ unsigned int id, free_id;
+ int r;
+
+ /* A few basic sanity checks. */
+ if (namelen == 0 || namelen >= CTL_SHORTNAME)
+ return EINVAL;
+ if (SYSCTL_TYPE(rnode->rnode_flags) != CTLTYPE_NODE)
+ return EINVAL;
+
+ /* Make sure this is a new subtree, and find a free slot for it. */
+ for (id = free_id = 0; id < __arraycount(rnodes); id++) {
+ if (rnodes[id] == rnode)
+ return EEXIST;
+ else if (rnodes[id] == NULL && rnodes[free_id] != NULL)
+ free_id = id;
+ }
+
+ if (rnodes[free_id] != NULL)
+ return ENOMEM;
+
+ /*
+ * Initialize the entire subtree. This will also compute rnode_clen
+ * for the given rnode, so do this before sending the message.
+ */
+ rmib_init(rnode);
+
+ /*
+ * Request that the MIB service mount this subtree. This is a one-way
+ * request, so we never hear whether mounting succeeds. There is not
+ * that much we can do if it fails anyway though.
+ */
+ memset(&m, 0, sizeof(m));
+
+ m.m_type = MIB_REGISTER;
+ m.m_lsys_mib_register.root_id = free_id;
+ m.m_lsys_mib_register.flags = SYSCTL_VERSION | rnode->rnode_flags;
+ m.m_lsys_mib_register.csize = rnode->rnode_size;
+ m.m_lsys_mib_register.clen = rnode->rnode_clen;
+ m.m_lsys_mib_register.miblen = namelen;
+ memcpy(m.m_lsys_mib_register.mib, name, sizeof(name[0]) * namelen);
+
+ if ((r = asynsend3(MIB_PROC_NR, &m, AMF_NOREPLY)) == OK)
+ rnodes[free_id] = rnode;
+
+ return r;
+}
+
+/*
+ * Deregister a previously registered subtree, both internally and with the MIB
+ * service. Return OK if the deregistration procedure has been started, in
+ * which case the given subtree is guaranteed to no longer be accessed. Return
+ * a negative error code on failure.
+ */
+int
+rmib_deregister(struct rmib_node * rnode)
+{
+ message m;
+ unsigned int id;
+
+ for (id = 0; id < __arraycount(rnodes); id++)
+ if (rnodes[id] == rnode)
+ break;
+
+ if (id == __arraycount(rnodes))
+ return ENOENT;
+
+ rnodes[id] = NULL;
+
+ /*
+ * Request that the MIB service unmount the subtree. We completely
+ * ignore failure here, because the caller would not be able to do
+ * anything about it anyway. We may also still receive sysctl call
+ * requests for the node we just deregistered, but this is caught
+ * during request processing. Reuse of the rnodes[] slot could be a
+ * potential problem though. We could use sequence numbers in the root
+ * identifiers to resolve that problem if it ever occurs in reality.
+ */
+ memset(&m, 0, sizeof(m));
+
+ m.m_type = MIB_DEREGISTER;
+ m.m_lsys_mib_register.root_id = id;
+
+ (void)asynsend3(MIB_PROC_NR, &m, AMF_NOREPLY);
+
+ return OK;
+}
+
+/*
+ * Reset all registrations, without involving MIB communication. This call
+ * must be issued only when the caller has determined that the MIB service has
+ * restarted, and is about to reregister its subtrees.
+ */
+void
+rmib_reset(void)
+{
+
+ memset(rnodes, 0, sizeof(rnodes));
+}
+
+/*
+ * Process a request from the MIB service for information about the root node
+ * of a subtree, specifically its name and description.
+ */
+static int
+rmib_info(const message * m_in)
+{
+ struct rmib_node *rnode;
+ unsigned int id;
+ const char *ptr;
+ size_t size;
+ int r;
+
+ id = m_in->m_mib_lsys_info.root_id;
+ if (id >= __arraycount(rnodes) || rnodes[id] == NULL)
+ return ENOENT;
+ rnode = rnodes[id];
+
+ /* The name must fit. If it does not, the service writer messed up. */
+ size = strlen(rnode->rnode_name) + 1;
+ if (size > m_in->m_mib_lsys_info.name_size)
+ return ENAMETOOLONG;
+
+ r = sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.name_grant, 0,
+ (vir_bytes)rnode->rnode_name, size);
+ if (r != OK)
+ return r;
+
+ /* If there is no (optional) description, copy out an empty string. */
+ ptr = (rnode->rnode_desc != NULL) ? rnode->rnode_desc : "";
+ size = strlen(ptr) + 1;
+
+ if (size > m_in->m_mib_lsys_info.desc_size)
+ size = m_in->m_mib_lsys_info.desc_size;
+
+ return sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.desc_grant,
+ 0, (vir_bytes)ptr, size);
+}
+
+/*
+ * Process a request from the MIB service. The given message should originate
+ * from the MIB service and have one of the COMMON_MIB_ requests as type.
+ */
+void
+rmib_process(const message * m_in, int ipc_status)
+{
+ message m_out;
+ uint32_t req_id;
+ ssize_t r;
+
+ /* Only the MIB service may issue these requests. */
+ if (m_in->m_source != MIB_PROC_NR)
+ return;
+
+ /* Process the actual request. */
+ switch (m_in->m_type) {
+ case COMMON_MIB_INFO:
+ req_id = m_in->m_mib_lsys_info.req_id;
+
+ r = rmib_info(m_in);
+
+ break;
+
+ case COMMON_MIB_CALL:
+ req_id = m_in->m_mib_lsys_call.req_id;
+
+ r = rmib_call(m_in);
+
+ break;
+
+ default:
+ /*
+ * HACK: assume that for all current and future requests, the
+ * request ID field is in the same place. We could create a
+ * m_mib_lsys_unknown pseudo message type for this, but, eh.
+ */
+ req_id = m_in->m_mib_lsys_info.req_id;
+
+ r = ENOSYS;
+ }
+
+ /* Construct and send a reply message to the MIB service. */
+ memset(&m_out, 0, sizeof(m_out));
+
+ m_out.m_type = COMMON_MIB_REPLY;
+ m_out.m_lsys_mib_reply.req_id = req_id;
+ m_out.m_lsys_mib_reply.status = r;
+
+ if (IPC_STATUS_CALL(ipc_status) == SENDREC)
+ r = ipc_sendnb(m_in->m_source, &m_out);
+ else
+ r = asynsend3(m_in->m_source, &m_out, AMF_NOREPLY);
+
+ if (r != OK)
+ printf("lsys:rmib: unable to send reply to %d: %d\n",
+ m_in->m_source, r);
+}
# Makefile for the Management Information Base (MIB) server
PROG= mib
-SRCS= main.c tree.c kern.c vm.c hw.c proc.c minix.c
+SRCS= main.c tree.c remote.c kern.c vm.c hw.c proc.c minix.c
CPPFLAGS+= -I${NETBSDSRCDIR}/minix
* service needs superuser privileges because it may need to issue privileged
* calls and obtain privileged information from other services.
*
+ * While most of the sysctl tree is maintained locally, the MIB service also
+ * allows other services to register "remote" subtrees which are then handled
+ * entirely by those services. This feature, which works much like file system
+ * mounting, allows 1) sysctl handling code to stay local to its corresponding
+ * service, and 2) parts of the sysctl tree to adapt and expand dynamically as
+ * optional services are started and stopped. Compared to the MIB service's
+ * local handling, remotely handled subtrees are subject to several additional
+ * practical restrictions, however. In the current implementation, the MIB
+ * service makes blocking calls to remote services as needed; in the future,
+ * these interactions could be made (more) asynchronous.
+ *
* The MIB service was created by David van Moolenbroek <david@minix3.org>.
*/
/*
* Most of these initially empty nodes are filled in by their corresponding
- * modules' _init calls; see mib_init below. However, CTL_USER stays empty:
- * the libc sysctl(3) wrapper code takes care of that subtree. It must have
- * an entry here though, or sysctl(8) will not list it. CTL_VENDOR is also
- * empty, but writable, so that it may be used by third parties.
+ * modules' _init calls; see mib_init below. However, some subtrees are not
+ * populated by the MIB service itself. CTL_NET is expected to be populated
+ * through registration of remote subtrees. The libc sysctl(3) wrapper code
+ * takes care of the CTL_USER subtree. It must have an entry here though, or
+ * sysctl(8) will not list it. CTL_VENDOR is also empty, but writable, so that
+ * it may be used by third parties.
*/
static struct mib_node mib_table[] = {
/* 1*/ [CTL_KERN] = MIB_ENODE(_P | _RO, "kern", "High kernel"),
/* 2*/ [CTL_VM] = MIB_ENODE(_P | _RO, "vm", "Virtual memory"),
+/* 4*/ [CTL_NET] = MIB_ENODE(_P | _RO, "net", "Networking"),
/* 6*/ [CTL_HW] = MIB_ENODE(_P | _RO, "hw", "Generic CPU, I/O"),
/* 8*/ [CTL_USER] = MIB_ENODE(_P | _RO, "user", "User-level"),
/*11*/ [CTL_VENDOR] = MIB_ENODE(_P | _RW, "vendor", "Vendor specific"),
* node is writable by default, so that programs such as init(8) may create
* their own top-level entries.
*/
-static struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", "");
+struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", "");
/*
* Structures describing old and new data as provided by userland. The primary
return sys_datacopy(newp->newp_endpt, addr, SELF, (vir_bytes)buf, len);
}
+/*
+ * Create a grant for a call's old data region, if not NULL, for the given
+ * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the
+ * length in lenp, and return OK. On error, return an error code that must not
+ * be ENOMEM.
+ */
+int
+mib_relay_oldp(endpoint_t endpt, struct mib_oldp * __restrict oldp,
+ cp_grant_id_t * grantp, size_t * __restrict lenp)
+{
+
+ if (oldp != NULL) {
+ *grantp = cpf_grant_magic(endpt, oldp->oldp_endpt,
+ oldp->oldp_addr, oldp->oldp_len, CPF_WRITE);
+ if (!GRANT_VALID(*grantp))
+ return EINVAL;
+ *lenp = oldp->oldp_len;
+ } else {
+ *grantp = GRANT_INVALID;
+ *lenp = 0;
+ }
+
+ return OK;
+}
+
+/*
+ * Create a grant for a call's new data region, if not NULL, for the given
+ * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the
+ * length in lenp, and return OK. On error, return an error code that must not
+ * be ENOMEM.
+ */
+int
+mib_relay_newp(endpoint_t endpt, struct mib_newp * __restrict newp,
+ cp_grant_id_t * grantp, size_t * __restrict lenp)
+{
+
+ if (newp != NULL) {
+ *grantp = cpf_grant_magic(endpt, newp->newp_endpt,
+ newp->newp_addr, newp->newp_len, CPF_READ);
+ if (!GRANT_VALID(*grantp))
+ return EINVAL;
+ *lenp = newp->newp_len;
+ } else {
+ *grantp = GRANT_INVALID;
+ *lenp = 0;
+ }
+
+ return OK;
+}
+
/*
* Check whether the user is allowed to perform privileged operations. The
* function returns a nonzero value if this is the case, and zero otherwise.
* Implement the sysctl(2) system call.
*/
static int
-mib_sysctl(message * __restrict m_in, message * __restrict m_out)
+mib_sysctl(message * __restrict m_in, int ipc_status,
+ message * __restrict m_out)
{
vir_bytes oldaddr, newaddr;
size_t oldlen, newlen;
struct mib_call call;
ssize_t r;
+ /* Only handle blocking calls. Ignore everything else. */
+ if (IPC_STATUS_CALL(ipc_status) != SENDREC)
+ return EDONTREPLY;
+
endpt = m_in->m_source;
oldaddr = m_in->m_lc_mib_sysctl.oldp;
oldlen = m_in->m_lc_mib_sysctl.oldlen;
call.call_flags = 0;
call.call_reslen = 0;
- r = mib_dispatch(&call, &mib_root, oldpp, newpp);
+ r = mib_dispatch(&call, oldpp, newpp);
/*
* From NetBSD: we copy out as much as we can from the old data, while
* Now that the static tree is complete, go through the entire tree,
* initializing miscellaneous fields.
*/
- mib_tree_init(&mib_root);
+ mib_tree_init();
+
+ /* Prepare for requests to mount remote subtrees. */
+ mib_remote_init();
return OK;
}
switch (m_in.m_type) {
case MIB_SYSCTL:
- r = mib_sysctl(&m_in, &m_out);
+ r = mib_sysctl(&m_in, ipc_status, &m_out);
+
+ break;
+
+ case MIB_REGISTER:
+ r = mib_register(&m_in, ipc_status);
+
+ break;
+
+ case MIB_DEREGISTER:
+ r = mib_deregister(&m_in, ipc_status);
break;
default:
- r = ENOSYS;
+ if (IPC_STATUS_CALL(ipc_status) == SENDREC)
+ r = ENOSYS;
+ else
+ r = EDONTREPLY;
}
- /* Send the reply. */
- m_out.m_type = r;
+ /* Send a reply, if applicable. */
+ if (r != EDONTREPLY) {
+ m_out.m_type = r;
- if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK)
- printf("MIB: ipc_sendnb failed (%d)\n", r);
+ if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK)
+ printf("MIB: ipc_sendnb failed (%d)\n", r);
+ }
}
/* NOTREACHED */
*/
#define MINIX_TEST_SUBTREE 1 /* include the minix.test subtree? */
+/*
+ * By default, mount request failures will be silently discarded, because the
+ * requests themselves are one-way. For service authors, a bit more output may
+ * be helpful. Set the following definition to "printf s" in order to
+ * include more information about mount requests and failures.
+ */
+#define MIB_DEBUG_MOUNT(s) /* printf s */
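+
+/*
+ * For illustration only (the message text is made up): call sites use an
+ * extra set of parentheses, so that with the definition above left disabled,
+ * a statement such as
+ *
+ *	MIB_DEBUG_MOUNT(("MIB: mounting failed (%d)\n", r));
+ *
+ * expands to an empty statement, while with the definition set to "printf s"
+ * it expands to the corresponding printf() call.
+ */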
+
struct mib_oldp;
struct mib_newp;
#define MIB_FLAG_NOAUTH 0x02 /* user verified to be regular user */
/*
- * We reassign new meaning to two NetBSD node flags, because we do not use the
- * flags in the way NetBSD does:
+ * We reassign new meaning to three NetBSD node flags, because we do not use
+ * the flags in the way NetBSD does:
*
* - On NetBSD, CTLFLAG_ROOT is used to mark the root of the sysctl tree. The
* entire root node is not exposed to userland, and thus, neither is this
* node, presumably to avoid having to duplicate entire subtrees. We can
* simply have two nodes point to the same subtree instead, and thus, we do
* not need to support this functionality at all.
+ * - On NetBSD, CTLFLAG_MMAP is defined for future support for memory-mapping
+ * node data with CTL_MMAP. It is not yet clear where or why this feature
+ * would be used in practice. For as long as NetBSD does not actually use
+ * this flag *for node-type nodes*, we can reuse it for our own purposes.
*
* The meaning of our replacement flags is explained further below. We ensure
- * that neither of these flags are ever exposed to userland. As such, our own
+ * that none of these flags are ever exposed to userland. As such, our own
* definitions can be changed as necessary without breaking anything.
*/
#define CTLFLAG_PARENT CTLFLAG_ROOT /* node is a real parent node */
#define CTLFLAG_VERIFY CTLFLAG_ALIAS /* node has verification function */
+#define CTLFLAG_REMOTE CTLFLAG_MMAP /* node is root of remote subtree */
/*
* The following node structure definition aims to meet several goals at once:
* 1) it can be used for static and dynamic nodes;
* 2) it can be used to point to both static and dynamic child arrays at once;
* 3) it allows for embedded, pointed-to, and function-generated data;
- * 4) its unions are compatible with magic instrumentation;
- * 5) it is optimized for size, assuming many static and few dynamic nodes.
+ * 4) it allows both temporary and obscuring mount points for remote subtrees;
+ * 5) its unions are compatible with magic instrumentation;
+ * 6) it is optimized for size, assuming many static and few dynamic nodes.
*
- * All nodes have flags, a size, a version, a name, and optionally a
- * description. The use of the rest of the fields depends on the type of the
- * node, which is defined by part of the flags field.
+ * All nodes have flags, a size, a version, a parent (except the root node), a
+ * name, and optionally a description. The use of the rest of the fields
+ * depends on the type of the node, which is defined as part of the node's
+ * flags field.
*
* Data nodes, that is, nodes of type CTLTYPE_{BOOL,INT,QUAD,STRING,STRUCT},
* have associated data. For types CTLTYPE_{BOOL,INT,QUAD}, the node may have
* (size, immediate and/or pointer) data fields as it sees fit.
*
* Node-type nodes, of type CTLTYPE_NODE, behave differently. Such nodes may
- * have either static and dynamic child nodes, or an associated function. Such
- * a function handles all access to the entire subtree. If no function is set,
- * the CTLFLAG_PARENT flag is set, to indicate that this node is the root of a
- * real subtree; CTLFLAG_PARENT must not be set if the node has an associated
- * function. For real node-type nodes (with CTLFLAG_PARENT set), node_size is
- * the number (not size!) of the array of static child nodes, which is pointed
- * to by node_scptr and indexed by child identifier. Within the static array,
- * child nodes with zeroed flags fields are not in use. The node_dcptr field
- * points to a linked list of dynamic child nodes. The node_csize field is set
- * to the size of the static array plus the number of dynamic nodes; node_clen
- * is set to the number of valid entries in the static array plus the number of
- * dynamic nodes. If a function is set, none of these fields are used, and the
- * node_size field is typically (but not necessarily) set to zero.
+ * have static and dynamic child nodes, or have an associated function, or be
+ * a mount point for a subtree handled by a remote process. The exact case is
+ * defined by the combination of the CTLFLAG_PARENT and CTLFLAG_REMOTE flags,
+ * yielding four possible cases:
+ *
+ * CTLFLAG_PARENT CTLFLAG_REMOTE Meaning
+ * not set not set The node has an associated function which
+ * handles all access to the entire subtree.
+ * set not set The node is the root of a real, local
+ * subtree with static and/or dynamic children.
+ * not set set The node is a temporarily created mount
+ * point for a remote tree. A remote service
+ * handles all access to the entire subtree.
+ * Unmounting the node also destroys the node.
+ * set set The node is a mount point that obscures a
+ * real, local subtree. A remote service
+ * handles all access to the entire subtree.
+ * Unmounting makes the original node visible.
+ *
+ * If the CTLFLAG_PARENT flag is set, the node is the root of a real subtree.
+ * For such nodes, node_size is the number (not size!) of the array of static
+ * child nodes, which is pointed to by node_scptr and indexed by child
+ * identifier. Within the static array, child nodes with zeroed flags fields
+ * are not in use. The node_dcptr field points to a linked list of dynamic
+ * child nodes. The node_csize field is set to the size of the static array
+ * plus the number of dynamic nodes; node_clen is set to the number of valid
+ * entries in the static array plus the number of dynamic nodes.
+ *
+ * If a function is set, and thus neither CTLFLAG_PARENT nor CTLFLAG_REMOTE is
+ * set, none of the aforementioned fields are used, and the node_size field is
+ * typically (but not necessarily) set to zero.
+ *
+ * A remote service can mount its own subtree into the central MIB tree. The
+ * MIB service will then relay any requests for that subtree to the remote
+ * service. Both the mountpoint and the root of the remote subtree must be of
+ * type CTLTYPE_NODE; thus, no individual leaf nodes may be mounted. The mount
+ * point may either be created temporarily for the purpose of mounting (e.g.,
+ * net.inet), or it may override a preexisting node (e.g., kern.ipc). In the
+ * first case, the parent node must exist and be of node type (net). In the
+ * second case, the preexisting target node (the MIB service's kern.ipc) may
+ * not have an associated function and may only have static children. While
+ * being used as a mount point (i.e., with CTLFLAG_REMOTE set), the local node's
+ * node_csize and node_clen fields must not be used. Instead, the same space
+ * in the node structure is used to store information about the remote node:
+ * node_rid, node_eid, and the smaller node_rcsize and node_rclen which contain
+ * information about the root of the remote subtree. Remote nodes are also
+ * part of a linked list for administration purposes, using the node_next
+ * field. When a preexisting (CTLFLAG_PARENT) node is unmounted, its original
+ * node_csize and node_clen fields are recomputed.
*
* The structure uses unions for either only pointers or only non-pointers, to
* simplify live update support. However, this does not mean the structure is
- * not fully used: real node-type nodes use node_{flags,size,ver,csize,clen,
- * scptr,dcptr,name,desc}, which together add up to the full structure size.
+ * not fully used: real node-type nodes use node_{flags,size,ver,parent,csize,
+ * clen,scptr,dcptr,name,desc}, which together add up to the full structure
+ * size.
*/
struct mib_node;
struct mib_dynode;
typedef int (*mib_verify_ptr)(struct mib_call *, struct mib_node *, void *,
size_t);
+/*
+ * To save space for the maintenance of remote nodes, we split up one uint32_t
+ * field into three subfields:
+ * - node_eid ("endpoint ID"), which is an index into the table of endpoints;
+ * - node_rcsize ("child size"), the number of child slots of the remote root;
+ * - node_rclen ("child length"), the number of children of the remote root.
+ * These fields impose limits on the number of endpoints known in the MIB
+ * service, and the maximum size of the remote subtree root.
+ */
+#define MIB_EID_BITS 5 /* up to 32 services can mount remote subtrees */
+#define MIB_RC_BITS 12 /* remote root may have up to 4096 children */
+
+#if MIB_EID_BITS + 2 * MIB_RC_BITS > 32
+#error "Sum of remote ID and remote children bit fields exceeds uint32_t size"
+#endif
+
struct mib_node {
- uint32_t node_flags; /* CTLTYPE_ type and CTLFLAGS_ flags */
+ uint32_t node_flags; /* CTLTYPE_ type and CTLFLAG_ flags */
size_t node_size; /* size of associated data (bytes) */
uint32_t node_ver; /* node version */
+ struct mib_node *node_parent; /* pointer to parent node */
union ixfer_node_val_u {
struct {
uint32_t nvuc_csize; /* number of child slots */
uint32_t nvuc_clen; /* number of actual children */
} nvu_child;
- int nvu_int; /* immediate integer */
+ struct {
+ uint32_t nvur_eid:MIB_EID_BITS; /* endpoint index */
+ uint32_t nvur_csize:MIB_RC_BITS;/* remote ch. slots */
+ uint32_t nvur_clen:MIB_RC_BITS; /* remote children */
+ uint32_t nvur_rid; /* opaque ID of remote root */
+ } nvu_remote;
bool nvu_bool; /* immediate boolean */
+ int nvu_int; /* immediate integer */
u_quad_t nvu_quad; /* immediate quad */
} node_val_u;
union pxfer_node_ptr_u {
- void *npu_data; /* struct or string data pointer */
+ void *npu_data; /* struct or string data pointer */
struct mib_node *npu_scptr; /* static child node array */
} node_ptr_u;
union pxfer_node_aux_u {
struct mib_dynode *nau_dcptr; /* dynamic child node list */
mib_func_ptr nau_func; /* handler function */
mib_verify_ptr nau_verify; /* verification function */
+ struct mib_node *nau_next; /* next remote node in list */
} node_aux_u;
const char *node_name; /* node name string */
const char *node_desc; /* node description (may be NULL) */
};
#define node_csize node_val_u.nvu_child.nvuc_csize
#define node_clen node_val_u.nvu_child.nvuc_clen
-#define node_int node_val_u.nvu_int
+#define node_eid node_val_u.nvu_remote.nvur_eid
+#define node_rcsize node_val_u.nvu_remote.nvur_csize
+#define node_rclen node_val_u.nvu_remote.nvur_clen
+#define node_rid node_val_u.nvu_remote.nvur_rid
#define node_bool node_val_u.nvu_bool
+#define node_int node_val_u.nvu_int
#define node_quad node_val_u.nvu_quad
#define node_data node_ptr_u.npu_data
#define node_scptr node_ptr_u.npu_scptr
#define node_dcptr node_aux_u.nau_dcptr
#define node_func node_aux_u.nau_func
#define node_verify node_aux_u.nau_verify
+#define node_next node_aux_u.nau_next
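+
+/*
+ * For illustration only; this is a sketch of the case analysis above rather
+ * than the actual dispatch code, which lives in tree.c.  For a node of type
+ * CTLTYPE_NODE, the cases are told apart as follows, where the elided part
+ * stands for the local child traversal described above:
+ *
+ *	if (node->node_flags & CTLFLAG_REMOTE)
+ *		return mib_remote_call(call, node, oldp, newp);
+ *	else if (node->node_flags & CTLFLAG_PARENT)
+ *		...descend into node_scptr[] and the node_dcptr list...
+ *	else
+ *		return node->node_func(call, node, oldp, newp);
+ */
+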
/*
* This structure is used for dynamically allocated nodes, that is, nodes
* created by userland at run time. It contains not only the fields below, but
* also the full name and, for leaf nodes with non-immediate data, the actual
- * data area.
+ * data area, or, for temporary mount points for remote subtrees, the node's
+ * description.
*/
struct mib_dynode {
struct mib_dynode *dynode_next; /* next in linked dynamic node list */
.node_name = n, \
.node_desc = d \
}
-#define MIB_INT(f,i,n,d) { \
- .node_flags = CTLTYPE_INT | CTLFLAG_IMMEDIATE | f, \
- .node_size = sizeof(int), \
- .node_int = i, \
- .node_name = n, \
- .node_desc = d \
-}
#define MIB_BOOL(f,b,n,d) { \
.node_flags = CTLTYPE_BOOL | CTLFLAG_IMMEDIATE | f, \
.node_size = sizeof(bool), \
.node_name = n, \
.node_desc = d \
}
+#define MIB_INT(f,i,n,d) { \
+ .node_flags = CTLTYPE_INT | CTLFLAG_IMMEDIATE | f, \
+ .node_size = sizeof(int), \
+ .node_int = i, \
+ .node_name = n, \
+ .node_desc = d \
+}
#define MIB_QUAD(f,q,n,d) { \
.node_flags = CTLTYPE_QUAD | CTLFLAG_IMMEDIATE | f, \
.node_size = sizeof(u_quad_t), \
.node_name = n, \
.node_desc = d \
}
-#define MIB_DATA(f,s,n,d) { \
+#define _MIB_DATA(f,s,p,n,d) { \
.node_flags = f, \
- .node_size = sizeof(s), \
- .node_data = __UNCONST(s), \
+ .node_size = s, \
+ .node_data = __UNCONST(p), \
.node_name = n, \
.node_desc = d \
}
-#define MIB_STRING(f,p,n,d) MIB_DATA(CTLTYPE_STRING | f, p, n, d)
-#define MIB_STRUCT(f,p,n,d) MIB_DATA(CTLTYPE_STRUCT | f, p, n, d)
-#define MIB_INTPTR(f,p,n,d) MIB_DATA(CTLTYPE_INT | f, p, n, d)
+#define MIB_BOOLPTR(f,p,n,d) _MIB_DATA(CTLTYPE_BOOL | f, sizeof(*p), p, n, d)
+#define MIB_INTPTR(f,p,n,d) _MIB_DATA(CTLTYPE_INT | f, sizeof(*p), p, n, d)
+#define MIB_QUADTR(f,p,n,d) _MIB_DATA(CTLTYPE_QUAD | f, sizeof(*p), p, n, d)
+#define MIB_STRING(f,p,n,d) _MIB_DATA(CTLTYPE_STRING | f, sizeof(p), p, n, d)
+#define MIB_STRUCT(f,s,p,n,d) _MIB_DATA(CTLTYPE_STRUCT | f, s, p, n, d)
#define MIB_FUNC(f,s,fp,n,d) { \
.node_flags = f, \
.node_size = s, \
int mib_copyin(struct mib_newp * __restrict, void * __restrict, size_t);
int mib_copyin_aux(struct mib_newp * __restrict, vir_bytes,
void * __restrict, size_t);
+int mib_relay_oldp(endpoint_t, struct mib_oldp * __restrict, cp_grant_id_t *,
+ size_t * __restrict);
+int mib_relay_newp(endpoint_t, struct mib_newp * __restrict, cp_grant_id_t *,
+ size_t * __restrict);
int mib_authed(struct mib_call *);
+extern struct mib_node mib_root;
/* tree.c */
ssize_t mib_readwrite(struct mib_call *, struct mib_node *, struct mib_oldp *,
struct mib_newp *, mib_verify_ptr);
-ssize_t mib_dispatch(struct mib_call *, struct mib_node *, struct mib_oldp *,
- struct mib_newp *);
-void mib_tree_init(struct mib_node *);
-extern unsigned int nodes;
-extern unsigned int objects;
+ssize_t mib_dispatch(struct mib_call *, struct mib_oldp *, struct mib_newp *);
+void mib_tree_init(void);
+int mib_mount(const int *, unsigned int, unsigned int, uint32_t, uint32_t,
+ unsigned int, unsigned int, struct mib_node **);
+void mib_unmount(struct mib_node *);
+extern unsigned int mib_nodes;
+extern unsigned int mib_objects;
+extern unsigned int mib_remotes;
+
+/* remote.c */
+void mib_remote_init(void);
+int mib_register(const message *, int);
+int mib_deregister(const message *, int);
+int mib_remote_info(unsigned int, uint32_t, char *, size_t, char *, size_t);
+ssize_t mib_remote_call(struct mib_call *, struct mib_node *,
+ struct mib_oldp *, struct mib_newp *);
/* proc.c */
ssize_t mib_kern_lwp(struct mib_call *, struct mib_node *, struct mib_oldp *,
/* 2*/ [TEST_QUAD] = MIB_QUAD(_RW, 0, "quad", "Quad test field"),
/* 3*/ [TEST_STRING] = MIB_STRING(_RW, test_string, "string",
"String test field"),
-/* 4*/ [TEST_STRUCT] = MIB_STRUCT(_RW, test_struct, "struct",
+/* 4*/ [TEST_STRUCT] = MIB_STRUCT(_RW, sizeof(test_struct),
+ test_struct, "struct",
"Structure test field"),
/* 5*/ [TEST_PRIVATE] = MIB_INT(_RW | CTLFLAG_PRIVATE, -5375,
"private", "Private test field"),
static struct mib_node mib_minix_mib_table[] = {
/* 1*/ [MIB_NODES] = MIB_INTPTR(_P | _RO | CTLFLAG_UNSIGNED,
- &nodes, "nodes",
+ &mib_nodes, "nodes",
"Number of nodes in the MIB tree"),
/* 2*/ [MIB_OBJECTS] = MIB_INTPTR(_P | _RO | CTLFLAG_UNSIGNED,
- &objects, "objects", "Number of "
+ &mib_objects, "objects", "Number of "
"dynamically allocated MIB objects"),
+/* 3*/ [MIB_REMOTES] = MIB_INTPTR(_P | _RO | CTLFLAG_UNSIGNED,
+ &mib_remotes, "remotes",
+ "Number of mounted remote MIB subtrees"),
};
static struct mib_node mib_minix_proc_table[] = {
static struct mib_node mib_minix_table[] = {
#if MINIX_TEST_SUBTREE
-/* 0*/ [MINIX_TEST] = MIB_NODE(_RW | CTLFLAG_HIDDEN,
+/* 0*/ [MINIX_TEST] = MIB_NODE(_P | _RW | CTLFLAG_HIDDEN,
mib_minix_test_table, "test",
"Test87 testing ground"),
#endif /* MINIX_TEST_SUBTREE */
--- /dev/null
+/* MIB service - remote.c - remote service management and communication */
+
+#include "mib.h"
+
+/*
+ * TODO: the main feature that is missing here is a more active way to
+ * determine that a particular service has died, so that its mount points can
+ * be removed proactively. Without this, there is a (small) risk that we end
+ * up talking to a recycled endpoint with a service that ignores our request,
+ * resulting in a deadlock of the MIB service. Right now, the problem is that
+ * there is no proper DS API to subscribe to generic service-down events.
+ *
+ * In the long term, communication to other services should be made
+ * asynchronous, so that the MIB service does not block if there are problems
+ * with the other service. The protocol should already support this, and some
+ * simplifications are the result of preparing for future asynchrony support
+ * (such as not dynamically querying the remote root node for its properties,
+ * which would be very hard to implement in a nonblocking way). However,
+ * actual support is missing. For now we assume that the remote service either
+ * answers the request, or crashes (causing the sendrec to abort), which is
+ * mostly good enough.
+ */
+
+/* This is the maximum number of remote services that may register subtrees. */
+#define MIB_ENDPTS (1U << MIB_EID_BITS)
+
+/* This is the maximum service label size, including '\0'. */
+#define MIB_LABEL_MAX 16
+
+/* Table of remote endpoints, indexed by mount point nodes' node_eid fields. */
+static struct {
+ endpoint_t endpt; /* remote endpoint or NONE */
+ struct mib_node *nodes; /* head of list of mount point nodes */
+ char label[MIB_LABEL_MAX]; /* label of the remote endpoint */
+} endpts[MIB_ENDPTS];
+
+/*
+ * Initialize the table of remote endpoints.
+ */
+void
+mib_remote_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < __arraycount(endpts); i++) {
+ endpts[i].endpt = NONE;
+ endpts[i].nodes = NULL;
+ }
+}
+
+/*
+ * The remote endpoint with the given table index has been determined to have
+ * died. Clean up all its mount points.
+ */
+static void
+mib_down(unsigned int eid)
+{
+ struct mib_node *node, *next_node;
+
+ assert(endpts[eid].endpt != NONE);
+ assert(endpts[eid].nodes != NULL);
+
+ /* Unmount each of the remote endpoint's mount points. */
+ for (node = endpts[eid].nodes; node != NULL; node = next_node) {
+ /* The unmount call may deallocate the node object. */
+ next_node = node->node_next;
+
+ mib_unmount(node);
+ }
+
+ /* Mark the entry itself as no longer in use. */
+ endpts[eid].endpt = NONE;
+ endpts[eid].nodes = NULL;
+}
+
+/*
+ * Obtain the label for the given endpoint. On success, return OK and store
+ * the label in the given buffer. If the label cannot be retrieved or does not
+ * fit in the given buffer, return a negative error code.
+ */
+static int
+mib_get_label(endpoint_t endpt, char * label, size_t labelsize)
+{
+ char key[DS_MAX_KEYLEN];
+ int r;
+
+ /* TODO: init has a label, so this is not a proper is-service test! */
+ if ((r = ds_retrieve_label_name(key, endpt)) != OK) {
+ printf("MIB: unable to obtain label for %d\n", endpt);
+
+ return r;
+ }
+
+ key[sizeof(key) - 1] = 0;
+ if (strlen(key) >= labelsize) {
+ /* This should really never happen. */
+ printf("MIB: service %d label '%s' is too long\n", endpt, key);
+
+ return ENAMETOOLONG;
+ }
+
+ strlcpy(label, key, labelsize);
+ return OK;
+}
+
+/*
+ * Register a remote subtree, mounting it in the local tree as requested.
+ */
+static void
+mib_do_register(endpoint_t endpt, const char * label, uint32_t rid,
+ uint32_t flags, unsigned int csize, unsigned int clen, const int * mib,
+ unsigned int miblen)
+{
+ struct mib_node *node;
+ unsigned int eid;
+ int r, free_eid;
+
+ /*
+ * See if we already have an entry for this endpoint or its label. If
+ * there is an entry with the same label but a different endpoint, we
+ * can safely assume that the old endpoint has died, and we unmount its
+ * previous mount points. Also find a free entry for the remote
+ * endpoint in case it is new.
+ */
+ free_eid = -1;
+ for (eid = 0; eid < __arraycount(endpts); eid++) {
+ if (endpts[eid].endpt == endpt)
+ break;
+ else if (endpts[eid].endpt != NONE &&
+ !strcmp(endpts[eid].label, label)) {
+ mib_down(eid);
+
+ assert(endpts[eid].endpt == NONE);
+ assert(endpts[eid].nodes == NULL);
+
+ break;
+ } else if (endpts[eid].endpt == NONE && free_eid < 0)
+ free_eid = eid;
+ }
+
+ if (eid == __arraycount(endpts)) {
+ if (free_eid < 0) {
+ printf("MIB: remote endpoints table is full!\n");
+
+ return;
+ }
+
+ eid = free_eid;
+ }
+
+ /*
+ * Make sure that the caller does not introduce two mount points with
+ * the same ID. Right now we refuse such requests; instead, we could
+ * also choose to first deregister the old mount point with this ID.
+ */
+ for (node = endpts[eid].nodes; node != NULL; node = node->node_next) {
+ if (node->node_rid == rid)
+ break;
+ }
+
+ if (node != NULL) {
+ MIB_DEBUG_MOUNT(("MIB: service %d tried to reuse ID %"PRIu32
+ "\n", endpt, rid));
+
+ return;
+ }
+
+ /*
+ * If we did not already have an entry for this endpoint, add one now,
+ * because the mib_mount() call will expect it to be there. If the
+ * mount call fails, we may have to invalidate the entry again.
+ */
+ if (endpts[eid].endpt == NONE) {
+ endpts[eid].endpt = endpt;
+ endpts[eid].nodes = NULL;
+ strlcpy(endpts[eid].label, label, sizeof(endpts[eid].label));
+ }
+
+ /* Attempt to mount the remote subtree in the tree. */
+ r = mib_mount(mib, miblen, eid, rid, flags, csize, clen, &node);
+
+ if (r != OK) {
+ /* If the entry has no other mount points, invalidate it. */
+ if (endpts[eid].nodes == NULL)
+ endpts[eid].endpt = NONE;
+
+ return;
+ }
+
+ /* Add the new node to the list of mount points of the endpoint. */
+ node->node_next = endpts[eid].nodes;
+ endpts[eid].nodes = node;
+}
+
+/*
+ * Process a mount point registration request from another service.
+ */
+int
+mib_register(const message * m_in, int ipc_status)
+{
+ char label[DS_MAX_KEYLEN];
+
+ /*
+ * Registration messages must be one-way, or they may cause a deadlock
+ * if crossed by a request coming from us. This case also effectively
+ * eliminates the possibility for userland to register nodes. The
+ * return value of ENOSYS effectively tells userland that this call
+ * number is not in use, which allows us to repurpose call numbers
+ * later.
+ */
+ if (IPC_STATUS_CALL(ipc_status) == SENDREC)
+ return ENOSYS;
+
+ MIB_DEBUG_MOUNT(("MIB: got register request from %d\n",
+ m_in->m_source));
+
+ /* Double-check if the caller is a service by obtaining its label. */
+ if (mib_get_label(m_in->m_source, label, sizeof(label)) != OK)
+ return EDONTREPLY;
+
+ /* Perform one message-level bounds check here. */
+ if (m_in->m_lsys_mib_register.miblen >
+ __arraycount(m_in->m_lsys_mib_register.mib))
+ return EDONTREPLY;
+
+ /* The rest of the work is handled by a message-agnostic function. */
+ mib_do_register(m_in->m_source, label,
+ m_in->m_lsys_mib_register.root_id, m_in->m_lsys_mib_register.flags,
+ m_in->m_lsys_mib_register.csize, m_in->m_lsys_mib_register.clen,
+ m_in->m_lsys_mib_register.mib, m_in->m_lsys_mib_register.miblen);
+
+ /* Never reply to this message. */
+ return EDONTREPLY;
+}
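
For reference, the sending side of this one-way registration exchange is not part of this patch; it lives in the system library's RMIB code. A minimal sketch of what that side might look like, assuming the usual MINIX system service headers and using only the m_lsys_mib_register fields handled above (the request type constant and the asynsend() usage are assumptions here, not taken from this patch):

/* Sketch only: how a remote service might send the registration request. */
static int
sketch_send_register(endpoint_t mib_ep, int req_type, const int * mib,
	unsigned int miblen, uint32_t rid, uint32_t flags, unsigned int csize,
	unsigned int clen)
{
	message m;

	if (miblen > __arraycount(m.m_lsys_mib_register.mib))
		return EINVAL;

	memset(&m, 0, sizeof(m));
	m.m_type = req_type;			/* MIB registration call number */
	m.m_lsys_mib_register.root_id = rid;	/* caller-chosen opaque root ID */
	m.m_lsys_mib_register.flags = flags;	/* node-type flags for the mount */
	m.m_lsys_mib_register.csize = csize;	/* remote child slots */
	m.m_lsys_mib_register.clen = clen;	/* remote children in use */
	m.m_lsys_mib_register.miblen = miblen;
	memcpy(m.m_lsys_mib_register.mib, mib,
	    sizeof(m.m_lsys_mib_register.mib[0]) * miblen);

	/* The request must be asynchronous; mib_register() rejects sendrec. */
	return asynsend(mib_ep, &m);
}
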
+
+/*
+ * Deregister a previously registered remote subtree, unmounting it from the
+ * local tree.
+ */
+static void
+mib_do_deregister(endpoint_t endpt, uint32_t rid)
+{
+ struct mib_node *node, **nodep;
+ unsigned int eid;
+
+ for (eid = 0; eid < __arraycount(endpts); eid++) {
+ if (endpts[eid].endpt == endpt)
+ break;
+ }
+
+ if (eid == __arraycount(endpts)) {
+ MIB_DEBUG_MOUNT(("MIB: deregister request from unknown "
+ "endpoint %d\n", endpt));
+
+ return;
+ }
+
+ for (nodep = &endpts[eid].nodes; *nodep != NULL;
+ nodep = &node->node_next) {
+ node = *nodep;
+
+ if (node->node_rid == rid)
+ break;
+ }
+
+ if (*nodep == NULL) {
+ MIB_DEBUG_MOUNT(("MIB: deregister request from %d for unknown "
+ "ID %"PRIu32"\n", endpt, rid));
+
+ return;
+ }
+
+ /*
+ * The unmount function may or may not deallocate the node object, so
+ * remove it from the linked list first. If this leaves an empty
+ * linked list, also mark the remote endpoint entry itself as free.
+ */
+ *nodep = node->node_next;
+
+ if (endpts[eid].nodes == NULL) {
+ endpts[eid].endpt = NONE;
+ endpts[eid].nodes = NULL;
+ }
+
+ /* Finally, unmount the remote subtree. */
+ mib_unmount(node);
+}
+
+/*
+ * Process a mount point deregistration request from another service.
+ */
+int
+mib_deregister(const message * m_in, int ipc_status)
+{
+
+ /* Same as for registration messages. */
+ if (IPC_STATUS_CALL(ipc_status) == SENDREC)
+ return ENOSYS;
+
+ MIB_DEBUG_MOUNT(("MIB: got deregister request from %d\n",
+ m_in->m_source));
+
+ /* The rest of the work is handled by a message-agnostic function. */
+ mib_do_deregister(m_in->m_source, m_in->m_lsys_mib_register.root_id);
+
+ /* Never reply to this message. */
+ return EDONTREPLY;
+}
+
+/*
+ * Retrieve information about the root of a remote subtree, specifically its
+ * name and description. This is done only when there was no corresponding
+ * local node and one has to be created temporarily. On success, return OK
+ * with the name and description stored in the given buffers. Otherwise,
+ * return a negative error code.
+ */
+int
+mib_remote_info(unsigned int eid, uint32_t rid, char * name, size_t namesize,
+ char * desc, size_t descsize)
+{
+ endpoint_t endpt;
+ cp_grant_id_t name_grant, desc_grant;
+ message m;
+ int r;
+
+ if (eid >= __arraycount(endpts) || endpts[eid].endpt == NONE)
+ return EINVAL;
+
+ endpt = endpts[eid].endpt;
+
+ if ((name_grant = cpf_grant_direct(endpt, (vir_bytes)name, namesize,
+ CPF_WRITE)) == GRANT_INVALID)
+ return EINVAL;
+
+ if ((desc_grant = cpf_grant_direct(endpt, (vir_bytes)desc, descsize,
+ CPF_WRITE)) == GRANT_INVALID) {
+ cpf_revoke(name_grant);
+
+ return EINVAL;
+ }
+
+ memset(&m, 0, sizeof(m));
+
+ m.m_type = COMMON_MIB_INFO;
+ m.m_mib_lsys_info.req_id = 0; /* reserved for future async support */
+ m.m_mib_lsys_info.root_id = rid;
+ m.m_mib_lsys_info.name_grant = name_grant;
+ m.m_mib_lsys_info.name_size = namesize;
+ m.m_mib_lsys_info.desc_grant = desc_grant;
+ m.m_mib_lsys_info.desc_size = descsize;
+
+ r = ipc_sendrec(endpt, &m);
+
+ cpf_revoke(desc_grant);
+ cpf_revoke(name_grant);
+
+ if (r != OK)
+ return r;
+
+ if (m.m_type != COMMON_MIB_REPLY)
+ return EINVAL;
+ if (m.m_lsys_mib_reply.req_id != 0)
+ return EINVAL;
+
+ return m.m_lsys_mib_reply.status;
+}
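
The other half of this exchange is implemented in the system library's RMIB code and is not shown in this patch. As a rough sketch under that assumption, the remote side would fill the two grants and send COMMON_MIB_REPLY back, roughly as follows (any name not visible in this patch is hypothetical):

/* Sketch only: remote-side handling of a COMMON_MIB_INFO request. */
static void
sketch_handle_info(const message * m_in, const char * name, const char * desc)
{
	message m_out;
	size_t len;
	int r;

	/* Copy the subtree root's name into the caller-supplied grant. */
	len = strlen(name) + 1;
	if (len > m_in->m_mib_lsys_info.name_size)
		len = m_in->m_mib_lsys_info.name_size;
	r = sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.name_grant,
	    0, (vir_bytes)name, len);

	/* Copy the description as well, if the name copy succeeded. */
	if (r == OK) {
		len = strlen(desc) + 1;
		if (len > m_in->m_mib_lsys_info.desc_size)
			len = m_in->m_mib_lsys_info.desc_size;
		r = sys_safecopyto(m_in->m_source,
		    m_in->m_mib_lsys_info.desc_grant, 0, (vir_bytes)desc, len);
	}

	/* Reply with the matching request ID and the resulting status. */
	memset(&m_out, 0, sizeof(m_out));
	m_out.m_type = COMMON_MIB_REPLY;
	m_out.m_lsys_mib_reply.req_id = m_in->m_mib_lsys_info.req_id;
	m_out.m_lsys_mib_reply.status = r;

	ipc_send(m_in->m_source, &m_out);
}
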
+
+/*
+ * Relay a sysctl(2) call from a user process to a remote service, because the
+ * call reached a mount point into a remote subtree. Return the result code
+ * from the remote service. Alternatively, return ERESTART if it has been
+ * determined that the remote service is dead, in which case its mount points
+ * will have been removed (possibly including the entire given node), and the
+ * caller should continue the call on the underlying local subtree if there is
+ * any. Note that the remote service may also return ERESTART to indicate that
+ * the remote subtree does not exist, either because it is being deregistered
+ * or because the remote service was restarted with loss of state.
+ */
+ssize_t
+mib_remote_call(struct mib_call * call, struct mib_node * node,
+ struct mib_oldp * oldp, struct mib_newp * newp)
+{
+ cp_grant_id_t name_grant, oldp_grant, newp_grant;
+ size_t oldp_len, newp_len;
+ endpoint_t endpt;
+ message m;
+ int r;
+
+ endpt = endpts[node->node_eid].endpt;
+ assert(endpt != NONE);
+
+ /*
+ * Allocate grants. Since ENOMEM has a special meaning for sysctl(2),
+ * never return that code even if it is the most appropriate one.
+ * The remainder of the name may be empty; the callee should check.
+ */
+ name_grant = cpf_grant_direct(endpt, (vir_bytes)call->call_name,
+ call->call_namelen * sizeof(call->call_name[0]), CPF_READ);
+ if (!GRANT_VALID(name_grant))
+ return EINVAL;
+
+ if ((r = mib_relay_oldp(endpt, oldp, &oldp_grant, &oldp_len)) != OK) {
+ cpf_revoke(name_grant);
+
+ return r;
+ }
+
+ if ((r = mib_relay_newp(endpt, newp, &newp_grant, &newp_len)) != OK) {
+ if (GRANT_VALID(oldp_grant))
+ cpf_revoke(oldp_grant);
+ cpf_revoke(name_grant);
+
+ return r;
+ }
+
+ /*
+ * Construct the request message. We have not optimized this flow for
+ * performance. In particular, we never embed even short names in the
+ * message, and we supply a flag indicating whether the caller is root
+ * regardless of whether the callee is interested in this. This is
+ * more convenient for the callee, but also more costly.
+ */
+ memset(&m, 0, sizeof(m));
+
+ m.m_type = COMMON_MIB_CALL;
+ m.m_mib_lsys_call.req_id = 0; /* reserved for future async support */
+ m.m_mib_lsys_call.root_id = node->node_rid;
+ m.m_mib_lsys_call.name_grant = name_grant;
+ m.m_mib_lsys_call.name_len = call->call_namelen;
+ m.m_mib_lsys_call.oldp_grant = oldp_grant;
+ m.m_mib_lsys_call.oldp_len = oldp_len;
+ m.m_mib_lsys_call.newp_grant = newp_grant;
+ m.m_mib_lsys_call.newp_len = newp_len;
+ m.m_mib_lsys_call.user_endpt = call->call_endpt;
+ m.m_mib_lsys_call.flags = !!mib_authed(call); /* TODO: define flags */
+ m.m_mib_lsys_call.root_ver = node->node_ver;
+ m.m_mib_lsys_call.tree_ver = mib_root.node_ver;
+
+	/* Issue a synchronous call to the remote service. */
+ r = ipc_sendrec(endpt, &m);
+
+	/* Clean up the grants before examining the result. */
+ if (GRANT_VALID(newp_grant))
+ cpf_revoke(newp_grant);
+ if (GRANT_VALID(oldp_grant))
+ cpf_revoke(oldp_grant);
+ cpf_revoke(name_grant);
+
+ /*
+ * Treat any IPC-level error as an indication that there is a problem
+ * with the remote service. Declare it dead, remove all its mount
+ * points, and return ERESTART to indicate to the caller that it should
+ * (carefully) try to continue the request on a local subtree instead.
+ * Again: mib_down() may actually deallocate the given 'node' object.
+ */
+ if (r != OK) {
+ mib_down(node->node_eid);
+
+ return ERESTART;
+ }
+
+ if (m.m_type != COMMON_MIB_REPLY)
+ return EINVAL;
+ if (m.m_lsys_mib_reply.req_id != 0)
+ return EINVAL;
+
+ /*
+ * If a deregister message from the service crosses our call, we'll get
+ * the response before we get the deregister request. In that case,
+ * the remote service should return ERESTART to indicate that the mount
+ * point does not exist as far as it is concerned, so that we can try
+ * the local version of the tree instead.
+ */
+ if (m.m_lsys_mib_reply.status == ERESTART)
+ mib_do_deregister(endpt, node->node_rid);
+
+ return m.m_lsys_mib_reply.status;
+}
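
The reply contract described above can be summarized with a short sketch of the remote side's answer to a COMMON_MIB_CALL request; the actual handling lives in the system library's RMIB code, so everything here other than the message fields visible in this patch is an assumption:

/* Sketch only: shape of the remote side's reply to a COMMON_MIB_CALL. */
static void
sketch_reply_call(const message * m_in, int root_known, ssize_t result)
{
	message m_out;

	memset(&m_out, 0, sizeof(m_out));
	m_out.m_type = COMMON_MIB_REPLY;
	m_out.m_lsys_mib_reply.req_id = m_in->m_mib_lsys_call.req_id;

	/*
	 * If the given root ID is not (or no longer) registered on this side,
	 * reply with ERESTART so that mib_remote_call() above falls back to
	 * any local subtree underneath the mount point.
	 */
	m_out.m_lsys_mib_reply.status = root_known ? result : ERESTART;

	ipc_send(m_in->m_source, &m_out);
}
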
#define SCRATCH_SIZE MAX(PAGE_SIZE, sizeof(struct sysctldesc) + MAXDESCLEN)
static char scratch[SCRATCH_SIZE] __aligned(sizeof(int32_t));
-unsigned int nodes; /* how many nodes are there in the tree? */
-unsigned int objects; /* how many allocated memory objects are there? */
+unsigned int mib_nodes; /* how many nodes are there in the tree? */
+unsigned int mib_objects; /* how many memory objects are allocated? */
+unsigned int mib_remotes; /* how many remote subtrees are there? */
/*
* Find a node through its parent node and identifier. Return the node if it
memset(&scn, 0, sizeof(scn));
/*
- * We use CTLFLAG_PARENT and CTLFLAG_VERIFY internally only. NetBSD
- * uses the values of these flags for different purposes. Either way,
- * do not expose them to userland.
+ * We use CTLFLAG_PARENT, CTLFLAG_VERIFY, and CTLFLAG_REMOTE internally
+ * only. NetBSD uses the values of these flags for different purposes.
+ * Either way, do not expose them to userland.
*/
- scn.sysctl_flags = SYSCTL_VERSION |
- (node->node_flags & ~(CTLFLAG_PARENT | CTLFLAG_VERIFY));
+ scn.sysctl_flags = SYSCTL_VERSION | (node->node_flags &
+ ~(CTLFLAG_PARENT | CTLFLAG_VERIFY | CTLFLAG_REMOTE));
scn.sysctl_num = id;
strlcpy(scn.sysctl_name, node->node_name, sizeof(scn.sysctl_name));
scn.sysctl_ver = node->node_ver;
/* Report the node size the way NetBSD does, just in case. */
scn.sysctl_size = sizeof(scn);
- /* If this is a real parent node, report child information. */
- if ((node->node_flags & CTLFLAG_PARENT) && visible) {
- scn.sysctl_csize = node->node_csize;
- scn.sysctl_clen = node->node_clen;
- }
-
/*
+ * If this is a remote node, use the values we have of the root
+ * of the remote subtree. If we did not have these values, we
+ * would have to call into the remote service here, which for
+ * reliability purposes is a bad idea.
+ *
+ * If this is a real parent node, report child information. In
+ * both these cases, expose child information only if the node
+ * itself is accessible by the caller.
+ *
* If this is a function-driven node, indicate this by setting
* a nonzero function address. This allows trace(1) to
* determine that it should not attempt to descend into this
* expected in these parts of the tree. Do not return the real
* function pointer, as this would leak anti-ASR information.
*/
- if (!(node->node_flags & CTLFLAG_PARENT))
+ if (node->node_flags & CTLFLAG_REMOTE) {
+ if (visible) {
+ scn.sysctl_csize = node->node_rcsize;
+ scn.sysctl_clen = node->node_rclen;
+ }
+ } else if (node->node_flags & CTLFLAG_PARENT) {
+ if (visible) {
+ scn.sysctl_csize = node->node_csize;
+ scn.sysctl_clen = node->node_clen;
+ }
+ } else
scn.sysctl_func = SYSCTL_NODE_FN;
}
*/
static ssize_t
mib_query(struct mib_call * call, struct mib_node * parent,
- struct mib_oldp * oldp, struct mib_newp * newp, struct mib_node * root)
+ struct mib_oldp * oldp, struct mib_newp * newp)
{
struct sysctlnode scn;
struct mib_node *node;
* If a node version number is given, it must match the version
* of the parent or the root.
*/
- if (scn.sysctl_ver != 0 && scn.sysctl_ver != root->node_ver &&
+ if (scn.sysctl_ver != 0 &&
+ scn.sysctl_ver != mib_root.node_ver &&
scn.sysctl_ver != parent->node_ver)
return EINVAL;
}
return off;
}
+/*
+ * Check whether the given name buffer contains a valid node name string. If
+ * the name is nonempty, properly terminated, and contains only acceptable
+ * characters, return the length of the string, excluding the null terminator.
+ * Otherwise, return zero to indicate failure.
+ */
+static size_t
+mib_check_name(const char * name, size_t namesize)
+{
+ size_t namelen;
+ char c;
+
+ /* Names must be nonempty, null terminated, C symbol style strings. */
+ for (namelen = 0; namelen < namesize; namelen++) {
+ if ((c = name[namelen]) == '\0')
+ break;
+ /* A-Z, a-z, 0-9, _ only, and no digit as first character. */
+ if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
+ c == '_' || (c >= '0' && c <= '9' && namelen > 0)))
+ return 0;
+ }
+ if (namelen == 0 || namelen == namesize)
+ return 0;
+
+ return namelen;
+}
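
A few examples of what this routine accepts and rejects, written as a throwaway sketch rather than as part of the service itself:

/* Sketch only: examples of mib_check_name() acceptance rules. */
static void
sketch_check_name_examples(void)
{
	char buf[SYSCTL_NAMELEN];

	strlcpy(buf, "rtest", sizeof(buf));
	assert(mib_check_name(buf, sizeof(buf)) == 5);	/* valid name */

	strlcpy(buf, "1bad", sizeof(buf));
	assert(mib_check_name(buf, sizeof(buf)) == 0);	/* leading digit */

	strlcpy(buf, "has-dash", sizeof(buf));
	assert(mib_check_name(buf, sizeof(buf)) == 0);	/* '-' not allowed */

	buf[0] = '\0';
	assert(mib_check_name(buf, sizeof(buf)) == 0);	/* empty name */

	memset(buf, 'x', sizeof(buf));			/* not terminated */
	assert(mib_check_name(buf, sizeof(buf)) == 0);
}
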
+
/*
* Scan a parent node's children, as part of new node creation. Search for
* either a free node identifier (if given_id < 0) or collisions with the node
/*
* Increase the version of the root node, and copy this new version to all
- * nodes on the path to a node, as well as (optionally) that node itself.
+ * nodes on the path to the given node, including that node itself.
*/
static void
-mib_upgrade(struct mib_node ** stack, int depth, struct mib_node * node)
+mib_upgrade(struct mib_node * node)
{
uint32_t ver;
+ assert(node != NULL);
+
/*
- * The bottom of the stack is always the root node, which determines
- * the version of the entire tree. Do not use version number 0, as a
- * zero version number indicates no interest in versions elsewhere.
+ * The root node determines the version of the entire tree. Do not use
+ * version number 0, as a zero version number indicates no interest in
+ * versions elsewhere.
*/
- assert(depth > 0);
- ver = stack[0]->node_ver + 1;
+ ver = mib_root.node_ver + 1;
if (ver == 0)
ver = 1;
/* Copy the new version to all the nodes on the path. */
- while (depth-- > 0)
- stack[depth]->node_ver = ver;
+ do {
+ node->node_ver = ver;
- if (node != NULL)
- node->node_ver = stack[0]->node_ver;
+ node = node->node_parent;
+ } while (node != NULL);
+}
+
+/*
+ * Add a new dynamically allocated node into the tree, inserting it into the
+ * parent's linked list of dynamic children at the position given by 'prevp'.
+ * Also update versions and counters accordingly. This function never fails.
+ */
+static void
+mib_add(struct mib_dynode * dynode, struct mib_dynode ** prevp)
+{
+ struct mib_node *parent;
+
+ parent = dynode->dynode_node.node_parent;
+ assert(parent != NULL);
+
+ /* Link the dynamic node into the list, in the right place. */
+ assert(prevp != NULL);
+ dynode->dynode_next = *prevp;
+ *prevp = dynode;
+
+ /* The parent node now has one more child. */
+ parent->node_csize++;
+ parent->node_clen++;
+
+ /* There is now one more node in the tree. */
+ mib_nodes++;
+
+ /*
+ * Bump the version of all nodes on the path to the new node, including
+ * the node itself.
+ */
+ mib_upgrade(&dynode->dynode_node);
}
/*
*/
static ssize_t
mib_create(struct mib_call * call, struct mib_node * parent,
- struct mib_oldp * oldp, struct mib_newp * newp,
- struct mib_node ** stack, int depth)
+ struct mib_oldp * oldp, struct mib_newp * newp)
{
struct mib_dynode *dynode, **prevp;
struct mib_node *node;
if (!mib_authed(call))
return EPERM;
+ /*
+ * The parent must not be a remote node, but this is already implied by
+ * the fact that we got here at all.
+ */
+ assert(SYSCTL_TYPE(parent->node_flags) == CTLTYPE_NODE);
+ assert(!(parent->node_flags & CTLFLAG_REMOTE));
+
/* The parent node must not be marked as read-only. */
if (!(parent->node_flags & CTLFLAG_READWRITE))
return EPERM;
return EINVAL;
/*
- * If a node version number is given, it must match the version of the
- * parent or the root (which is always the bottom of the node stack).
- * The given version number is *not* used for the node being created.
+ * If a node version number is given, it must match the version of
+ * either the parent or the root node. The given version number is
+ * *not* used for the node being created.
*/
- assert(depth > 0);
-
- if (scn.sysctl_ver != 0 && scn.sysctl_ver != stack[0]->node_ver &&
+ if (scn.sysctl_ver != 0 && scn.sysctl_ver != mib_root.node_ver &&
scn.sysctl_ver != parent->node_ver)
return EINVAL;
if (scn.sysctl_func != NULL || scn.sysctl_parent != NULL)
return EINVAL;
- /* Names must be nonempty, null terminated, C symbol style strings. */
- for (namelen = 0; namelen < sizeof(scn.sysctl_name); namelen++) {
- if ((c = scn.sysctl_name[namelen]) == '\0')
- break;
- /* A-Z, a-z, 0-9, _ only, and no digit as first character. */
- if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
- c == '_' || (c >= '0' && c <= '9' && namelen > 0)))
- return EINVAL;
- }
- if (namelen == 0 || namelen == sizeof(scn.sysctl_name))
+ /* Verify that the given name is valid, and get its string length. */
+ namelen = mib_check_name(scn.sysctl_name, sizeof(scn.sysctl_name));
+
+ if (namelen == 0)
return EINVAL;
/*
if ((dynode = malloc(size)) == NULL)
return EINVAL; /* do not return ENOMEM */
- objects++;
+ mib_objects++;
/* From here on, we have to free "dynode" before returning an error. */
r = OK;
if (SYSCTL_TYPE(scn.sysctl_flags) == CTLTYPE_NODE)
node->node_flags |= CTLFLAG_PARENT;
node->node_size = scn.sysctl_size;
+ node->node_parent = parent;
node->node_name = dynode->dynode_name;
/* Initialize the node value. */
/* Deal with earlier failures now. */
if (r != OK) {
free(dynode);
- objects--;
+ mib_objects--;
return r;
}
- /* At this point, actual creation can no longer fail. */
+ /*
+ * At this point, actual creation can no longer fail. Add the node
+ * into the tree, and update versions and counters.
+ */
+ mib_add(dynode, prevp);
- /* Link the dynamic node into the list, in the right place. */
- assert(prevp != NULL);
- dynode->dynode_next = *prevp;
- *prevp = dynode;
+ /*
+ * Copy out the newly created node as resulting ("old") data. Do not
+ * undo the creation if this fails, though.
+ */
+ return mib_copyout_node(call, oldp, 0, id, node);
+}
- /* The parent node now has one more child. */
- parent->node_csize++;
- parent->node_clen++;
+/*
+ * Remove the given node from the tree. If 'prevp' is NULL, the node is a
+ * static node which should be zeroed out. If 'prevp' is not NULL, the node is
+ * a dynamic node which should be freed; 'prevp' will then point to the pointer
+ * to its dynode container. Also update versions and counters as appropriate.
+ * This function never fails.
+ */
+static void
+mib_remove(struct mib_node * node, struct mib_dynode ** prevp)
+{
+ struct mib_dynode *dynode;
+ struct mib_node *parent;
- nodes++;
+ parent = node->node_parent;
+ assert(parent != NULL);
+
+ /* If the description was allocated, free it. */
+ if (node->node_flags & CTLFLAG_OWNDESC) {
+ free(__UNCONST(node->node_desc));
+ mib_objects--;
+ }
/*
- * Bump the version of all nodes on the path to the new node, including
- * the node itself.
+ * Static nodes only use static memory, and dynamic nodes have the data
+ * area embedded in the dynode object. In neither case is data memory
+ * allocated separately, and thus, it need never be freed separately.
+ * Therefore we *must not* check CTLFLAG_OWNDATA here.
*/
- mib_upgrade(stack, depth, node);
+
+ assert(parent->node_csize > 0);
+ assert(parent->node_clen > 0);
/*
- * Copy out the newly created node as resulting ("old") data. Do not
- * undo the creation if this fails, though.
+ * Dynamic nodes must be freed. Freeing the dynode object also frees
+ * the node name and any associated data. Static nodes are zeroed out,
+ * and the static memory they referenced will become inaccessible.
*/
- return mib_copyout_node(call, oldp, 0, id, node);
+ if (prevp != NULL) {
+ dynode = *prevp;
+ *prevp = dynode->dynode_next;
+
+ assert(node == &dynode->dynode_node);
+
+ free(dynode);
+ mib_objects--;
+
+ parent->node_csize--;
+ } else
+ memset(node, 0, sizeof(*node));
+
+ parent->node_clen--;
+
+ mib_nodes--;
+
+ /* Bump the version of all nodes on the path to the destroyed node. */
+ mib_upgrade(parent);
}
/*
*/
static ssize_t
mib_destroy(struct mib_call * call, struct mib_node * parent,
- struct mib_oldp * oldp, struct mib_newp * newp,
- struct mib_node ** stack, int depth)
+ struct mib_oldp * oldp, struct mib_newp * newp)
{
- struct mib_dynode *dynode, **prevp;
+ struct mib_dynode **prevp;
struct mib_node *node;
struct sysctlnode scn;
ssize_t r;
/* For node-type nodes, extra rules apply. */
if (SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE) {
+ /* The node must not be a mount point. */
+ if (node->node_flags & CTLFLAG_REMOTE)
+ return EBUSY;
+
/* The node must not have an associated function. */
if (!(node->node_flags & CTLFLAG_PARENT))
return EPERM;
*/
r = mib_copyout_node(call, oldp, 0, scn.sysctl_num, node);
- /* If the description was allocated, free it. */
- if (node->node_flags & CTLFLAG_OWNDESC) {
- free(__UNCONST(node->node_desc));
- objects--;
- }
-
- /*
- * Static nodes only use static memory, and dynamic nodes have the data
- * area embedded in the dynode object. In neither case is data memory
- * allocated separately, and thus, it need never be freed separately.
- * Therefore we *must not* check CTLFLAG_OWNDATA here.
- */
-
- assert(parent->node_csize > 0);
- assert(parent->node_clen > 0);
-
/*
- * Dynamic nodes must be freed. Freeing the dynode object also frees
- * the node name and any associated data. Static nodes are zeroed out,
- * and the static memory they referenced will become inaccessible.
+ * Remove the node from the tree. The procedure depends on whether the
+ * node is static (prevp == NULL) or dynamic (prevp != NULL). Also
+ * update versions and counters.
*/
- if (prevp != NULL) {
- dynode = *prevp;
- *prevp = dynode->dynode_next;
-
- free(dynode);
- objects--;
-
- parent->node_csize--;
- } else
- memset(node, 0, sizeof(*node));
-
- parent->node_clen--;
-
- nodes--;
-
- /* Bump the version of all nodes on the path to the destroyed node. */
- mib_upgrade(stack, depth, NULL);
+ mib_remove(node, prevp);
return r;
}
if (!mib_authed(call))
return EPERM;
+ /*
+ * The node must not be a mount point. Arguably this
+ * check is not necessary, since we use the description
+ * of the preexisting underlying node anyway.
+ */
+ if (SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE &&
+ (node->node_flags & CTLFLAG_REMOTE))
+ return EBUSY;
+
/* The node must not already have a description. */
if (node->node_desc != NULL)
return EPERM;
return EINVAL; /* do not return ENOMEM */
}
- objects++;
+ mib_objects++;
/* The description must now be freed with the node. */
node->node_flags |= CTLFLAG_OWNDESC;
return EINVAL;
switch (SYSCTL_TYPE(node->node_flags)) {
+ case CTLTYPE_BOOL:
+ case CTLTYPE_INT:
+ case CTLTYPE_QUAD:
+ case CTLTYPE_STRUCT:
+ /* Non-string types must have an exact size match. */
+ if (newlen != node->node_size)
+ return EINVAL;
+ break;
case CTLTYPE_STRING:
/*
* Strings must not exceed their buffer size. There is a
if (newlen > node->node_size)
return EINVAL;
break;
- case CTLTYPE_BOOL:
- case CTLTYPE_INT:
- case CTLTYPE_QUAD:
- case CTLTYPE_STRUCT:
- /* Non-string types must have an exact size match. */
- if (newlen != node->node_size)
- return EINVAL;
- break;
default:
return EINVAL;
}
/*
- * If we cannot fit the data in the small stack buffer, then allocate a
+ * If we cannot fit the data in the scratch buffer, then allocate a
* temporary buffer. We add one extra byte so that we can add a null
* terminator at the end of strings in case userland did not supply
* one. Either way, we must free the temporary buffer later!
return EINVAL;
}
- objects++;
+ mib_objects++;
} else
src = scratch;
if (src != scratch) {
free(src);
- objects--;
+ mib_objects--;
}
return r;
* old data length on success, or a negative error code on failure.
*/
ssize_t
-mib_dispatch(struct mib_call * call, struct mib_node * root,
- struct mib_oldp * oldp, struct mib_newp * newp)
+mib_dispatch(struct mib_call * call, struct mib_oldp * oldp,
+ struct mib_newp * newp)
{
- struct mib_node *stack[CTL_MAXNAME];
struct mib_node *parent, *node;
- int id, depth, is_leaf, has_verify, has_func;
+ ssize_t r;
+ int id, is_leaf, can_restart, has_verify, has_func;
assert(call->call_namelen <= CTL_MAXNAME);
* Resolve the name by descending into the node tree, level by level,
* starting at the MIB root.
*/
- depth = 0;
-
- for (parent = root; call->call_namelen > 0; parent = node) {
- /*
- * For node creation and destruction, build a node stack, to
- * allow for up-propagation of new node version numbers.
- */
- stack[depth++] = parent;
-
+ for (parent = &mib_root; call->call_namelen > 0; parent = node) {
id = call->call_name[0];
call->call_name++;
call->call_namelen--;
switch (id) {
case CTL_QUERY:
- return mib_query(call, parent, oldp, newp,
- root);
+ return mib_query(call, parent, oldp, newp);
case CTL_CREATE:
- return mib_create(call, parent, oldp, newp,
- stack, depth);
+ return mib_create(call, parent, oldp, newp);
case CTL_DESTROY:
- return mib_destroy(call, parent, oldp, newp,
- stack, depth);
+ return mib_destroy(call, parent, oldp, newp);
case CTL_DESCRIBE:
return mib_describe(call, parent, oldp, newp);
case CTL_CREATESYM:
if ((node->node_flags & CTLFLAG_PRIVATE) && !mib_authed(call))
return EPERM;
+ /*
+ * Start by checking if the node is a remote node. If so, let
+ * a remote service handle the remainder of this request.
+ * However, as part of attempting the remote call, we may
+ * discover that the remote service has died or that it is
+ * unmounting the subtree. If the node was not a temporary
+	 * mount point, we should (and do) continue with the request
+	 * locally; if it was, it will already be deallocated and we
+ * must be very careful not to access 'node' again!
+ */
+ is_leaf = (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE);
+
+ if (!is_leaf && (node->node_flags & CTLFLAG_REMOTE)) {
+		/* Determine this now, before 'node' may disappear. */
+ can_restart = (node->node_flags & CTLFLAG_PARENT);
+
+ r = mib_remote_call(call, node, oldp, newp);
+
+ if (r != ERESTART || !can_restart)
+ return (r != ERESTART) ? r : ENOENT;
+
+ /* Service died, subtree is unmounted, keep going. */
+ assert(SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE);
+ assert(!(node->node_flags & CTLFLAG_REMOTE));
+ }
+
/*
* Is this a leaf node, and/or is this node handled by a
* function? If either is true, resolution ends at this level.
* different ways to determine whether there is a function
* depending on whether the node is a leaf or not.
*/
- is_leaf = (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE);
if (is_leaf) {
has_verify = (node->node_flags & CTLFLAG_VERIFY);
has_func = (!has_verify && node->node_func != NULL);
if (node->node_flags == 0)
continue;
- nodes++;
+ mib_nodes++;
parent->node_clen++;
node->node_ver = parent->node_ver;
+ node->node_parent = parent;
/* Recursively apply this function to all node children. */
if (SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE &&
* that could not be assigned at compile time.
*/
void
-mib_tree_init(struct mib_node * root)
+mib_tree_init(void)
{
/* Initialize some variables. */
- nodes = 1; /* the root node itself */
- objects = 0;
+ mib_nodes = 1; /* the root node itself */
+ mib_objects = 0;
- /* The entire tree starts with the same, nonzero node version. */
- root->node_ver = 1;
+ /*
+ * The entire tree starts with the same, nonzero node version.
+ * The root node is the only node without a parent.
+ */
+ mib_root.node_ver = 1;
+ mib_root.node_parent = NULL;
/* Recursively initialize the static tree. */
- mib_tree_recurse(root);
+ mib_tree_recurse(&mib_root);
+}
+
+/*
+ * Process a subtree mount request from a remote service. Return OK on
+ * success, with a pointer to the resulting static-node structure stored in
+ * 'nodep'. Return a negative error code on failure.
+ */
+int
+mib_mount(const int * mib, unsigned int miblen, unsigned int eid, uint32_t rid,
+ uint32_t flags, unsigned int csize, unsigned int clen,
+ struct mib_node ** nodep)
+{
+ struct mib_dynode *dynode, **prevp;
+ struct mib_node *parent, *node;
+ char name[SYSCTL_NAMELEN], *desc;
+ size_t size, namelen, desclen;
+ unsigned int n;
+ int r, id;
+
+ /*
+ * Perform initial verification of the given parameters. Even stricter
+ * checks may be performed later.
+ */
+ /*
+ * By policy, we forbid mounting top-level nodes. This is in effect
+ * also the only security-like restriction: a service should not be
+ * able to just take over, say, the entire "kern" subtree. There is
+ * currently little in the way of a service taking over an important
+ * set of second-level nodes, though.
+ *
+ * TODO: allow mounting of predefined mount points only, for example by
+ * having an internal node flag that permits mounting the subtree or
+ * any node in it. As an even better alternative, allow this to be
+ * controlled through a policy specification; unfortunately, this would
+ * also add a substantial amount of infrastructure.
+ */
+ if (miblen < 2) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, path too short\n"));
+
+ return EPERM;
+ }
+
+ /*
+ * The flags field is highly restricted right now. Only a few flags
+ * may be given at all, and when using an existing node as a mount
+ * point, the given flags must exactly match the existing node's flags.
+ */
+ if (SYSCTL_VERS(flags) != SYSCTL_VERSION ||
+ SYSCTL_TYPE(flags) != CTLTYPE_NODE ||
+ (SYSCTL_FLAGS(flags) & ~(CTLFLAG_READONLY | CTLFLAG_READWRITE |
+ CTLFLAG_PERMANENT | CTLFLAG_HIDDEN)) != 0) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, invalid flags %"PRIx32
+ "\n", flags));
+
+ return EINVAL;
+ }
+
+ if (csize > (1U << MIB_RC_BITS) || clen > csize) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, invalid child size or "
+ "length (%u, %u)\n", csize, clen));
+
+ return EINVAL;
+ }
+
+ /*
+ * Look up the parent node of the mount point. This parent node must
+ * exist - we don't want to create more than one temporary node in any
+ * case. All the nodes leading up to and including the parent node
+ * must be real, local, non-private, node-type nodes. The path may not
+ * be private, because that would allow an unprivileged service to
+ * intercept writes to privileged nodes (currently a total nonissue in
+ * practice, but still). Note that the service may itself restrict
+ * access to nodes in its own mounted subtree in any way it wishes.
+ */
+ parent = &mib_root;
+
+ for (n = 0; n < miblen - 1; n++) {
+ /* Meta-identifiers are obviously not allowed in the path. */
+ if ((id = mib[n]) < 0) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, meta-ID in "
+ "path\n"));
+
+ return EINVAL;
+ }
+
+ /* Locate the child node. */
+ if ((node = mib_find(parent, id, NULL /*prevp*/)) == NULL) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, path not "
+ "found\n"));
+
+ return ENOENT;
+ }
+
+ /* Make sure it is a regular node-type node. */
+ if (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE ||
+ !(node->node_flags & CTLFLAG_PARENT) ||
+ (node->node_flags & (CTLFLAG_REMOTE | CTLFLAG_PRIVATE))) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, unacceptable "
+ "node on path\n"));
+
+ return EPERM;
+ }
+
+ parent = node;
+ }
+
+ /* Now see if the mount point itself exists. */
+ if ((id = mib[miblen - 1]) < 0) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, meta-ID in path\n"));
+
+ return EINVAL;
+ }
+
+ /*
+ * If the target node exists and passes all tests, it will simply be
+ * converted to a mount point. If the target node does not exist, we
+ * have to allocate a temporary node as mount point.
+ */
+ if ((node = mib_find(parent, id, NULL /*prevp*/)) != NULL) {
+ /*
+ * We are about to mount on an existing node. As stated above,
+ * the node flags must match the given flags exactly.
+ */
+ if (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE ||
+ SYSCTL_FLAGS(node->node_flags) !=
+ (SYSCTL_FLAGS(flags) | CTLFLAG_PARENT)) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, target node "
+ "mismatch (%"PRIx32", %"PRIx32")\n",
+ node->node_flags, flags));
+
+ return EPERM;
+ }
+
+ /*
+ * If the node has dynamically added children, we will not be
+ * able to restore the node to its old state when unmounting.
+ */
+ if (node->node_size != node->node_csize) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, node has "
+ "dynamic children\n"));
+
+ return EBUSY;
+ }
+
+ mib_upgrade(node);
+ } else {
+ /*
+ * We are going to create a temporary mount point. Much of the
+ * procedure that follows is a rather selective extract from
+ * mib_create(). Start with a check for the impossible.
+ */
+ if (parent->node_csize == INT_MAX) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, parent node "
+ "full\n"));
+
+ return EINVAL;
+ }
+
+ /*
+ * In order to create the new node, we also need the node's
+ * name and description; those did not fit in the request
+ * message. Ask the caller to copy these strings to us.
+ */
+ name[0] = '\0';
+ scratch[0] = '\0';
+
+ if ((r = mib_remote_info(eid, rid, name, sizeof(name), scratch,
+ MAXDESCLEN)) != OK) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, node info "
+ "request yielded %d\n", r));
+
+ return r;
+ }
+
+ /* Make sure the name is valid. */
+ if ((namelen = mib_check_name(name, sizeof(name))) == 0) {
+ printf("MIB: mounting failed, bad name\n");
+
+ return EINVAL;
+ }
+
+ /* Just forcefully terminate the description. */
+ scratch[MAXDESCLEN - 1] = '\0';
+ desclen = strlen(scratch);
+
+ /*
+ * We know the identifier is not in use yet; make sure that the
+ * name is not, either. As a side effect, find out where the
+ * new node should be inserted upon success.
+ */
+ if (mib_scan(parent, id, name, &id /*unused*/, &prevp,
+ &node /*unused*/) != OK) {
+ MIB_DEBUG_MOUNT(("MIB: mounting failed, name "
+ "conflict\n"));
+
+ return EEXIST;
+ }
+
+ /*
+ * Allocate a dynamic node. Unlike for user-created dynamic
+ * nodes, temporary mount points also include the description
+ * in the dynode object.
+ */
+ size = sizeof(*dynode) + namelen + desclen + 1;
+
+ if ((dynode = malloc(size)) == NULL) {
+ printf("MIB: out of memory!\n");
+
+ return ENOMEM;
+ }
+ mib_objects++;
+
+ /* Initialize the dynamic node. */
+ memset(dynode, 0, sizeof(*dynode));
+ dynode->dynode_id = id;
+ strlcpy(dynode->dynode_name, name, namelen + 1);
+ desc = &dynode->dynode_name[namelen + 1];
+ strlcpy(desc, scratch, desclen + 1);
+
+ node = &dynode->dynode_node;
+ node->node_flags = flags & ~SYSCTL_VERS_MASK;
+ node->node_size = 0;
+ node->node_parent = parent;
+ node->node_name = dynode->dynode_name;
+ node->node_desc = desc;
+
+ /*
+ * Add the new dynamic node into the tree, and adjust versions
+ * and counters.
+ */
+ mib_add(dynode, prevp);
+ }
+
+ /* Success! Perform the actual mount, and return the target node. */
+ node->node_flags |= CTLFLAG_REMOTE;
+ node->node_eid = eid;
+ node->node_rcsize = csize;
+ node->node_rclen = clen;
+ node->node_rid = rid;
+
+ mib_remotes++;
+
+ *nodep = node;
+ return OK;
+}
+
+/*
+ * Unmount the remote subtree identified by the given node. Release the mount
+ * point by reversing the action performed while mounting. Also bump the
+ * version numbers on the path, so that userland knows that it is to expect a
+ * change of contents in the subtree. This function always succeeds, and may
+ * deallocate the given node.
+ */
+void
+mib_unmount(struct mib_node * node)
+{
+ struct mib_dynode **prevp;
+ struct mib_node *child;
+ int id;
+
+ assert(SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE);
+ assert(node->node_flags & CTLFLAG_REMOTE);
+
+ /*
+ * Given that the node has the CTLFLAG_REMOTE flag set, we can now tell
+ * whether the remote subtree obscured a preexisting node or we created
+ * a temporary mount point, by checking its CTLFLAG_PARENT flag.
+ */
+ if (node->node_flags & CTLFLAG_PARENT) {
+ /*
+ * Return the node to its former pre-mount state. Restore the
+ * original node_clen field by recomputing it.
+ */
+ node->node_flags &= ~CTLFLAG_REMOTE;
+ node->node_csize = node->node_size;
+ node->node_clen = 0;
+
+ for (id = 0; IS_STATIC_ID(node, id); id++) {
+ child = &node->node_scptr[id];
+
+ if (child->node_flags != 0)
+ node->node_clen++;
+ }
+
+ node->node_dcptr = NULL;
+
+ /* Increase version numbers on the path to the node. */
+ mib_upgrade(node);
+ } else {
+ /*
+ * We know that we dynamically allocated this node; find its
+ * parent's pointer to it.
+ */
+ for (prevp = &node->node_parent->node_dcptr; *prevp != NULL;
+ prevp = &(*prevp)->dynode_next) {
+ if (&(*prevp)->dynode_node == node)
+ break;
+ }
+ assert(*prevp != NULL);
+
+ /* Free the node, and adjust counts and versions. */
+ mib_remove(node, prevp);
+ }
+
+ assert(mib_remotes > 0);
+ mib_remotes--;
}
SUBDIR+= blocktest
SUBDIR+= ddekit
+SUBDIR+= rmibtest
# Some have special flags compiling
CPPFLAGS.test56.c += -D_MINIX_SYSTEM=1
t67a t67b t68a t68b tvnd t84_h_spawn t84_h_spawnattr
SCRIPTS+= run check-install testinterp.sh testsh1.sh testsh2.sh testmfs.sh \
- testisofs.sh testvnd.sh testkyua.sh testrelpol.sh
+ testisofs.sh testvnd.sh testkyua.sh testrelpol.sh testrmib.sh
# test57loop.S is not linked into the .bcl file.
# This way, we can link it in when linking the final binary
--- /dev/null
+# Makefile for the Remote MIB test service (rmibtest)
+PROG= rmibtest
+SRCS= rmibtest.c
+FILES= rmibtest.conf
+
+DPADD+= ${LIBSYS}
+LDADD+= -lsys
+
+MAN=
+
+BINDIR?= /usr/tests/minix-posix/rmibtest
+FILESDIR?= /usr/tests/minix-posix/rmibtest
+
+.include <minix.service.mk>
--- /dev/null
+/* Remote MIB (RMIB) test service - by D.C. van Moolenbroek */
+/*
+ * This test is a good start, but not an exhaustive coverage test for all
+ * possible failure cases. The main reason is that there are various
+ * failure scenarios that we cannot generate without implementing our own
+ * local bogus RMIB code. Adding that is left as a TODO for later.
+ */
+#include <minix/drivers.h>
+#include <minix/sysctl.h>
+#include <minix/rmib.h>
+
+static int running;
+
+/* The following is a copy of the minix.test subtree in the MIB service. */
+static char test_string[16], test_struct[12];
+
+static struct rmib_node minix_test_secret_table[] = {
+/* 0*/ [SECRET_VALUE] = RMIB_INT(RMIB_RO, 12345, "value",
+ "The combination to my luggage"),
+};
+
+static struct rmib_node minix_test_table[] = {
+/* 0*/ [TEST_INT] = RMIB_INT(RMIB_RO | CTLFLAG_HEX, 0x01020304,
+ "int", "Value test field"),
+/* 1*/ [TEST_BOOL] = RMIB_BOOL(RMIB_RW, 0, "bool",
+ "Boolean test field"),
+/* 2*/ [TEST_QUAD] = RMIB_QUAD(RMIB_RW, 0, "quad",
+ "Quad test field"),
+/* 3*/ [TEST_STRING] = RMIB_STRING(RMIB_RW, test_string, "string",
+ "String test field"),
+/* 4*/ [TEST_STRUCT] = RMIB_STRUCT(RMIB_RW, sizeof(test_struct),
+ test_struct, "struct",
+ "Structure test field"),
+/* 5*/ [TEST_PRIVATE] = RMIB_INT(RMIB_RW | CTLFLAG_PRIVATE, -5375,
+ "private", "Private test field"),
+/* 6*/ [TEST_ANYWRITE] = RMIB_INT(RMIB_RW | CTLFLAG_ANYWRITE, 0,
+ "anywrite", "AnyWrite test field"),
+/* 7*/ [TEST_DYNAMIC] = RMIB_INT(RMIB_RO, 0, "deleteme",
+ "This node will be destroyed"),
+/* 8*/ [TEST_SECRET] = RMIB_NODE(RMIB_RO | CTLFLAG_PRIVATE,
+ minix_test_secret_table, "secret",
+ "Private subtree"),
+/* 9*/ [TEST_PERM] = RMIB_INT(RMIB_RO, 1, "permanent", NULL),
+/*10*/ [TEST_DESTROY1] = RMIB_INT(RMIB_RO, 123, "destroy1", NULL),
+/*11*/ [TEST_DESTROY2] = RMIB_INT(RMIB_RO, 456, "destroy2",
+ "This node will be destroyed"),
+};
+
+static struct rmib_node minix_test = RMIB_NODE(RMIB_RW | CTLFLAG_HIDDEN,
+ minix_test_table, "test", "Test87 testing ground");
+/* Here ends the copy of the minix.test subtree in the MIB service. */
+
+static struct rmib_node test_table[] = {
+};
+
+static struct rmib_node test_rnode = RMIB_NODE(RMIB_RO, test_table, "test",
+ "Test node");
+
+static int value = 5375123;
+
+static ssize_t test_func(struct rmib_call *, struct rmib_node *,
+ struct rmib_oldp *, struct rmib_newp *);
+
+/* No defined constants because userland will access these by name anyway. */
+static struct rmib_node minix_rtest_table[] = {
+ [1] = RMIB_INTPTR(RMIB_RW, &value, "int",
+ "Test description"),
+ [2] = RMIB_FUNC(CTLTYPE_INT | RMIB_RW, sizeof(int),
+ test_func, "func", "Test function"),
+};
+
+static struct rmib_node minix_rtest = RMIB_NODE(RMIB_RO, minix_rtest_table,
+ "rtest", "Remote test subtree");
+
+/*
+ * Test function that deflects reads and writes to its sibling node. Not a
+ * super useful thing to do, but a decent test of functionality regardless.
+ */
+static ssize_t
+test_func(struct rmib_call * call, struct rmib_node * node,
+ struct rmib_oldp * oldp, struct rmib_newp * newp)
+{
+
+ return rmib_readwrite(call, &minix_rtest_table[1], oldp, newp);
+}
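
Once this subtree has been registered and mounted under minix, userland can reach these nodes by name just like any local node. A minimal user-side sketch, not part of the test service itself; the node names are taken from the tables above, everything else is illustrative:

/* Sketch only: accessing the remotely registered nodes from userland. */
#include <sys/sysctl.h>
#include <stdio.h>

int
sketch_access_rtest(void)
{
	int val, newval = 42;
	size_t len = sizeof(val);

	/* Read minix.rtest.int, which is backed by 'value' in this service. */
	if (sysctlbyname("minix.rtest.int", &val, &len, NULL, 0) != 0)
		return -1;
	printf("minix.rtest.int = %d\n", val);

	/* A write to minix.rtest.func is deflected to its sibling node. */
	if (sysctlbyname("minix.rtest.func", NULL, NULL, &newval,
	    sizeof(newval)) != 0)
		return -1;

	return 0;
}
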
+
+/*
+ * Attempt to perform registrations that should be rejected locally, and thus
+ * result in failure immediately. Unfortunately, we cannot verify that the MIB
+ * service also verifies these aspects remotely, at least without talking to it
+ * directly.
+ */
+static void
+test_local_failures(void)
+{
+ int r, mib[CTL_SHORTNAME + 1];
+
+ memset(mib, 0, sizeof(mib));
+
+ /* Test an empty path. */
+ if ((r = rmib_register(mib, 0, &test_rnode)) != EINVAL)
+ panic("registering remote MIB subtree yielded: %d", r);
+
+ /* Test a path that is too long. */
+ if ((r = rmib_register(mib, CTL_SHORTNAME + 1, &test_rnode)) != EINVAL)
+ panic("registering remote MIB subtree yielded: %d", r);
+
+ /* Test a mount point that is not a node-type (parent) node. */
+ mib[0] = CTL_MINIX;
+ mib[1] = MINIX_TEST;
+ mib[2] = TEST_INT;
+ if ((r = rmib_register(mib, 3, &minix_test_table[TEST_INT])) != EINVAL)
+ panic("registering remote MIB subtree yielded: %d", r);
+}
+
+/*
+ * Perform a number of registrations that will not be accepted by the MIB
+ * service. We will never know, but the userland test script can verify the
+ * difference by comparing the number of remotes before and after.
+ */
+static void
+test_remote_failures(void)
+{
+ int r, mib[CTL_SHORTNAME];
+
+ /* Test an existing one-node path. */
+ mib[0] = CTL_KERN;
+ if ((r = rmib_register(mib, 1, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path in which a non-final component does not exist. */
+	mib[1] = CREATE_BASE - 1;	/* probably as safe as it gets */
+ mib[2] = 0;
+ if ((r = rmib_register(mib, 3, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path in which a non-final component is not a parent node. */
+ mib[1] = KERN_OSTYPE;
+ if ((r = rmib_register(mib, 3, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path in which a non-final component is a meta-identifier. */
+ mib[1] = CTL_QUERY;
+ if ((r = rmib_register(mib, 3, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path in which the final component is a meta-identifier. */
+ if ((r = rmib_register(mib, 2, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path in which the final component identifies a non-parent. */
+ mib[1] = KERN_OSTYPE;
+ if ((r = rmib_register(mib, 2, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path with unacceptable flags for the final component. */
+ mib[0] = CTL_MINIX;
+ mib[1] = MINIX_TEST;
+ mib[2] = TEST_SECRET;
+ if ((r = rmib_register(mib, 3, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ rmib_reset();
+
+ /* Test a path of which the name, but not the ID, already exists. */
+ mib[1] = CREATE_BASE - 1;
+ if ((r = rmib_register(mib, 2, &test_rnode)) != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+ /*
+	 * Do NOT call rmib_reset() this time: we want to let the MIB service
+ * get the name from us.
+ */
+}
+
+static int
+init(int type __unused, sef_init_info_t * info __unused)
+{
+ const int new_mib[] = { CTL_MINIX, CREATE_BASE - 2 };
+ const int shadow_mib[] = { CTL_MINIX, MINIX_TEST };
+ int r;
+
+ test_local_failures();
+
+ test_remote_failures();
+
+ /*
+ * We must now register our new test tree before shadowing minix.test,
+ * because if any of the previous requests actually did succeed, the
+ * next registration will be rejected (ID 0 already in use) and no
+ * difference would be detected because of "successful" shadowing.
+ */
+ r = rmib_register(new_mib, __arraycount(new_mib), &minix_rtest);
+ if (r != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+
+ r = rmib_register(shadow_mib, __arraycount(shadow_mib), &minix_test);
+ if (r != OK)
+ panic("unable to register remote MIB subtree: %d", r);
+
+ running = TRUE;
+
+ return OK;
+}
+
+static void
+cleanup(void)
+{
+ int r;
+
+ if ((r = rmib_deregister(&minix_rtest)) != OK)
+ panic("unable to deregister: %d", r);
+ if ((r = rmib_deregister(&minix_test)) != OK)
+ panic("unable to deregister: %d", r);
+
+ /*
+ * TODO: the fact that the MIB service can currently not detect the
+ * death of other services is creating somewhat of a problem here: if
+ * we deregister shortly before exiting, the asynchronous deregister
+ * requests may not be delivered before we actually exit (and take our
+	 * asynsend table with us), leaving the remote subtrees around until
+	 * a user process tries to access them. We work around this here by
+ * delaying the exit by half a second - shorter than RS's timeout, but
+ * long enough to allow deregistration.
+ */
+ sys_setalarm(sys_hz() / 2, 0);
+
+ running = FALSE;
+}
+
+static void
+got_signal(int sig)
+{
+
+ if (sig == SIGTERM && running)
+ cleanup();
+}
+
+int
+main(void)
+{
+ message m;
+ int r, ipc_status;
+
+ sef_setcb_init_fresh(init);
+ sef_setcb_signal_handler(got_signal);
+
+ sef_startup();
+
+ for (;;) {
+ r = sef_receive_status(ANY, &m, &ipc_status);
+
+ if (r != OK)
+ panic("sef_receive_status failed: %d", r);
+
+ if (m.m_source == CLOCK && is_ipc_notify(ipc_status))
+ break; /* the intended exit path; see above */
+ if (m.m_source == MIB_PROC_NR)
+ rmib_process(&m, ipc_status);
+ }
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+service rmibtest {
+};
setuids="test11 test33 test43 test44 test46 test56 test60 test61 test65 \
test69 test73 test74 test78 test83 test85 test87 test88 test89"
# Scripts that require to be run as root
-rootscripts="testisofs testvnd testrelpol"
+rootscripts="testisofs testvnd testrmib testrelpol"
alltests="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 \
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 \
- 81 82 83 84 85 86 87 88 89 sh1 sh2 interp mfs isofs vnd"
+ 81 82 83 84 85 86 87 88 89 sh1 sh2 interp mfs isofs vnd rmib"
tests_no=`expr 0`
# If root, make sure the setuid tests have the correct permissions
*/
if (scn[0].sysctl_num != TEST_INT) e(0);
if (SYSCTL_TYPE(scn[0].sysctl_flags) != CTLTYPE_INT) e(0);
- if (SYSCTL_FLAGS(scn[0].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[0].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READONLY | CTLFLAG_IMMEDIATE | CTLFLAG_HEX)) e(0);
if (SYSCTL_VERS(scn[0].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[0].sysctl_name, "int")) e(0);
break;
if (i == count) e(0);
if (SYSCTL_TYPE(scn[i].sysctl_flags) != CTLTYPE_INT) e(0);
- if (SYSCTL_FLAGS(scn[i].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[i].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READWRITE | CTLFLAG_PRIVATE | CTLFLAG_IMMEDIATE)) e(0);
if (SYSCTL_VERS(scn[i].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[i].sysctl_name, "private")) e(0);
break;
if (i == count) e(0);
if (SYSCTL_TYPE(scn[i].sysctl_flags) != CTLTYPE_NODE) e(0);
- if (SYSCTL_FLAGS(scn[i].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[i].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READONLY | CTLFLAG_PRIVATE)) e(0);
if (SYSCTL_VERS(scn[i].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[i].sysctl_name, "secret")) e(0);
* order for at least the static nodes. We do not make assumptions
* about whether dynamic nodes are merged in or (as is the case as of
* writing) returned after the static nodes. At this point there
- * should be no dynamic nodes here yet anyway.
+ * should be no dynamic nodes here yet anyway. We mostly ignore
+ * CTLFLAG_PERMANENT in order to facilitate running this test on a
+ * remotely mounted subtree.
*/
if (scn[0].sysctl_num != TEST_INT) e(0);
if (SYSCTL_TYPE(scn[0].sysctl_flags) != CTLTYPE_INT) e(0);
- if (SYSCTL_FLAGS(scn[0].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[0].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READONLY | CTLFLAG_IMMEDIATE | CTLFLAG_HEX)) e(0);
if (SYSCTL_VERS(scn[0].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[0].sysctl_name, "int")) e(0);
if (scn[1].sysctl_num != TEST_BOOL) e(0);
if (SYSCTL_TYPE(scn[1].sysctl_flags) != CTLTYPE_BOOL) e(0);
- if (SYSCTL_FLAGS(scn[1].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[1].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE)) e(0);
if (SYSCTL_VERS(scn[1].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[1].sysctl_name, "bool")) e(0);
if (scn[2].sysctl_num != TEST_QUAD) e(0);
if (SYSCTL_TYPE(scn[2].sysctl_flags) != CTLTYPE_QUAD) e(0);
- if (SYSCTL_FLAGS(scn[2].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[2].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE)) e(0);
if (SYSCTL_VERS(scn[2].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[2].sysctl_name, "quad")) e(0);
if (scn[3].sysctl_num != TEST_STRING) e(0);
if (SYSCTL_TYPE(scn[3].sysctl_flags) != CTLTYPE_STRING) e(0);
- if (SYSCTL_FLAGS(scn[3].sysctl_flags) != CTLFLAG_READWRITE) e(0);
+ if ((SYSCTL_FLAGS(scn[3].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
+ CTLFLAG_READWRITE) e(0);
if (SYSCTL_VERS(scn[3].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[3].sysctl_name, "string")) e(0);
if (scn[3].sysctl_ver == 0) e(0);
if (scn[4].sysctl_num != TEST_STRUCT) e(0);
if (SYSCTL_TYPE(scn[4].sysctl_flags) != CTLTYPE_STRUCT) e(0);
- if (SYSCTL_FLAGS(scn[4].sysctl_flags) != CTLFLAG_READWRITE) e(0);
+ if ((SYSCTL_FLAGS(scn[4].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
+ CTLFLAG_READWRITE) e(0);
if (SYSCTL_VERS(scn[4].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[4].sysctl_name, "struct")) e(0);
if (scn[4].sysctl_ver == 0) e(0);
if (scn[5].sysctl_num != TEST_PRIVATE) e(0);
if (SYSCTL_TYPE(scn[5].sysctl_flags) != CTLTYPE_INT) e(0);
- if (SYSCTL_FLAGS(scn[5].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[5].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READWRITE | CTLFLAG_PRIVATE | CTLFLAG_IMMEDIATE)) e(0);
if (SYSCTL_VERS(scn[5].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[5].sysctl_name, "private")) e(0);
if (scn[6].sysctl_num != TEST_ANYWRITE) e(0);
if (SYSCTL_TYPE(scn[6].sysctl_flags) != CTLTYPE_INT) e(0);
- if (SYSCTL_FLAGS(scn[6].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[6].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READWRITE | CTLFLAG_ANYWRITE | CTLFLAG_IMMEDIATE)) e(0);
if (SYSCTL_VERS(scn[6].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[6].sysctl_name, "anywrite")) e(0);
if (scn[i].sysctl_num != TEST_SECRET) e(0);
if (SYSCTL_TYPE(scn[i].sysctl_flags) != CTLTYPE_NODE) e(0);
- if (SYSCTL_FLAGS(scn[i].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[i].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READONLY | CTLFLAG_PRIVATE)) e(0);
if (SYSCTL_VERS(scn[i].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[i].sysctl_name, "secret")) e(0);
if (scn[0].sysctl_num != SECRET_VALUE) e(0);
if (SYSCTL_TYPE(scn[0].sysctl_flags) != CTLTYPE_INT) e(0);
- if (SYSCTL_FLAGS(scn[0].sysctl_flags) !=
+ if ((SYSCTL_FLAGS(scn[0].sysctl_flags) & ~CTLFLAG_PERMANENT) !=
(CTLFLAG_READONLY | CTLFLAG_IMMEDIATE)) e(0);
if (SYSCTL_VERS(scn[0].sysctl_flags) != SYSCTL_VERSION) e(0);
if (strcmp(scn[0].sysctl_name, "value")) e(0);
--- /dev/null
+#!/bin/sh
+
+# Shell script used to test the Remote MIB (RMIB) functionality.
+
+# We test a couple of things here, using the rmibtest service and test87:
+# - some cases where remote MIB subtree registration should fail;
+# - a new mount point (minix.rtest) with a small tree behind it, on which we
+#   test some basic reads and writes on a pointer-backed integer node and a
+#   function-backed node;
+# - shadowing of an existing subtree (minix.test) with a similar-looking
+#   subtree, which we then subject to a subset of test87;
+# - resource accounting, making sure everything is the same before and after.
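+#   The accounting is checked through the minix.mib.nodes, minix.mib.objects,
+#   and minix.mib.remotes counters exposed by the MIB service.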
+
+bomb() {
+	echo "$*"
+ service down rmibtest 2>/dev/null
+ exit 1
+}
+
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+export PATH
+
+echo -n "Test RMIB "
+
+cd rmibtest
+
+sysctl -q minix.rtest && bomb "there should not be a minix.rtest"
+
+old_nodes=`sysctl -n minix.mib.nodes 2>/dev/null` || bomb "no MIB stats?"
+old_objects=`sysctl -n minix.mib.objects 2>/dev/null` || bomb "no MIB stats?"
+old_remotes=`sysctl -n minix.mib.remotes 2>/dev/null` || bomb "no MIB stats?"
+
+service up `pwd`/rmibtest -label rmibtest -config rmibtest.conf || \
+ bomb "unable to start test service"
+
+cd ..
+
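+# Give the test service a moment to initialize and register its subtrees.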
+sleep 1
+
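+# The test service registers two remote subtrees (the new minix.rtest and a
+# shadow of minix.test), so the remote count should have grown by exactly two.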
+new_remotes=`sysctl -n minix.mib.remotes 2>/dev/null` || \
+ bomb "unable to get mount stats"
+[ $(($old_remotes + 2)) -eq $new_remotes ] || bomb "mounting subtree failed"
+
+# Test the temporary minix.rtest subtree with its two mirroring nodes
+sysctl -q minix.rtest || bomb "there should be a minix.rtest"
+
+[ $(sysctl -n minix.rtest.int) -eq 5375123 ] || bomb "unexpected int value"
+[ $(sysctl -n minix.rtest.func) -eq 5375123 ] || bomb "unexpected func value"
+sysctl -wq minix.rtest.int=456 || bomb "unable to set int value"
+[ $(sysctl -n minix.rtest.int) -eq 456 ] || bomb "unexpected int value"
+[ $(sysctl -n minix.rtest.func) -eq 456 ] || bomb "unexpected func value"
+sysctl -wq minix.rtest.func=7895375 || bomb "unable to set func value"
+[ $(sysctl -n minix.rtest.int) -eq 7895375 ] || bomb "unexpected int value"
+[ $(sysctl -n minix.rtest.func) -eq 7895375 ] || bomb "unexpected func value"
+
+# Test the minix.test shadowing subtree using a subset of the regular MIB test
+./test87 19 >/dev/null || bomb "test87 reported failure"
+
+service down rmibtest
+
+sleep 1
+
+# Is everything back to the old situation?
+new_nodes=`sysctl -n minix.mib.nodes 2>/dev/null` || bomb "no MIB stats?"
+new_objects=`sysctl -n minix.mib.objects 2>/dev/null` || bomb "no MIB stats?"
+new_remotes=`sysctl -n minix.mib.remotes 2>/dev/null` || bomb "no MIB stats?"
+
+[ $old_nodes -eq $new_nodes ] || bomb "node count not equal after unmount"
+[ $old_objects -eq $new_objects ] || bomb "object count not equal after unmount"
+[ $old_remotes -eq $new_remotes ] || bomb "remote count not equal after unmount"
+
+sysctl -q minix.rtest && bomb "there should not be a minix.rtest anymore"
+
+echo "ok"
+exit 0
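The script relies on relative paths (./test87 and the rmibtest/ subdirectory), so it is meant to be run as root from the directory where the test set is installed; the rootscripts change above also wires it into the regular test run. A minimal standalone invocation sketch, with the install location left as a placeholder:

	cd /path/to/installed/tests	# placeholder for the MINIX test set directory
	sh ./testrmib			# must be run as root (see rootscripts above)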