;
};
+service procfs
+{
+ system
+ VIRCOPY # 15
+ ;
+ vm
+ INFO
+ ;
+ uid 0;
+};
+
service isofs
{
system
minix/fslib.h minix/ioctl.h minix/ipc.h minix/ipcconst.h \
minix/keymap.h minix/minlib.h minix/mq.h \
minix/netdriver.h minix/partition.h minix/paths.h \
- minix/portio.h minix/priv.h minix/profile.h minix/queryparam.h \
+ minix/portio.h minix/priv.h minix/procfs.h minix/profile.h \
+ minix/queryparam.h \
minix/rs.h minix/safecopies.h minix/sched.h minix/sef.h minix/sound.h \
minix/spin.h minix/sys_config.h minix/sysinfo.h minix/syslib.h \
minix/sysutil.h minix/timers.h minix/tty.h minix/type.h minix/types.h \
--- /dev/null
+#ifndef _MINIX_PROCFS_H
+#define _MINIX_PROCFS_H
+
+/* The compatibility model is as follows. The current format should be retained
+ * for as long as possible; new fields can be added at the end of the line,
+ * because ps/top only read as much as they know of from the start of the line.
+ * Once fields (really) have to be removed, or the whole line becomes too big
+ * of a mess, a completely new format string can be put in, but with an
+ * increased PSINFO_VERSION at the beginning. That way, older ps/top copies
+ * will not misinterpret the new fields, but rather fail cleanly.
+ */
+#define PSINFO_VERSION 0
+
+/* Process types. */
+#define TYPE_TASK 'T'
+#define TYPE_SYSTEM 'S'
+#define TYPE_USER 'U'
+
+/* General process states. */
+#define STATE_SLEEP 'S'
+#define STATE_WAIT 'W'
+#define STATE_ZOMBIE 'Z'
+#define STATE_RUN 'R'
+#define STATE_STOP 'T'
+
+/* PM sleep states. */
+#define PSTATE_NONE '-'
+#define PSTATE_PAUSED 'P'
+#define PSTATE_WAITING 'W'
+#define PSTATE_SIGSUSP 'S'
+
+/* VFS block states. */
+#define FSTATE_NONE '-'
+#define FSTATE_PIPE 'P'
+#define FSTATE_LOCK 'L'
+#define FSTATE_POPEN 'O'
+#define FSTATE_SELECT 'S'
+#define FSTATE_DOPEN 'D'
+#define FSTATE_TASK 'T'
+#define FSTATE_UNKNOWN '?'
+
+#endif /* _MINIX_PROCFS_H */
.include <bsd.own.mk>
-SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm
+SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs \
+ mfs pfs pm procfs rs sched vfs vm
IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm
--- /dev/null
+# Makefile for ProcFS server
+PROG= procfs
+SRCS= buf.c main.c pid.c root.c tree.c util.c
+
+CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers
+
+DPADD+= ${LIBVTREEFS} ${LIBSYS}
+LDADD+= -lvtreefs -lsys
+
+MAN=
+
+BINDIR?= /sbin
+
+.include <bsd.prog.mk>
--- /dev/null
+Development notes regarding ProcFS. Original document by David van Moolenbroek.
+
+
+SECURITY MODEL
+
+Right now, procfs is not able to deal with security-sensitive information,
+because there would be too many opportunities for rogue processes to obtain
+values they shouldn't be able to get to. This is mainly due to the fact that
+while procfs is running, the environment around it may change arbitrarily: for
+example, a /proc/<pid>/mem file could offer access to a process's core memory,
+but if a rogue process opened that file right before the victim process invokes
+an exec() on a setuid binary, the rogue process could read from the victim
+process's memory while a victim user provides this process with their password.
+This is only one example out of many; such time-of-check/time-of-use race
+conditions are inherent to the situation that procfs finds itself in: trying
+to provide information about an asynchronously running system.
+
+A little more specifically, this problem mainly comes up when system calls are
+made to obtain information (long) after a certain PID directory has been
+updated, which typically happens right after pulling in a new copy of the
+process tables of the kernel, PM, and VFS. Returning stale information from
+those tables is usually not a problem: at worst, the caller gets outdated
+information about the system as it once was, after passing a security check for
+that point in time. Hence, it cannot obtain information it never had access
+to. Using information from those tables to perform calls later, however, is
+a different case. In the "mem" example above, procfs would have the old user ID
+in its copy of the process tables, and yet perform on-demand sys_datacopy calls
+(or something similar) to retrieve memory from the process, bypassing a check
+on the then-current user ID. A similar situation already exists right now for
+the /proc/<pid>/map file for example, which pulls in information on demand -
+but it provides only public information anyway, just like the other files that
+procfs currently exposes.
+
+A proper solution to this problem has simply not been implemented yet. It is
+possible to change the system in such a way that procfs checks whether the
+target process is still in the same security state before returning information
+to the caller process. This can be done either while or after obtaining the
+information, depending on what is most convenient for the design of the system.
+Any such solution obviously has an impact on system design and procfs'
+performance, and was found not worth implementing for the first version of
+procfs, since all offered information was public anyway. However, such a change
+*must* be made before procfs can expose anything that provides a potential for
+security breaches.
+
+Finally, it must be kept in mind that even updating the process tables from
+various other sources is not an atomic operation. There might be mismatches
+between the tables. Procfs must be able to handle such occurrences with care,
+from both a security perspective and a general functionality perspective.
+
+
+FUTURE EXPANSIONS
+
+It would be trivial to add a /proc/self symlink pointing to the caller's PID
+directory, if the VFS-FS protocol's REQ_RDLINK request were augmented to
+include the caller's PID or endpoint. However, this would be a procfs-specific
+protocol change, and there does not seem to be a need for this just yet.
+
+Even more custom protocol changes or procfs-specific backcalls would have to be
+added to expose processes' current working directory, root directory,
+executable path, or open files. A number of VFS parts would have to be changed
+significantly to fully support all of these, possibly including an entire DNLC.
+
+All the necessary infrastructure is there to add static (sub)directories - for
+example, a /proc/net/ directory. It would be more tricky to add subdirectories
+for dynamic (process) directories, for example /proc/<pid>/fd/. This would
+require some changes to the VTreeFS side of the tree management. Some of the
+current assumptions are documented in type.h.
--- /dev/null
+/* ProcFS - buf.c - by Alen Stojanov and David van Moolenbroek */
+
+#include "inc.h"
+#include <stdarg.h>
+
+#define BUF_SIZE 4096
+
+PRIVATE char buf[BUF_SIZE + 1];
+PRIVATE size_t off, left, used;
+PRIVATE off_t skip;
+
+/*===========================================================================*
+ * buf_init *
+ *===========================================================================*/
+PUBLIC void buf_init(off_t start, size_t len)
+{
+	/* Reset the output buffer for a new request. Whatever is produced
+	 * next has its first 'start' bytes discarded; after that, at most
+	 * 'len' bytes are collected, and never more than BUF_SIZE.
+	 */
+
+	used = 0;
+	off = 0;
+	skip = start;
+	left = MIN(len, BUF_SIZE);
+}
+
+/*===========================================================================*
+ * buf_printf *
+ *===========================================================================*/
+PUBLIC void buf_printf(char *fmt, ...)
+{
+	/* Add formatted text to the end of the buffer. Output that falls
+	 * before the requested start offset is skipped; output beyond the
+	 * requested length, or beyond what fits in the buffer, is dropped.
+	 */
+	va_list args;
+	ssize_t len, max;
+
+	if (left == 0)
+		return;
+
+	/* There is no way to estimate how much space the result will take, so
+	 * we need to produce the string even when skipping part of the start.
+	 * If part of the result is to be skipped, do not memcpy; instead, save
+	 * the offset of where the result starts within the buffer.
+	 *
+	 * The null terminating character is not part of the result, so room
+	 * must be given for it to be stored after completely filling up the
+	 * requested part of the buffer.
+	 */
+	max = MIN(skip + left, BUF_SIZE);
+
+	va_start(args, fmt);
+	len = vsnprintf(&buf[off + used], max + 1, fmt, args);
+	va_end(args);
+
+	/* A negative result indicates a formatting failure; in that case
+	 * nothing usable has been produced, so leave the state untouched.
+	 */
+	if (len < 0)
+		return;
+
+	/* vsnprintf reports the length that the complete result would have
+	 * had, which may exceed the number of bytes actually stored. Only
+	 * count what is really in the buffer: otherwise, a skip offset that
+	 * lies between BUF_SIZE and the reported length would move 'off'
+	 * past the end of the buffer.
+	 */
+	if (len > max)
+		len = max;
+
+	if (skip > 0) {
+		assert(off == 0);
+		assert(used == 0);
+
+		/* Everything produced so far precedes the start offset. */
+		if (skip >= len) {
+			skip -= len;
+
+			return;
+		}
+
+		/* The requested data starts somewhere within this result. */
+		off = skip;
+		if (left > BUF_SIZE - off)
+			left = BUF_SIZE - off;
+		len -= off;
+		skip = 0;
+	}
+
+	assert(skip == 0);
+	assert(len >= 0);
+	assert((long) left >= 0);
+
+	if (len > (ssize_t) left)
+		len = left;
+
+	used += len;
+	left -= len;
+}
+
+/*===========================================================================*
+ * buf_append *
+ *===========================================================================*/
+PUBLIC void buf_append(char *data, size_t len)
+{
+	/* Add 'len' bytes of arbitrary data to the end of the buffer, taking
+	 * into account both the number of bytes that are still to be skipped
+	 * and the amount of room that is left.
+	 */
+
+	if (left == 0)
+		return;
+
+	if (skip > 0) {
+		/* The entire chunk precedes the requested start offset. */
+		if ((ssize_t) len <= skip) {
+			skip -= len;
+
+			return;
+		}
+
+		/* Drop only the leading part of the chunk. */
+		len -= skip;
+		data += skip;
+		skip = 0;
+	}
+
+	/* Never store more than was asked for. */
+	len = (len > left) ? left : len;
+
+	memcpy(&buf[off + used], data, len);
+
+	left -= len;
+	used += len;
+}
+
+/*===========================================================================*
+ * buf_get *
+ *===========================================================================*/
+PUBLIC size_t buf_get(char **ptr)
+{
+	/* Store the address of the first collected byte in 'ptr', and return
+	 * the number of collected bytes. A trailing null character is not
+	 * included in that count.
+	 */
+	char *start;
+
+	start = buf + off;
+	*ptr = start;
+
+	return used;
+}
--- /dev/null
+#ifndef _PROCFS_CONST_H
+#define _PROCFS_CONST_H
+
+/* The minimum number of inodes depends on a number of factors:
+ * - Each statically created inode (e.g., /proc/hz) needs an inode. As of
+ * writing, this requires about a dozen inodes.
+ * - Deleted inodes that are still in use by VFS must be retained. For deleted
+ * directories, all their containing directories up to the root must be
+ * retained as well (to allow the user to "cd .." out). VTreeFS already takes
+ * care of this. In the case of ProcFS, only PID-based directories can be
+ * deleted; no other directories are dynamically created. These directories
+ * currently do not contain subdirectories, either. Hence, for deleted open
+ * inodes, we need to reserve at most NR_VNODES inodes in the worst case.
+ * - In order for getdents to be able to return all PID-based directories,
+ * inodes must not be recycled while generating the list of these PID-based
+ * directories. In the worst case, this means (NR_TASKS + NR_PROCS) extra
+ * inodes.
+ * The sum of these is the bare minimum for correct operation in all possible
+ * circumstances. In practice, not all open files will be deleted files in
+ * ProcFS, and not all process slots will be in use either, so the average use
+ * will be a lot less. However, setting the value too low allows for a
+ * potential denial-of-service attack by a non-root user.
+ *
+ * For the moment, we simply set this value to something reasonable.
+ */
+#define NR_INODES ((NR_TASKS + NR_PROCS) * 4)
+
+/* Various file modes. */
+#define REG_ALL_MODE (S_IFREG | 0444) /* world-readable regular */
+#define DIR_ALL_MODE (S_IFDIR | 0555) /* world-accessible directory */
+#define LNK_ALL_MODE (S_IFLNK | 0777) /* symbolic link */
+
+#endif /* _PROCFS_CONST_H */
--- /dev/null
+#ifndef _PROCFS_GLO_H
+#define _PROCFS_GLO_H
+
+/* pid.c */
+extern struct file pid_files[];
+
+/* root.c */
+extern struct file root_files[];
+
+/* tree.c */
+extern struct proc proc[NR_PROCS + NR_TASKS]; /* process table from kernel */
+extern struct mproc mproc[NR_PROCS]; /* process table from PM */
+extern struct fproc fproc[NR_PROCS]; /* process table from VFS */
+
+#endif /* _PROCFS_GLO_H */
--- /dev/null
+#ifndef _PROCFS_INC_H
+#define _PROCFS_INC_H
+
+#define _POSIX_SOURCE 1
+#define _MINIX 1
+#define _SYSTEM 1
+
+#include <minix/config.h>
+#include <ansi.h>
+#include <limits.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <lib.h>
+#include <timers.h>
+#include <a.out.h>
+#include <dirent.h>
+
+#include <minix/callnr.h>
+#include <minix/type.h>
+#include <minix/const.h>
+#include <minix/com.h>
+#include <minix/syslib.h>
+#include <minix/sysutil.h>
+#include <minix/keymap.h>
+#include <minix/bitmap.h>
+#include <minix/vfsif.h>
+#include <minix/endpoint.h>
+#include <minix/sysinfo.h>
+#include <minix/u64.h>
+#include <minix/sysinfo.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+
+#include <sys/utsname.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+
+#include <machine/archtypes.h>
+#include "kernel/const.h"
+#include "kernel/type.h"
+#include "kernel/proc.h"
+#include "pm/mproc.h"
+#include "vfs/const.h"
+#include "vfs/fproc.h"
+
+#include <minix/vtreefs.h>
+#include <minix/procfs.h>
+
+#include "const.h"
+#include "type.h"
+#include "proto.h"
+#include "glo.h"
+
+#endif /* _PROCFS_INC_H */
--- /dev/null
+/* ProcFS - main.c - by Alen Stojanov and David van Moolenbroek */
+
+#include "inc.h"
+
+FORWARD _PROTOTYPE( void init_hook, (void) );
+
+/* The hook functions that will be called by VTreeFS. This table is passed to
+ * start_vtreefs() from main(). init_hook is defined in this file; the lookup,
+ * getdents, read and rdlink hooks are declared in proto.h as part of tree.c.
+ * A NULL entry means that ProcFS does not provide that particular hook.
+ */
+PRIVATE struct fs_hooks hooks = {
+ init_hook,
+ NULL, /* cleanup_hook */
+ lookup_hook,
+ getdents_hook,
+ read_hook,
+ rdlink_hook,
+ NULL /* message_hook */
+};
+
+/*===========================================================================*
+ * construct_tree *
+ *===========================================================================*/
+PRIVATE void construct_tree(struct inode *dir, struct file *files)
+{
+	/* Populate directory 'dir' with one inode for each entry of 'files',
+	 * an array that ends with an entry that has a NULL name. For entries
+	 * with a directory mode, the data field is interpreted as a nested
+	 * array of file structures, which is then processed recursively.
+	 */
+	struct inode_stat st;
+	struct inode *child;
+	struct file *entry;
+
+	/* Apart from the mode, all static entries share the same stats. */
+	st.dev = NO_DEV;
+	st.size = 0;
+	st.gid = SUPER_USER;
+	st.uid = SUPER_USER;
+
+	entry = files;
+	while (entry->name != NULL) {
+		st.mode = entry->mode;
+
+		child = add_inode(dir, entry->name, NO_INDEX, &st,
+			(index_t) 0, (cbdata_t) entry->data);
+
+		assert(child != NULL);
+
+		if (S_ISDIR(entry->mode))
+			construct_tree(child, (struct file *) entry->data);
+
+		entry++;
+	}
+}
+
+/*===========================================================================*
+ * init_hook *
+ *===========================================================================*/
+PRIVATE void init_hook(void)
+{
+	/* Initialization hook, called by VTreeFS. Create the static files
+	 * listed in root_files directly under the root inode.
+	 */
+
+	construct_tree(get_root_inode(), root_files);
+}
+
+/*===========================================================================*
+ * main *
+ *===========================================================================*/
+PUBLIC int main(int argc, char *argv[])
+{
+	/* ProcFS entry point. Neither argument is used.
+	 */
+	struct inode_stat root_stat;
+	int r;
+
+	/* Set up our own state first. If that fails (for example because we
+	 * are incompatible with the kernel), exit with the error code.
+	 */
+	r = init_tree();
+	if (r != OK)
+		return r;
+
+	/* The root directory is a world-accessible directory owned by the
+	 * superuser.
+	 */
+	root_stat.dev = NO_DEV;
+	root_stat.size = 0;
+	root_stat.gid = SUPER_USER;
+	root_stat.uid = SUPER_USER;
+	root_stat.mode = DIR_ALL_MODE;
+
+	/* Hand over control to VTreeFS. This call does not return. */
+	start_vtreefs(&hooks, NR_INODES, &root_stat, NR_PROCS + NR_TASKS);
+
+	return 0;
+}
--- /dev/null
+/* ProcFS - pid.c - by Alen Stojanov and David van Moolenbroek */
+
+#include "inc.h"
+
+#include <sys/mman.h>
+#include <minix/vm.h>
+
+#define S_FRAME_SIZE 4096 /* use malloc if larger than this */
+PRIVATE char s_frame[S_FRAME_SIZE]; /* static storage for process frame */
+PRIVATE char *frame; /* pointer to process frame buffer */
+
+FORWARD _PROTOTYPE( void pid_psinfo, (int slot) );
+FORWARD _PROTOTYPE( void pid_cmdline, (int slot) );
+FORWARD _PROTOTYPE( void pid_environ, (int slot) );
+FORWARD _PROTOTYPE( void pid_map, (int slot) );
+
+/* The files that are dynamically created in each PID directory. The data field
+ * contains each file's read function. Subdirectories are not yet supported.
+ * Each read function takes the kernel process slot number as its argument and
+ * produces its output through buf_printf() and/or buf_append(). The table is
+ * terminated by an entry with a NULL name.
+ */
+PUBLIC struct file pid_files[] = {
+ { "psinfo", REG_ALL_MODE, (data_t) pid_psinfo },
+ { "cmdline", REG_ALL_MODE, (data_t) pid_cmdline },
+ { "environ", REG_ALL_MODE, (data_t) pid_environ },
+ { "map", REG_ALL_MODE, (data_t) pid_map },
+ { NULL, 0, (data_t) NULL }
+};
+
+/*===========================================================================*
+ * is_zombie *
+ *===========================================================================*/
+PRIVATE int is_zombie(int slot)
+{
+	/* Return whether the process in the given kernel slot is marked as a
+	 * zombie in PM's process table. Slots below NR_TASKS have no PM entry
+	 * and are never reported as zombies.
+	 */
+
+	if (slot < NR_TASKS)
+		return 0;
+
+	return ((mproc[slot - NR_TASKS].mp_flags &
+		(TRACE_ZOMBIE | ZOMBIE)) != 0);
+}
+
+/*===========================================================================*
+ * pid_psinfo *
+ *===========================================================================*/
+PRIVATE void pid_psinfo(int i)
+{
+	/* Print information used by ps(1) and top(1) about the process in
+	 * kernel slot 'i'. The output is a single line of space-separated
+	 * fields that starts with PSINFO_VERSION. For processes that are not
+	 * kernel tasks, extra fields from VM, PM and VFS are appended.
+	 */
+	int pi, task, state, type, p_state, f_state;
+	char name[PROC_NAME_LEN+1], *p;
+	struct vm_usage_info vui;
+	pid_t ppid;
+
+	/* 'i' is an index into the kernel process table only. The PM and VFS
+	 * tables have no entries for kernel tasks and must be indexed with
+	 * 'pi', and only if the process is not a kernel task.
+	 */
+	pi = i - NR_TASKS;
+	task = proc[i].p_nr < 0;
+
+	/* Get the name of the process. Spaces would mess up the format.. */
+	if (task || mproc[pi].mp_name[0] == 0)
+		strncpy(name, proc[i].p_name, sizeof(name) - 1);
+	else
+		strncpy(name, mproc[pi].mp_name, sizeof(name) - 1);
+	name[sizeof(name) - 1] = 0;
+	if ((p = strchr(name, ' ')) != NULL)
+		p[0] = 0;
+
+	/* Get the type of the process. */
+	if (task)
+		type = TYPE_TASK;
+	else if (mproc[pi].mp_flags & PRIV_PROC)
+		type = TYPE_SYSTEM;
+	else
+		type = TYPE_USER;
+
+	/* Get the state of the process. */
+	if (!task) {
+		if (is_zombie(i))
+			state = STATE_ZOMBIE;	/* zombie */
+		else if (mproc[pi].mp_flags & STOPPED)
+			state = STATE_STOP;	/* stopped (traced) */
+		else if (proc[i].p_rts_flags == 0)
+			state = STATE_RUN;	/* in run-queue */
+		else if (fp_is_blocked(&fproc[pi]) ||
+		(mproc[pi].mp_flags & (WAITING | PAUSED | SIGSUSPENDED)))
+			state = STATE_SLEEP;	/* sleeping */
+		else
+			state = STATE_WAIT;	/* waiting */
+	} else {
+		if (proc[i].p_rts_flags == 0)
+			state = STATE_RUN;	/* in run-queue */
+		else
+			state = STATE_WAIT;	/* other i.e. waiting */
+	}
+
+	/* We assume that even if a process has become a zombie, its kernel
+	 * proc entry still contains the old (but valid) information. Currently
+	 * this is true, but in the future we may have to filter some fields.
+	 */
+	buf_printf("%d %c %d %s %c %d %d %lu %lu %lu %lu",
+		PSINFO_VERSION,			/* information version */
+		type,				/* process type */
+		(int) proc[i].p_endpoint,	/* process endpoint */
+		name,				/* process name */
+		state,				/* process state letter */
+		(int) P_BLOCKEDON(&proc[i]),	/* endpt blocked on, or NONE */
+		(int) proc[i].p_priority,	/* process priority */
+		(unsigned long) proc[i].p_user_time,	/* user time */
+		(unsigned long) proc[i].p_sys_time,	/* system time */
+		ex64hi(proc[i].p_cycles),	/* execution cycles */
+		ex64lo(proc[i].p_cycles)
+	);
+
+	/* If the process is not a kernel task, we add some extra info. */
+	if (!task) {
+		memset(&vui, 0, sizeof(vui));
+
+		if (!is_zombie(i)) {
+			/* We don't care if this fails. It may still return
+			 * zero memory usage for processes that don't have a
+			 * pagetable, though. Look at vui_total instead.
+			 */
+			(void) vm_info_usage(proc[i].p_endpoint, &vui);
+
+			if (vui.vui_total == 0L) {
+				vui.vui_total =
+					(proc[i].p_memmap[T].mem_len +
+					proc[i].p_memmap[D].mem_len) <<
+					CLICK_SHIFT;
+			}
+		}
+
+		if (mproc[pi].mp_flags & PAUSED)
+			p_state = PSTATE_PAUSED;
+		else if (mproc[pi].mp_flags & WAITING)
+			p_state = PSTATE_WAITING;
+		else if (mproc[pi].mp_flags & SIGSUSPENDED)
+			p_state = PSTATE_SIGSUSP;
+		else
+			p_state = PSTATE_NONE;
+
+		/* A process that is its own parent has no parent PID. */
+		if (mproc[pi].mp_parent == pi)
+			ppid = NO_PID;
+		else
+			ppid = mproc[mproc[pi].mp_parent].mp_pid;
+
+		switch (fproc[pi].fp_blocked_on) {
+		case FP_BLOCKED_ON_NONE:	f_state = FSTATE_NONE; break;
+		case FP_BLOCKED_ON_PIPE:	f_state = FSTATE_PIPE; break;
+		case FP_BLOCKED_ON_LOCK:	f_state = FSTATE_LOCK; break;
+		case FP_BLOCKED_ON_POPEN:	f_state = FSTATE_POPEN; break;
+		case FP_BLOCKED_ON_SELECT:	f_state = FSTATE_SELECT; break;
+		case FP_BLOCKED_ON_DOPEN:	f_state = FSTATE_DOPEN; break;
+		case FP_BLOCKED_ON_OTHER:	f_state = FSTATE_TASK; break;
+		default:			f_state = FSTATE_UNKNOWN;
+		}
+
+		/* The cast for the "block proc" field must apply to the
+		 * selected value as a whole, not just to the condition.
+		 */
+		buf_printf(" %lu %lu %lu %c %d %u %u %u %d %c %d %u",
+			vui.vui_total,			/* total memory */
+			vui.vui_common,			/* common memory */
+			vui.vui_shared,			/* shared memory */
+			p_state,			/* sleep state */
+			ppid,				/* parent PID */
+			mproc[pi].mp_realuid,		/* real UID */
+			mproc[pi].mp_effuid,		/* effective UID */
+			mproc[pi].mp_procgrp,		/* process group */
+			mproc[pi].mp_nice,		/* nice value */
+			f_state,			/* VFS block state */
+			(int) ((fproc[pi].fp_blocked_on == FP_BLOCKED_ON_OTHER)
+				? fproc[pi].fp_task : NONE), /* block proc */
+			fproc[pi].fp_tty		/* controlling tty */
+		);
+	}
+
+	/* Newline at the end of the file. */
+	buf_printf("\n");
+}
+
+/*===========================================================================*
+ * put_frame *
+ *===========================================================================*/
+PRIVATE void put_frame(void)
+{
+	/* Release the frame buffer set up by get_frame(). Only a dynamically
+	 * allocated buffer needs freeing; the static one is left alone.
+	 */
+
+	if (frame == s_frame)
+		return;
+
+	free(frame);
+}
+
+/*===========================================================================*
+ * get_frame *
+ *===========================================================================*/
+PRIVATE int get_frame(int slot, vir_bytes *basep, vir_bytes *sizep,
+	size_t *nargsp)
+{
+	/* Get the execution frame from the top of the given process's stack.
+	 * It may be very large, in which case we temporarily allocate memory
+	 * for it (up to a certain size).
+	 *
+	 * On success, return TRUE. In that case, 'frame' points to a copy of
+	 * the frame followed by a null byte, and the frame's base address
+	 * within the process, its (possibly truncated) size, and the argument
+	 * count are stored through 'basep', 'sizep' and 'nargsp'. The caller
+	 * must then call put_frame(). On failure, return FALSE; nothing has
+	 * to be cleaned up in that case.
+	 */
+	vir_bytes base, size;
+	size_t nargs, max_ptrs;
+
+	/* Kernel tasks and zombies have no frame to retrieve. */
+	if (proc[slot].p_nr < 0 || is_zombie(slot))
+		return FALSE;
+
+	/* Get the frame base address and size. Limit the size to whatever we
+	 * can handle. If our static buffer is not sufficiently large to store
+	 * the entire frame, allocate memory dynamically. It is then later
+	 * freed by put_frame().
+	 */
+	base = mproc[slot - NR_TASKS].mp_frame_addr;
+	size = mproc[slot - NR_TASKS].mp_frame_len;
+
+	if (size < sizeof(size_t)) return FALSE;
+
+	if (size > ARG_MAX) size = ARG_MAX;
+
+	/* The buffer must hold 'size' bytes plus the terminating null byte
+	 * that is stored at frame[size] below.
+	 */
+	if (size >= sizeof(s_frame)) {
+		frame = malloc(size + 1);
+
+		if (frame == NULL)
+			return FALSE;
+	}
+	else frame = s_frame;
+
+	/* Copy in the complete process frame. */
+	if (sys_datacopy(proc[slot].p_endpoint, base,
+		SELF, (vir_bytes) frame, (phys_bytes) size) != OK) {
+		put_frame();
+
+		return FALSE;
+	}
+
+	frame[size] = 0; /* terminate any last string */
+
+	/* The argument count comes from the process itself and cannot be
+	 * trusted. The count, 'nargs' argument pointers, and a terminating
+	 * NULL pointer must all fit in the frame. Compare against the number
+	 * of pointers that fit, so that a huge count cannot overflow the
+	 * computation and slip through.
+	 */
+	nargs = * (size_t *) frame;
+	max_ptrs = (size - sizeof(size_t)) / sizeof(char *);
+	if (nargs < 1 || nargs >= max_ptrs) {
+		put_frame();
+
+		return FALSE;
+	}
+
+	*basep = base;
+	*sizep = size;
+	*nargsp = nargs;
+
+	/* The caller now has to call put_frame() to clean up. */
+	return TRUE;
+}
+
+/*===========================================================================*
+ * pid_cmdline *
+ *===========================================================================*/
+PRIVATE void pid_cmdline(int slot)
+{
+	/* Output the command line of the process, as found in the process's
+	 * own execution frame. Every argument is followed by a null
+	 * character. Nothing is output if the frame cannot be obtained.
+	 */
+	vir_bytes base, size, rel;
+	size_t n, nargs;
+	char **argv, *arg;
+
+	if (!get_frame(slot, &base, &size, &nargs))
+		return;
+
+	/* The argument pointers directly follow the argument count. */
+	argv = (char **) &frame[sizeof(size_t)];
+
+	for (n = 0; n < nargs; n++) {
+		/* Turn the address within the process into an offset within
+		 * our copy of the frame.
+		 */
+		rel = (vir_bytes) argv[n] - base;
+
+		/* Stop at the first pointer that lies outside the frame. */
+		if ((long) rel < 0L || rel >= size)
+			break;
+
+		arg = &frame[rel];
+
+		buf_append(arg, strlen(arg) + 1);
+	}
+
+	put_frame();
+}
+
+/*===========================================================================*
+ * pid_environ *
+ *===========================================================================*/
+PRIVATE void pid_environ(int slot)
+{
+	/* Output the initial environment of the process, as found in the
+	 * process's own execution frame. Every entry is followed by a null
+	 * character. Nothing is output if the frame cannot be obtained.
+	 */
+	vir_bytes base, size, rel;
+	size_t nargs, pos;
+	char **envp, *entry;
+
+	if (!get_frame(slot, &base, &size, &nargs))
+		return;
+
+	/* The environment pointers follow the argument count, the argument
+	 * pointers, and the NULL pointer that terminates the latter.
+	 */
+	pos = sizeof(size_t) + sizeof(char *) * (nargs + 1);
+	envp = (char **) &frame[pos];
+
+	/* Only look at pointers that are located entirely within the frame. */
+	for (; pos + sizeof(char *) <= size; pos += sizeof(char *), envp++) {
+		rel = (vir_bytes) *envp;
+
+		/* A NULL pointer terminates the list. */
+		if (rel == 0L)
+			break;
+
+		rel -= base;
+
+		/* Stop at the first pointer that lies outside the frame. */
+		if ((long) rel < 0L || rel >= size)
+			break;
+
+		entry = &frame[rel];
+
+		buf_append(entry, strlen(entry) + 1);
+	}
+
+	put_frame();
+}
+
+/*===========================================================================*
+ * dump_regions *
+ *===========================================================================*/
+PRIVATE int dump_regions(int slot)
+{
+	/* Print the virtual memory regions of a process, one line per region.
+	 * Return the number of regions printed, or the negative result of
+	 * vm_info_region() if that call fails.
+	 */
+	struct vm_region_info vri[MAX_VRI_COUNT];
+	vir_bytes next;
+	int i, r, seg, count;
+
+	count = 0;
+	next = 0;
+
+	/* Fetch the regions in batches of up to MAX_VRI_COUNT entries. A
+	 * batch that is not completely filled is the last one.
+	 */
+	for (;;) {
+		r = vm_info_region(proc[slot].p_endpoint, vri, MAX_VRI_COUNT,
+			&next);
+
+		if (r < 0)
+			return r;
+
+		for (i = 0; i < r; i++) {
+			if (vri[i].vri_seg == T)
+				seg = 'T';
+			else if (vri[i].vri_seg == D)
+				seg = 'D';
+			else
+				seg = '?';
+
+			buf_printf("%c %08lx-%08lx %c%c%c %c\n",
+				seg, vri[i].vri_addr,
+				vri[i].vri_addr + vri[i].vri_length,
+				(vri[i].vri_prot & PROT_READ) ? 'r' : '-',
+				(vri[i].vri_prot & PROT_WRITE) ? 'w' : '-',
+				(vri[i].vri_prot & PROT_EXEC) ? 'x' : '-',
+				(vri[i].vri_flags & MAP_SHARED) ? 's' : 'p');
+
+			count++;
+		}
+
+		if (r != MAX_VRI_COUNT)
+			break;
+	}
+
+	return count;
+}
+
+/*===========================================================================*
+ * dump_segments *
+ *===========================================================================*/
+PRIVATE void dump_segments(int slot)
+{
+	/* Print the memory segments of a process as known to the kernel, one
+	 * line per local segment, in the same layout as the region lines.
+	 */
+	char *perm;
+	int seg;
+
+	for (seg = 0; seg < NR_LOCAL_SEGS; seg++) {
+		/* The text segment is shown as read-execute. Other segments
+		 * are shown as executable as well only if the text segment
+		 * is empty.
+		 */
+		if (seg == T)
+			perm = "r-x";
+		else if (proc[slot].p_memmap[T].mem_len == 0)
+			perm = "rwx";
+		else
+			perm = "rw-";
+
+		buf_printf("%c %08lx-%08lx %s -\n",
+			(seg == T) ? 'T' : 'D',
+			proc[slot].p_memmap[seg].mem_vir << CLICK_SHIFT,
+			(proc[slot].p_memmap[seg].mem_vir +
+			proc[slot].p_memmap[seg].mem_len) << CLICK_SHIFT,
+			perm);
+	}
+}
+
+/*===========================================================================*
+ * pid_map *
+ *===========================================================================*/
+PRIVATE void pid_map(int slot)
+{
+ /* Print a memory map of the process. Obtain the information from VM if
+ * possible; otherwise fall back on segments from the kernel.
+ */
+
+ /* Zombies have no memory. */
+ if (is_zombie(slot))
+ return;
+
+ /* Kernel tasks (negative p_nr) are never looked up in VM. For all
+ * other processes, dump_regions() returns the number of regions it
+ * printed, or a negative error code. Any nonzero result ends the map
+ * here; note that this means a VM error also suppresses the segment
+ * fallback below.
+ */
+ if (proc[slot].p_nr >= 0) {
+ if (dump_regions(slot) != 0)
+ return;
+ }
+
+ /* For kernel tasks, or for processes that have no regions according to
+ * VM, we assume they are not using virtual memory, and we print their
+ * segments instead.
+ */
+ dump_segments(slot);
+}
--- /dev/null
+#ifndef _PROCFS_PROTO_H
+#define _PROCFS_PROTO_H
+
+/* buf.c */
+_PROTOTYPE( void buf_init, (off_t start, size_t len) );
+_PROTOTYPE( void buf_printf, (char *fmt, ...) );
+_PROTOTYPE( void buf_append, (char *data, size_t len) );
+_PROTOTYPE( size_t buf_get, (char **ptr) );
+
+/* tree.c */
+_PROTOTYPE( int init_tree, (void) );
+_PROTOTYPE( int lookup_hook, (struct inode *parent, char *name,
+ cbdata_t cbdata) );
+_PROTOTYPE( int getdents_hook, (struct inode *inode, cbdata_t cbdata) );
+_PROTOTYPE( int read_hook, (struct inode *inode, off_t offset,
+ char **ptr, size_t *len, cbdata_t cbdata) );
+_PROTOTYPE( int rdlink_hook, (struct inode *inode, char *ptr,
+ size_t max, cbdata_t cbdata) );
+
+/* util.c */
+_PROTOTYPE( int procfs_getloadavg, (double *loadavg, int nelem) );
+
+#endif /* _PROCFS_PROTO_H */
--- /dev/null
+/* ProcFS - root.c - by Alen Stojanov and David van Moolenbroek */
+
+#include "inc.h"
+#include <machine/pci.h>
+
+FORWARD _PROTOTYPE( void root_hz, (void) );
+FORWARD _PROTOTYPE( void root_uptime, (void) );
+FORWARD _PROTOTYPE( void root_loadavg, (void) );
+FORWARD _PROTOTYPE( void root_kinfo, (void) );
+FORWARD _PROTOTYPE( void root_meminfo, (void) );
+FORWARD _PROTOTYPE( void root_pci, (void) );
+
+struct file root_files[] = { /* static files, created under the root inode by init_hook() */
+ { "hz", REG_ALL_MODE, (data_t) root_hz }, /* system clock frequency */
+ { "uptime", REG_ALL_MODE, (data_t) root_uptime }, /* current uptime */
+ { "loadavg", REG_ALL_MODE, (data_t) root_loadavg }, /* load averages */
+ { "kinfo", REG_ALL_MODE, (data_t) root_kinfo }, /* general kernel information */
+ { "meminfo", REG_ALL_MODE, (data_t) root_meminfo }, /* general memory information */
+ { "pci", REG_ALL_MODE, (data_t) root_pci }, /* PCI devices present */
+ { NULL, 0, NULL } /* a NULL name terminates the table */
+};
+
+/*===========================================================================*
+ * root_hz *
+ *===========================================================================*/
+PRIVATE void root_hz(void)
+{
+	/* Print the system clock frequency, as returned by sys_hz(), followed
+	 * by a newline.
+	 */
+
+	/* The "%lu" conversion requires an unsigned long argument. */
+	buf_printf("%lu\n", (unsigned long) sys_hz());
+}
+
+/*===========================================================================*
+ * root_loadavg *
+ *===========================================================================*/
+PRIVATE void root_loadavg(void)
+{
+	/* Print the three load averages on a single line, each with two
+	 * decimals. Print nothing unless all three could be obtained.
+	 */
+	double avg[3];
+	int n;
+
+	n = procfs_getloadavg(avg, 3);
+
+	if (n == 3)
+		buf_printf("%.2lf %.2lf %.2lf\n", avg[0], avg[1], avg[2]);
+}
+
+/*===========================================================================*
+ * root_uptime *
+ *===========================================================================*/
+PRIVATE void root_uptime(void)
+{
+	/* Print the current uptime: the number of clock ticks divided by the
+	 * clock frequency, with two decimals. Print nothing if the number of
+	 * ticks cannot be obtained.
+	 */
+	clock_t ticks;
+	int r;
+
+	r = getuptime(&ticks);
+
+	if (r == OK)
+		buf_printf("%.2lf\n", (double) ticks / (double) sys_hz());
+}
+
+/*===========================================================================*
+ * root_kinfo *
+ *===========================================================================*/
+PRIVATE void root_kinfo(void)
+{
+	/* Print general kernel information: the nr_procs and nr_tasks fields
+	 * of the kernel info structure. Print nothing if the structure cannot
+	 * be obtained.
+	 */
+	struct kinfo ki;
+	int r;
+
+	r = sys_getkinfo(&ki);
+
+	if (r == OK)
+		buf_printf("%u %u\n", ki.nr_procs, ki.nr_tasks);
+}
+
+/*===========================================================================*
+ * root_meminfo *
+ *===========================================================================*/
+PRIVATE void root_meminfo(void)
+{
+	/* Print general memory information as obtained from VM: the page
+	 * size, followed by the total, free, largest and cached fields of the
+	 * statistics structure. Print nothing if VM cannot provide them.
+	 */
+	struct vm_stats_info stats;
+	int r;
+
+	r = vm_info_stats(&stats);
+
+	if (r == OK)
+		buf_printf("%u %lu %lu %lu %lu\n", stats.vsi_pagesize,
+			stats.vsi_total, stats.vsi_free, stats.vsi_largest,
+			stats.vsi_cached);
+}
+
+/*===========================================================================*
+ * root_pci *
+ *===========================================================================*/
+PRIVATE void root_pci(void)
+{
+	/* Print one line for each PCI device present in the system: its slot
+	 * name, three attribute bytes (PCI_BCR, PCI_SCR, PCI_PIFR), its
+	 * vendor and device IDs, and its device name if known.
+	 */
+	static int first = TRUE;
+	char *slot_name, *dev_name;
+	u8_t bcr, scr, pifr;
+	u16_t vid, did;
+	int r, devind;
+
+	/* This should be taken care of behind the scenes by the PCI lib. */
+	if (first) {
+		pci_init();
+		first = FALSE;
+	}
+
+	/* Walk through the devices for as long as the PCI lib returns one. */
+	for (r = pci_first_dev(&devind, &vid, &did); r == 1;
+		r = pci_next_dev(&devind, &vid, &did)) {
+		slot_name = pci_slot_name(devind);
+		dev_name = pci_dev_name(vid, did);
+
+		bcr = pci_attr_r8(devind, PCI_BCR);
+		scr = pci_attr_r8(devind, PCI_SCR);
+		pifr = pci_attr_r8(devind, PCI_PIFR);
+
+		/* Substitute placeholders for names that are not available. */
+		buf_printf("%s %x/%x/%x %04X:%04X %s\n",
+			slot_name ? slot_name : "-",
+			bcr, scr, pifr, vid, did,
+			dev_name ? dev_name : "");
+	}
+}
--- /dev/null
+/* ProcFS - tree.c - by Alen Stojanov and David van Moolenbroek */
+
+#include "inc.h"
+
+PUBLIC struct proc proc[NR_PROCS + NR_TASKS];
+PUBLIC struct mproc mproc[NR_PROCS];
+PUBLIC struct fproc fproc[NR_PROCS];
+
+PRIVATE int nr_pid_entries;
+
+/*===========================================================================*
+ * slot_in_use *
+ *===========================================================================*/
+PRIVATE int slot_in_use(int slot)
+{
+	/* Tell whether a process occupies the given slot. A slot counts as
+	 * occupied when the kernel table does not mark it free, or when PM
+	 * still flags its entry as in use.
+	 */
+
+	if (proc[slot].p_rts_flags != RTS_SLOT_FREE)
+		return TRUE;
+
+	/* Slots below NR_TASKS have no entry in PM's table. */
+	if (slot < NR_TASKS)
+		return FALSE;
+
+	return ((mproc[slot - NR_TASKS].mp_flags & IN_USE) != 0);
+}
+
+/*===========================================================================*
+ * check_owner *
+ *===========================================================================*/
+PRIVATE int check_owner(struct inode *node, int slot)
+{
+	/* Check whether the owner user and group ID of the inode are still in
+	 * sync with the current effective user and group ID of the process in
+	 * the given slot. Return TRUE if they match, FALSE otherwise.
+	 */
+	struct inode_stat stat;
+	struct mproc *mp;
+
+	/* Slots below NR_TASKS have no entry in mproc[], so there is nothing
+	 * to compare against; treat them as always in sync.
+	 */
+	if (slot < NR_TASKS)
+		return TRUE;
+
+	mp = &mproc[slot - NR_TASKS];
+
+	get_inode_stat(node, &stat);
+
+	return (stat.uid == mp->mp_effuid && stat.gid == mp->mp_effgid);
+}
+
+/*===========================================================================*
+ * make_stat *
+ *===========================================================================*/
+PRIVATE void make_stat(struct inode_stat *stat, int slot, int index)
+{
+	/* Prepare the inode_stat structure for a node belonging to the process
+	 * in the given slot. 'index' selects the per-pid file in pid_files[],
+	 * or is NO_INDEX when the node is the process directory itself.
+	 */
+	struct mproc *mp;
+
+	stat->mode = (index == NO_INDEX) ? DIR_ALL_MODE : pid_files[index].mode;
+
+	/* Slots below NR_TASKS have no mproc[] entry; those nodes are owned
+	 * by the superuser. All other nodes follow the process's effective
+	 * user and group ID.
+	 */
+	if (slot >= NR_TASKS) {
+		mp = &mproc[slot - NR_TASKS];
+
+		stat->uid = mp->mp_effuid;
+		stat->gid = mp->mp_effgid;
+	} else {
+		stat->uid = SUPER_USER;
+		stat->gid = SUPER_USER;
+	}
+
+	stat->size = 0;
+	stat->dev = NO_DEV;
+}
+
+/*===========================================================================*
+ * dir_is_pid *
+ *===========================================================================*/
+PRIVATE int dir_is_pid(struct inode *node)
+{
+	/* Tell whether the given node is a PID directory: a direct child of
+	 * the root directory that carries an index.
+	 */
+
+	if (get_parent_inode(node) != get_root_inode())
+		return FALSE;
+
+	return (get_inode_index(node) != NO_INDEX);
+}
+
+/*===========================================================================*
+ * update_tables *
+ *===========================================================================*/
+PRIVATE int update_tables(void)
+{
+	/* Refresh the local copies of the kernel, PM and VFS process tables,
+	 * in that order. Return OK on success, the error of the first failing
+	 * retrieval, or EINVAL if any kernel table entry lacks the expected
+	 * magic number.
+	 */
+	int r, slot;
+
+	r = sys_getproctab(proc);
+	if (r != OK)
+		return r;
+
+	/* Every kernel entry must carry the magic number we were built with. */
+	for (slot = 0; slot < NR_PROCS + NR_TASKS; slot++) {
+		if (proc[slot].p_magic == PMAGIC)
+			continue;
+
+		printf("PROCFS: system version mismatch!\n");
+
+		return EINVAL;
+	}
+
+	r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc);
+	if (r != OK)
+		return r;
+
+	r = getsysinfo(VFS_PROC_NR, SI_PROC_TAB, fproc);
+	if (r != OK)
+		return r;
+
+	return OK;
+}
+
+/*===========================================================================*
+ * init_tree *
+ *===========================================================================*/
+PUBLIC int init_tree(void)
+{
+	/* Initialize this module; to be called before VTreeFS is started.
+	 * Fetching the process tables here also verifies that we were not
+	 * compiled against a kernel other than the one currently running.
+	 * Return OK on success, or the error from update_tables().
+	 */
+	int r, count;
+
+	r = update_tables();
+	if (r != OK)
+		return r;
+
+	/* Count the pid_files[] entries up to the NULL-named terminator. This
+	 * is the exact maximum number of entries a PID directory may get, so
+	 * there is no need to guess a large enough value.
+	 */
+	count = 0;
+	while (pid_files[count].name != NULL)
+		count++;
+
+	nr_pid_entries = count;
+
+	return OK;
+}
+
+/*===========================================================================*
+ * out_of_inodes *
+ *===========================================================================*/
+PRIVATE void out_of_inodes(void)
+{
+	/* Called when an inode could not be added: the NR_INODES value is set
+	 * too low. There is little to be done about it, but operation may be
+	 * able to continue in degraded form, so this is not a panic. As long
+	 * as NR_INODES is not below the *crucial* minimum, the visible symptom
+	 * is an incomplete listing of the main proc directory. The warning is
+	 * printed once only.
+	 */
+	static int warned = FALSE;
+
+	if (warned)
+		return;
+
+	printf("PROCFS: out of inodes!\n");
+
+	warned = TRUE;
+}
+
+/*===========================================================================*
+ * construct_pid_dirs *
+ *===========================================================================*/
+PRIVATE void construct_pid_dirs(void)
+{
+ /* Regenerate the set of PID directories in the root directory of the
+ * file system. Add new directories and delete old directories as
+ * appropriate; leave unchanged those that should remain the same.
+ * The decisions are based on the current contents of the local process
+ * tables; this function does not refresh those tables itself.
+ */
+ struct inode *root, *node;
+ struct inode_stat stat;
+ char name[PNAME_MAX+1];
+ pid_t pid;
+ int i;
+
+ root = get_root_inode();
+
+ /* PID directories use their slot number as inode index. */
+ for (i = 0; i < NR_PROCS + NR_TASKS; i++) {
+ /* Do we already have an inode associated with this slot? */
+ node = get_inode_by_index(root, i);
+
+ /* If the process slot is not in use, delete the associated
+ * inode if there was one, and skip this slot entirely.
+ */
+ if (!slot_in_use(i)) {
+ if (node != NULL)
+ delete_inode(node);
+
+ continue;
+ }
+
+ /* Get the process ID. Slots below NR_TASKS have no entry in
+ * mproc[]; they are given the (negative) value of their slot
+ * number minus NR_TASKS instead.
+ */
+ if (i < NR_TASKS)
+ pid = (pid_t) (i - NR_TASKS);
+ else
+ pid = mproc[i - NR_TASKS].mp_pid;
+
+ /* If there is an old entry, see if the pid matches the current
+ * entry, and the owner is still the same. Otherwise, delete
+ * the old entry first. We reconstruct the entire subtree even
+ * if only the owner changed, for security reasons: if a
+ * process could keep open a file or directory across the owner
+ * change, it might be able to access information it shouldn't.
+ */
+ if (node != NULL) {
+ /* The cbdata of a PID directory holds the PID it was created
+ * for; see the add_inode() call below.
+ */
+ if (pid == (pid_t) get_inode_cbdata(node) &&
+ check_owner(node, i))
+ continue;
+
+ delete_inode(node);
+ }
+
+ /* Add the entry for the process slot. The directory is named
+ * after the PID in decimal.
+ */
+ sprintf(name, "%d", pid);
+
+ make_stat(&stat, i, NO_INDEX);
+
+ node = add_inode(root, name, i, &stat, nr_pid_entries,
+ (cbdata_t) pid);
+
+ /* On failure, warn and carry on with the remaining slots. */
+ if (node == NULL)
+ out_of_inodes();
+ }
+}
+
+/*===========================================================================*
+ * make_one_pid_entry *
+ *===========================================================================*/
+PRIVATE void make_one_pid_entry(struct inode *parent, char *name, int slot)
+{
+	/* Create the file with the given name in the PID directory 'parent',
+	 * provided that the name is registered in pid_files[] and the file
+	 * does not exist yet. 'slot' is the process slot of the directory.
+	 */
+	struct inode_stat stat;
+	int i;
+
+	/* Nothing to do if the file is already there. */
+	if (get_inode_by_name(parent, name) != NULL)
+		return;
+
+	/* Search the table for the name; unknown names are ignored. */
+	for (i = 0; pid_files[i].name != NULL; i++) {
+		if (strcmp(name, pid_files[i].name) != 0)
+			continue;
+
+		make_stat(&stat, slot, i);
+
+		/* The table position doubles as the file's inode index. */
+		if (add_inode(parent, name, i, &stat, (index_t) 0,
+			(cbdata_t) 0) == NULL)
+			out_of_inodes();
+
+		return;
+	}
+}
+
+/*===========================================================================*
+ * make_all_pid_entries *
+ *===========================================================================*/
+PRIVATE void make_all_pid_entries(struct inode *parent, int slot)
+{
+	/* Create, in the PID directory 'parent', every file from pid_files[]
+	 * that is not present yet. 'slot' is the process slot of the
+	 * directory.
+	 */
+	struct inode_stat stat;
+	int i;
+
+	for (i = 0; pid_files[i].name != NULL; i++) {
+		/* Files are indexed by table position; skip existing ones. */
+		if (get_inode_by_index(parent, i) != NULL)
+			continue;
+
+		make_stat(&stat, slot, i);
+
+		if (add_inode(parent, pid_files[i].name, i, &stat,
+			(index_t) 0, (cbdata_t) 0) == NULL)
+			out_of_inodes();
+	}
+}
+
+/*===========================================================================*
+ * construct_pid_entries *
+ *===========================================================================*/
+PRIVATE void construct_pid_entries(struct inode *parent, char *name)
+{
+	/* Make sure that the PID directory 'parent' contains the file called
+	 * 'name', or all of its files if 'name' is NULL. If the process has
+	 * disappeared in the meantime, remove the directory instead.
+	 */
+	int slot = get_inode_index(parent);
+
+	assert(slot >= 0 && slot < NR_TASKS + NR_PROCS);
+
+	/* The process is gone: drop its directory and stop here. */
+	if (!slot_in_use(slot)) {
+		delete_inode(parent);
+
+		return;
+	}
+
+	/* A NULL name means the directory contents are being retrieved, so
+	 * add all files that are still missing. Otherwise a specific file is
+	 * being looked up, and at most that one file has to be added.
+	 */
+	if (name == NULL)
+		make_all_pid_entries(parent, slot);
+	else
+		make_one_pid_entry(parent, name, slot);
+}
+
+/*===========================================================================*
+ * pid_read *
+ *===========================================================================*/
+PRIVATE void pid_read(struct inode *node)
+{
+	/* Generate the contents of a file in a PID directory, by invoking the
+	 * handler registered for that file in pid_files[] with the slot number
+	 * of the process.
+	 */
+	_PROTOTYPE(void (*handler), (int));
+	int slot, index;
+
+	/* The slot number is the index of the containing directory. Note that
+	 * this currently will not work for files that are not directly in the
+	 * top-level pid subdirectory.
+	 */
+	slot = get_inode_index(get_parent_inode(node));
+
+	/* The file's own index selects its pid_files[] entry. */
+	index = get_inode_index(node);
+
+	handler = (_PROTOTYPE(void (*), (int))) pid_files[index].data;
+
+	handler(slot);
+}
+
+/*===========================================================================*
+ * pid_link *
+ *===========================================================================*/
+PRIVATE int pid_link(struct inode *node, char *ptr, int max)
+{
+	/* Produce the contents of a symbolic link in a PID directory. This is
+	 * a placeholder for future use: for now, every link resolves to the
+	 * empty string, and 'node' and 'max' are not looked at. Always
+	 * returns OK.
+	 */
+
+	/* Nothing yet; store just the terminating null character. */
+	*ptr = '\0';
+
+	return OK;
+}
+
+/*===========================================================================*
+ * lookup_hook *
+ *===========================================================================*/
+PUBLIC int lookup_hook(struct inode *parent, char *name, cbdata_t cbdata)
+{
+	/* Path name resolution hook, called for one parent and name pair.
+	 * Bring our view of the system up to date if needed, and then decide
+	 * whether any files have to be (re)generated. Always returns OK;
+	 * panics if the uptime cannot be obtained.
+	 */
+	static clock_t last_update = 0;
+	clock_t now;
+	int r;
+
+	r = getuptime(&now);
+	if (r != OK)
+		panic(__FILE__, "unable to get uptime", r);
+
+	/* Refreshing the tables on every single lookup gets too expensive, so
+	 * do it only when the uptime value has changed since the last refresh.
+	 * Alternative: pull in only PM's table?
+	 */
+	if (now != last_update) {
+		update_tables();
+
+		last_update = now;
+	}
+
+	if (parent == get_root_inode()) {
+		/* Lookup in the root directory: reconstruct all entries, as
+		 * some of them might have been garbage collected. The entire
+		 * tree must be updated at once; updating individual entries
+		 * would risk name collisions.
+		 */
+		construct_pid_dirs();
+	} else if (dir_is_pid(parent)) {
+		/* Lookup in a process directory: the entry being looked up may
+		 * have to be (re)constructed. The process, and with it our
+		 * containing directory, might be gone by now;
+		 * construct_pid_entries() takes care of that case.
+		 */
+		construct_pid_entries(parent, name);
+	}
+
+	return OK;
+}
+
+/*===========================================================================*
+ * getdents_hook *
+ *===========================================================================*/
+PUBLIC int getdents_hook(struct inode *node, cbdata_t cbdata)
+{
+	/* Directory entry retrieval hook, potentially concerning all files in
+	 * the directory 'node'. Ensure that every file that is supposed to be
+	 * listed is actually part of the virtual tree. Always returns OK.
+	 */
+
+	if (node != get_root_inode()) {
+		/* For a PID directory, add all files not yet present. */
+		if (dir_is_pid(node))
+			construct_pid_entries(node, NULL /*name*/);
+
+		return OK;
+	}
+
+	/* For the root directory, refresh the process tables unconditionally
+	 * and regenerate the full set of PID directories.
+	 */
+	update_tables();
+
+	construct_pid_dirs();
+
+	return OK;
+}
+
+/*===========================================================================*
+ * read_hook *
+ *===========================================================================*/
+PUBLIC int read_hook(struct inode *node, off_t off, char **ptr,
+	size_t *len, cbdata_t cbdata)
+{
+	/* Regular file read hook. Set up the output buffer for the requested
+	 * offset and length, let the appropriate callback function generate
+	 * the data, and hand the result back through 'ptr' and 'len'. Always
+	 * returns OK.
+	 */
+	_PROTOTYPE(void (*handler), (void));
+
+	buf_init(off, *len);
+
+	if (get_inode_index(node) == NO_INDEX) {
+		/* A file without index: its cbdata is the function that
+		 * generates the contents.
+		 */
+		handler = (_PROTOTYPE(void (*), (void))) cbdata;
+
+		handler();
+	} else {
+		/* A file with an index: dispatch on the index instead. */
+		pid_read(node);
+	}
+
+	*len = buf_get(ptr);
+
+	return OK;
+}
+
+/*===========================================================================*
+ * rdlink_hook *
+ *===========================================================================*/
+PUBLIC int rdlink_hook(struct inode *node, char *ptr, size_t max,
+	cbdata_t cbdata)
+{
+	/* Symbolic link resolution hook. Not used yet.
+	 *
+	 * 'ptr' is the buffer to store the link contents in, and 'max' is its
+	 * size as given by the caller. Only links located in a PID directory
+	 * have a handler. Return the result of that handler, or EINVAL if the
+	 * given node is not in a PID directory.
+	 */
+	struct inode *parent;
+
+	/* Get the parent inode. */
+	parent = get_parent_inode(node);
+
+	/* Without a handler, nothing is written to 'ptr'. Report failure in
+	 * that case, so that the caller does not use the buffer contents.
+	 */
+	if (parent == NULL || !dir_is_pid(parent))
+		return EINVAL;
+
+	/* The parent is a pid directory; pass on the pid handler's result. */
+	return pid_link(node, ptr, max);
+}
--- /dev/null
+#ifndef _PROCFS_TYPE_H
+#define _PROCFS_TYPE_H
+
+/* Type of the 'data' field of 'struct file'. Its interpretation depends on
+ * the kind of file it is attached to, as described below.
+ */
+typedef void *data_t; /* abstract data type; can hold pointer */
+
+/* ProcFS supports two groups of files: dynamic files, which are created within
+ * process-specific (PID) directories, and static files, which are global. For
+ * both, the following structure is used to construct the files.
+ *
+ * For dynamic files, the rules are simple: only regular files are supported
+ * (although partial support for symbolic links is already present), and the
+ * 'data' field must be filled with a pointer to a function of the type:
+ *
+ * void (*)(int slot)
+ *
+ * The function will be called whenever a read request for the file is made;
+ * 'slot' contains the kernel slot number of the process being queried (so for
+ * the PM and VFS process tables, NR_TASKS has to be subtracted from the slot
+ * number to find the right slot). The function is expected to produce
+ * appropriate output using the buf_printf() function.
+ *
+ * For static files, regular files and directories are supported. For
+ * directories, the 'data' field must be a pointer to another 'struct file'
+ * array that specifies the contents of the directory - this directory will
+ * then be created recursively. For regular files, the 'data' field must point
+ * to a function of the type:
+ *
+ * void (*)(void)
+ *
+ * Here too, the function will be called upon a read request, and it is
+ * supposed to "fill" the file using buf_printf(). Obviously, for static files,
+ * there is no slot number.
+ *
+ * For both static and dynamic files, 'mode' must specify the file type as well
+ * as the access mode, and in both cases, each array is terminated with an
+ * entry that has its name set to NULL.
+ */
+/* The internal link between static/dynamic files/directories and VTreeFS'
+ * indexes and cbdata values is as follows:
+ * - Dynamic directories are always PID directories in the root directory.
+ * They are generated automatically, and are not specified using a "struct
+ * file" structure. Their index is their slot number, so that getdents()
+ * calls always return any PID at most once. Their cbdata value is the PID of
+ * the process associated with that dynamic directory, for the purpose of
+ * comparing old and new PIDs after updating process tables (without having
+ * to atoi() the directory's name).
+ * - Dynamic files are always in such a dynamic directory. Their index is the
+ * array index into the "struct file" array of pid files (pid_files[]). They
+ * are indexed at all, because they may be deleted at any time due to inode
+ * shortages, independently of other dynamic files in the same directory, and
+ * recreating them without index would again risk possibly inconsistent
+ * getdents() results, where for example the same file shows up twice.
+ * VTreeFS currently does not distinguish between indexed and deletable files
+ * and hence, all dynamic files must be indexed so as to be deletable anyway.
+ * - Static directories have no index (they are not and must not be deletable),
+ * and although their cbdata is their associated 'data' field from their
+ * "struct file" entries, their cbdata value is currently not relied on
+ * anywhere. Then again, as of writing, there are no static directories at
+ * all.
+ * - Static files have no index either (for the same reason). Their cbdata is
+ * also their 'data' field from the "struct file" entry creating the file,
+ * and this is used to actually call the callback function directly.
+ */
+/* One entry of a file table. A table is an array of these entries, ending
+ * with an entry that has its 'name' field set to NULL.
+ */
+struct file {
+ char *name; /* file name, maximum length PNAME_MAX; NULL ends the array */
+ mode_t mode; /* file mode, including file type */
+ data_t data; /* custom data associated with this file: a handler
+ * function, or for static directories, another
+ * 'struct file' array
+ */
+};
+
+#endif /* _PROCFS_TYPE_H */
--- /dev/null
+/* ProcFS - util.c - by Alen Stojanov and David van Moolenbroek */
+
+#include "inc.h"
+
+/*===========================================================================*
+ * procfs_getloadavg *
+ *===========================================================================*/
+PUBLIC int procfs_getloadavg(double *loadavg, int nelem)
+{
+	/* Retrieve system load average information. Store up to three values
+	 * in 'loadavg', covering the last 1, 5 and 15 minutes respectively;
+	 * 'nelem' is the number of values the caller has room for. Return the
+	 * number of values stored, or -1 on failure. If 'nelem' is less than
+	 * one, errno is set to ENOSPC.
+	 */
+	int minutes[3] = { 1, 5, 15 };
+	struct loadinfo loadinfo;
+	u32_t system_hz, ticks_per_slot;
+	int unfilled_ticks;
+	int period, back, slots, newest, slot;
+	double ticks;
+
+	if (nelem < 1) {
+		errno = ENOSPC;
+		return -1;
+	}
+
+	system_hz = sys_hz();
+
+	if (sys_getloadinfo(&loadinfo) != OK)
+		return -1;
+
+	/* There are no more than three averages to report. */
+	if (nelem > 3)
+		nelem = 3;
+
+	/* The newest slot is still being filled. Determine how many ticks it
+	 * is missing, so that they can be left out of the divisor below.
+	 */
+	ticks_per_slot = _LOAD_UNIT_SECS * system_hz;
+	unfilled_ticks =
+		ticks_per_slot - (loadinfo.last_clock % ticks_per_slot);
+
+	newest = loadinfo.proc_last_slot;
+
+	for (period = 0; period < nelem; period++) {
+		/* Number of history slots that make up this period. */
+		slots = minutes[period] * 60 / _LOAD_UNIT_SECS;
+
+		/* Sum the process ticks over those slots, walking backwards
+		 * from the newest slot. The history is a circular buffer of
+		 * _LOAD_HISTORY slots, hence the modulo for the wraparound.
+		 */
+		ticks = 0.0;
+		for (back = 0; back < slots; back++) {
+			slot = (newest - back + _LOAD_HISTORY) % _LOAD_HISTORY;
+			ticks += (double) loadinfo.proc_load_history[slot];
+		}
+
+		/* The load average is the number of process ticks divided by
+		 * the number of clock ticks that the slots actually covered.
+		 */
+		loadavg[period] =
+			ticks / (slots * ticks_per_slot - unfilled_ticks);
+	}
+
+	return nelem;
+}