From: David van Moolenbroek Date: Tue, 14 Sep 2010 21:25:25 +0000 (+0000) Subject: ProcFS server, by Alen Stojanov and David van Moolenbroek X-Git-Tag: v3.2.0~879 X-Git-Url: http://zhaoyanbai.com/repos/icons/debian/static/datamaps.china.min.js?a=commitdiff_plain;h=2c5c5c06ea54bdb072d520e6b3aa5b0ef284ee2e;p=minix.git ProcFS server, by Alen Stojanov and David van Moolenbroek --- diff --git a/etc/system.conf b/etc/system.conf index bfa1638b2..0c28b5965 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -429,6 +429,17 @@ service bios_wini ; }; +service procfs +{ + system + VIRCOPY # 15 + ; + vm + INFO + ; + uid 0; +}; + service isofs { system diff --git a/include/Makefile b/include/Makefile index 5e10829f1..7756bf513 100644 --- a/include/Makefile +++ b/include/Makefile @@ -21,7 +21,8 @@ INCS+= minix/a.out.h minix/bitmap.h minix/callnr.h minix/cdrom.h \ minix/fslib.h minix/ioctl.h minix/ipc.h minix/ipcconst.h \ minix/keymap.h minix/minlib.h minix/mq.h \ minix/netdriver.h minix/partition.h minix/paths.h \ - minix/portio.h minix/priv.h minix/profile.h minix/queryparam.h \ + minix/portio.h minix/priv.h minix/procfs.h minix/profile.h \ + minix/queryparam.h \ minix/rs.h minix/safecopies.h minix/sched.h minix/sef.h minix/sound.h \ minix/spin.h minix/sys_config.h minix/sysinfo.h minix/syslib.h \ minix/sysutil.h minix/timers.h minix/tty.h minix/type.h minix/types.h \ diff --git a/include/minix/procfs.h b/include/minix/procfs.h new file mode 100644 index 000000000..fec097b61 --- /dev/null +++ b/include/minix/procfs.h @@ -0,0 +1,42 @@ +#ifndef _MINIX_PROCFS_H +#define _MINIX_PROCFS_H + +/* The compatibility model is as follows. The current format should be retained + * for as long as possible; new fields can be added at the end of the line, + * because ps/top only read as much as they know of from the start of the line. 
+ * Once fields (really) have to be removed, or the whole line becomes too big + * of a mess, a completely new format string can be put in, but with an + * increased PSINFO_VERSION at the beginning. That way, older ps/top copies + * will not misinterpret the new fields, but rather fail cleanly. + */ +#define PSINFO_VERSION 0 + +/* Process types. */ +#define TYPE_TASK 'T' +#define TYPE_SYSTEM 'S' +#define TYPE_USER 'U' + +/* General process states. */ +#define STATE_SLEEP 'S' +#define STATE_WAIT 'W' +#define STATE_ZOMBIE 'Z' +#define STATE_RUN 'R' +#define STATE_STOP 'T' + +/* PM sleep states. */ +#define PSTATE_NONE '-' +#define PSTATE_PAUSED 'P' +#define PSTATE_WAITING 'W' +#define PSTATE_SIGSUSP 'S' + +/* VFS block states. */ +#define FSTATE_NONE '-' +#define FSTATE_PIPE 'P' +#define FSTATE_LOCK 'L' +#define FSTATE_POPEN 'O' +#define FSTATE_SELECT 'S' +#define FSTATE_DOPEN 'D' +#define FSTATE_TASK 'T' +#define FSTATE_UNKNOWN '?' + +#endif /* _MINIX_PROCFS_H */ diff --git a/servers/Makefile b/servers/Makefile index a1faf82b5..72fac3b8a 100644 --- a/servers/Makefile +++ b/servers/Makefile @@ -3,7 +3,8 @@ .include -SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm +SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs \ + mfs pfs pm procfs rs sched vfs vm IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm diff --git a/servers/procfs/Makefile b/servers/procfs/Makefile new file mode 100644 index 000000000..38ae2ab06 --- /dev/null +++ b/servers/procfs/Makefile @@ -0,0 +1,14 @@ +# Makefile for ProcFS server +PROG= procfs +SRCS= buf.c main.c pid.c root.c tree.c util.c + +CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers + +DPADD+= ${LIBVTREEFS} ${LIBSYS} +LDADD+= -lvtreefs -lsys + +MAN= + +BINDIR?= /sbin + +.include diff --git a/servers/procfs/NOTES b/servers/procfs/NOTES new file mode 100644 index 000000000..a14f08491 --- /dev/null +++ b/servers/procfs/NOTES @@ -0,0 +1,68 @@ +Development notes regarding ProcFS. 
Original document by David van Moolenbroek. + + +SECURITY MODEL + +Right now, procfs is not able to deal with security-sensitive information, +because there would be too many opportunities for rogue processes to obtain +values they shouldn't be able to get to. This is mainly due to the fact that +while procfs is running, the environment around it may change arbitrarily: for +example, a /proc/<pid>/mem file could offer access to a process's core memory, +but if a rogue process opened that file right before the victim process invokes +an exec() on a setuid binary, the rogue process could read from the victim +process's memory while a victim user provides this process with their password. +This is only one example out of many; such time-of-check/time-of-use race +conditions are inherent to the race-prone situation that procfs +finds itself in, trying to provide information about an asynchronously running +system. + +A little more specifically, this problem mainly comes up when system calls are +made to obtain information (long) after a certain PID directory has been +updated, which typically happens right after pulling in a new copy of the +process tables of the kernel, PM, and VFS. Returning stale information from +those tables is usually not a problem: at worst, the caller gets outdated +information about the system as it once was, after passing a security check for +that point in time. Hence, it cannot obtain information it never had access +to. Using information from those tables to perform calls later, however, is +a different case. In the "mem" example above, procfs would have the old user ID +in its copy of the process tables, and yet perform on-demand sys_datacopy calls +(or something similar) to retrieve memory from the process, bypassing a check +on the then-current user ID. 
A similar situation already exists right now for +the /proc/<pid>/map file, for example, which pulls in information on demand - +but it provides only public information anyway, just like the other files that +procfs currently exposes. + +A proper solution to this problem has simply not been implemented yet. It is +possible to change the system in such a way that procfs checks whether the +target process is still in the same security state before returning information +to the caller process. This can be done either while or after obtaining the +information, depending on what is most convenient for the design of the system. +Any such solution obviously has an impact on system design and procfs' +performance, and was found not worth implementing for the first version of +procfs, since all offered information was public anyway. However, such a change +*must* be made before procfs can expose anything that provides a potential for +security breaches. + +Finally, it must be kept in mind that even updating the process tables from +various other sources is not an atomic operation. There might be mismatches +between the tables. Procfs must be able to handle such occurrences with care, +from both a security perspective and a general functionality perspective. + + +FUTURE EXPANSIONS + +It would be trivial to add a /proc/self symlink pointing to the caller's PID +directory, if the VFS-FS protocol's REQ_RDLINK request were augmented to +include the caller's PID or endpoint. However, this would be a procfs-specific +protocol change, and there does not seem to be a need for this just yet. + +Even more custom protocol changes or procfs-specific backcalls would have to be +added to expose processes' current working directory, root directory, +executable path, or open files. A number of VFS parts would have to be changed +significantly to fully support all of these, possibly including an entire DNLC. 
+ +All the necessary infrastructure is there to add static (sub)directories - for +example, a /proc/net/ directory. It would be more tricky to add subdirectories +for dynamic (process) directories, for example /proc//fd/. This would +require some changes to the VTreeFS side of the tree management. Some of the +current assumptions are documented in type.h. diff --git a/servers/procfs/buf.c b/servers/procfs/buf.c new file mode 100644 index 000000000..8f947db58 --- /dev/null +++ b/servers/procfs/buf.c @@ -0,0 +1,128 @@ +/* ProcFS - buf.c - by Alen Stojanov and David van Moolenbroek */ + +#include "inc.h" +#include + +#define BUF_SIZE 4096 + +PRIVATE char buf[BUF_SIZE + 1]; +PRIVATE size_t off, left, used; +PRIVATE off_t skip; + +/*===========================================================================* + * buf_init * + *===========================================================================*/ +PUBLIC void buf_init(off_t start, size_t len) +{ + /* Initialize the buffer for fresh use. The first 'start' bytes of the + * produced output are to be skipped. After that, up to a total of + * 'len' bytes are requested. + */ + + skip = start; + left = MIN(len, BUF_SIZE); + off = 0; + used = 0; +} + +/*===========================================================================* + * buf_printf * + *===========================================================================*/ +PUBLIC void buf_printf(char *fmt, ...) +{ + /* Add formatted text to the end of the buffer. + */ + va_list args; + ssize_t len, max; + + if (left == 0) + return; + + /* There is no way to estimate how much space the result will take, so + * we need to produce the string even when skipping part of the start. + * If part of the result is to be skipped, do not memcpy; instead, save + * the offset of where the result starts within the buffer. 
+ * + * The null terminating character is not part of the result, so room + * must be given for it to be stored after completely filling up the + * requested part of the buffer. + */ + max = MIN(skip + left, BUF_SIZE); + + va_start(args, fmt); + len = vsnprintf(&buf[off + used], max + 1, fmt, args); + va_end(args); + + if (skip > 0) { + assert(off == 0); + assert(used == 0); + + if (skip >= len) { + skip -= len; + + return; + } + + off = skip; + if (left > BUF_SIZE - off) + left = BUF_SIZE - off; + len -= off; + skip = 0; + } + + assert(skip == 0); + assert(len >= 0); + assert((long) left >= 0); + + if (len > (ssize_t) left) + len = left; + + used += len; + left -= len; +} + +/*===========================================================================* + * buf_append * + *===========================================================================*/ +PUBLIC void buf_append(char *data, size_t len) +{ + /* Add arbitrary data to the end of the buffer. + */ + + if (left == 0) + return; + + if (skip > 0) { + if (skip >= (ssize_t) len) { + skip -= len; + + return; + } + + data += skip; + len -= skip; + skip = 0; + } + + if (len > left) + len = left; + + memcpy(&buf[off + used], data, len); + + used += len; + left -= len; +} + +/*===========================================================================* + * buf_get * + *===========================================================================*/ +PUBLIC size_t buf_get(char **ptr) +{ + /* Return the buffer's starting address and the length of the used + * part, not counting the trailing null character for the latter. + */ + + *ptr = &buf[off]; + + return used; +} diff --git a/servers/procfs/const.h b/servers/procfs/const.h new file mode 100644 index 000000000..18ad75dc3 --- /dev/null +++ b/servers/procfs/const.h @@ -0,0 +1,33 @@ +#ifndef _PROCFS_CONST_H +#define _PROCFS_CONST_H + +/* The minimum number of inodes depends on a number of factors: + * - Each statically created inode (e.g., /proc/hz) needs an inode. 
As of + * writing, this requires about a dozen inodes. + * - Deleted inodes that are still in use by VFS must be retained. For deleted + * directories, all their containing directories up to the root must be + * retained as well (to allow the user to "cd .." out). VTreeFS already takes + * care of this. In the case of ProcFS, only PID-based directories can be + * deleted; no other directories are dynamically created. These directories + * currently do not contain subdirectories, either. Hence, for deleted open + * inodes, we need to reserve at most NR_VNODES inodes in the worst case. + * - In order for getdents to be able to return all PID-based directories, + * inodes must not be recycled while generating the list of these PID-based + * directories. In the worst case, this means (NR_TASKS + NR_PROCS) extra + * inodes. + * The sum of these is the bare minimum for correct operation in all possible + * circumstances. In practice, not all open files will be deleted files in + * ProcFS, and not all process slots will be in use either, so the average use + * will be a lot less. However, setting the value too low allows for a + * potential denial-of-service attack by a non-root user. + * + * For the moment, we simply set this value to something reasonable. + */ +#define NR_INODES ((NR_TASKS + NR_PROCS) * 4) + +/* Various file modes. 
*/ +#define REG_ALL_MODE (S_IFREG | 0444) /* world-readable regular */ +#define DIR_ALL_MODE (S_IFDIR | 0555) /* world-accessible directory */ +#define LNK_ALL_MODE (S_IFLNK | 0777) /* symbolic link */ + +#endif /* _PROCFS_CONST_H */ diff --git a/servers/procfs/glo.h b/servers/procfs/glo.h new file mode 100644 index 000000000..c70890a67 --- /dev/null +++ b/servers/procfs/glo.h @@ -0,0 +1,15 @@ +#ifndef _PROCFS_GLO_H +#define _PROCFS_GLO_H + +/* pid.c */ +extern struct file pid_files[]; + +/* root.c */ +extern struct file root_files[]; + +/* tree.c */ +extern struct proc proc[NR_PROCS + NR_TASKS]; /* process table from kernel */ +extern struct mproc mproc[NR_PROCS]; /* process table from PM */ +extern struct fproc fproc[NR_PROCS]; /* process table from VFS */ + +#endif /* _PROCFS_GLO_H */ diff --git a/servers/procfs/inc.h b/servers/procfs/inc.h new file mode 100644 index 000000000..4773f5889 --- /dev/null +++ b/servers/procfs/inc.h @@ -0,0 +1,64 @@ +#ifndef _PROCFS_INC_H +#define _PROCFS_INC_H + +#define _POSIX_SOURCE 1 +#define _MINIX 1 +#define _SYSTEM 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "kernel/const.h" +#include "kernel/type.h" +#include "kernel/proc.h" +#include "pm/mproc.h" +#include "vfs/const.h" +#include "vfs/fproc.h" + +#include +#include + +#include "const.h" +#include "type.h" +#include "proto.h" +#include "glo.h" + +#endif /* _PROCFS_INC_H */ diff --git a/servers/procfs/main.c b/servers/procfs/main.c new file mode 100644 index 000000000..152acb212 --- /dev/null +++ b/servers/procfs/main.c @@ -0,0 +1,90 @@ +/* ProcFS - main.c - by Alen Stojanov and David van Moolenbroek */ + +#include "inc.h" + 
+FORWARD _PROTOTYPE( void init_hook, (void) ); + +/* The hook functions that will be called by VTreeFS. */ +PRIVATE struct fs_hooks hooks = { + init_hook, + NULL, /* cleanup_hook */ + lookup_hook, + getdents_hook, + read_hook, + rdlink_hook, + NULL /* message_hook */ +}; + +/*===========================================================================* + * construct_tree * + *===========================================================================*/ +PRIVATE void construct_tree(struct inode *dir, struct file *files) +{ + /* Construct a tree of static files from a null-terminated array of + * file structures, recursively creating directories which have their + * associated data point to child file structures. + */ + struct file *file; + struct inode *node; + struct inode_stat stat; + + stat.uid = SUPER_USER; + stat.gid = SUPER_USER; + stat.size = 0; + stat.dev = NO_DEV; + + for (file = files; file->name != NULL; file++) { + stat.mode = file->mode; + + node = add_inode(dir, file->name, NO_INDEX, &stat, (index_t) 0, + (cbdata_t) file->data); + + assert(node != NULL); + + if (S_ISDIR(file->mode)) + construct_tree(node, (struct file *) file->data); + } +} + +/*===========================================================================* + * init_hook * + *===========================================================================*/ +PRIVATE void init_hook(void) +{ + /* Initialization hook. Generate the static part of the tree. + */ + struct inode *root; + + root = get_root_inode(); + + construct_tree(root, root_files); +} + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(int argc, char *argv[]) +{ + /* ProcFS entry point. + */ + struct inode_stat stat; + int r; + + /* Initialize some state. If we are incompatible with the kernel, exit + * immediately. 
+ */ + if ((r = init_tree()) != OK) + return r; + + /* Properties of the root directory. */ + stat.mode = DIR_ALL_MODE; + stat.uid = SUPER_USER; + stat.gid = SUPER_USER; + stat.size = 0; + stat.dev = NO_DEV; + + /* Start VTreeFS. This call does not return. */ + start_vtreefs(&hooks, NR_INODES, &stat, NR_PROCS + NR_TASKS); + + return 0; +} diff --git a/servers/procfs/pid.c b/servers/procfs/pid.c new file mode 100644 index 000000000..88a9868aa --- /dev/null +++ b/servers/procfs/pid.c @@ -0,0 +1,417 @@ +/* ProcFS - pid.c - by Alen Stojanov and David van Moolenbroek */ + +#include "inc.h" + +#include +#include + +#define S_FRAME_SIZE 4096 /* use malloc if larger than this */ +PRIVATE char s_frame[S_FRAME_SIZE]; /* static storage for process frame */ +PRIVATE char *frame; /* pointer to process frame buffer */ + +FORWARD _PROTOTYPE( void pid_psinfo, (int slot) ); +FORWARD _PROTOTYPE( void pid_cmdline, (int slot) ); +FORWARD _PROTOTYPE( void pid_environ, (int slot) ); +FORWARD _PROTOTYPE( void pid_map, (int slot) ); + +/* The files that are dynamically created in each PID directory. The data field + * contains each file's read function. Subdirectories are not yet supported. + */ +PUBLIC struct file pid_files[] = { + { "psinfo", REG_ALL_MODE, (data_t) pid_psinfo }, + { "cmdline", REG_ALL_MODE, (data_t) pid_cmdline }, + { "environ", REG_ALL_MODE, (data_t) pid_environ }, + { "map", REG_ALL_MODE, (data_t) pid_map }, + { NULL, 0, (data_t) NULL } +}; + +/*===========================================================================* + * is_zombie * + *===========================================================================*/ +PRIVATE int is_zombie(int slot) +{ + /* Is the given slot a zombie process? 
+ */ + + return (slot >= NR_TASKS && + (mproc[slot - NR_TASKS].mp_flags & (TRACE_ZOMBIE | ZOMBIE))); +} + +/*===========================================================================* + * pid_psinfo * + *===========================================================================*/ +PRIVATE void pid_psinfo(int i) +{ + /* Print information used by ps(1) and top(1). + */ + int pi, task, state, type, p_state, f_state; + char name[PROC_NAME_LEN+1], *p; + struct vm_usage_info vui; + pid_t ppid; + + pi = i - NR_TASKS; + task = proc[i].p_nr < 0; + + /* Get the name of the process. Spaces would mess up the format.. */ + if (task || mproc[i].mp_name[0] == 0) + strncpy(name, proc[i].p_name, sizeof(name) - 1); + else + strncpy(name, mproc[pi].mp_name, sizeof(name) - 1); + name[sizeof(name) - 1] = 0; + if ((p = strchr(name, ' ')) != NULL) + p[0] = 0; + + /* Get the type of the process. */ + if (task) + type = TYPE_TASK; + else if (mproc[i].mp_flags & PRIV_PROC) + type = TYPE_SYSTEM; + else + type = TYPE_USER; + + /* Get the state of the process. */ + if (!task) { + if (is_zombie(i)) + state = STATE_ZOMBIE; /* zombie */ + else if (mproc[pi].mp_flags & STOPPED) + state = STATE_STOP; /* stopped (traced) */ + else if (proc[i].p_rts_flags == 0) + state = STATE_RUN; /* in run-queue */ + else if (fp_is_blocked(&fproc[pi]) || + (mproc[pi].mp_flags & (WAITING | PAUSED | SIGSUSPENDED))) + state = STATE_SLEEP; /* sleeping */ + else + state = STATE_WAIT; /* waiting */ + } else { + if (proc[i].p_rts_flags == 0) + state = STATE_RUN; /* in run-queue */ + else + state = STATE_WAIT; /* other i.e. waiting */ + } + + /* We assume that even if a process has become a zombie, its kernel + * proc entry still contains the old (but valid) information. Currently + * this is true, but in the future we may have to filter some fields. 
+ */ + buf_printf("%d %c %d %s %c %d %d %lu %lu %lu %lu", + PSINFO_VERSION, /* information version */ + type, /* process type */ + (int) proc[i].p_endpoint, /* process endpoint */ + name, /* process name */ + state, /* process state letter */ + (int) P_BLOCKEDON(&proc[i]), /* endpt blocked on, or NONE */ + (int) proc[i].p_priority, /* process priority */ + (long) proc[i].p_user_time, /* user time */ + (long) proc[i].p_sys_time, /* system time */ + ex64hi(proc[i].p_cycles), /* execution cycles */ + ex64lo(proc[i].p_cycles) + ); + + /* If the process is not a kernel task, we add some extra info. */ + if (!task) { + memset(&vui, 0, sizeof(vui)); + + if (!is_zombie(i)) { + /* We don't care if this fails. It may still return + * zero memory usage for processes that don't have a + * pagetable, though. Look at vui_total instead. + */ + (void) vm_info_usage(proc[i].p_endpoint, &vui); + + if (vui.vui_total == 0L) { + vui.vui_total = + (proc[i].p_memmap[T].mem_len + + proc[i].p_memmap[D].mem_len) << + CLICK_SHIFT; + } + } + + if (mproc[pi].mp_flags & PAUSED) + p_state = PSTATE_PAUSED; + else if (mproc[pi].mp_flags & WAITING) + p_state = PSTATE_WAITING; + else if (mproc[pi].mp_flags & SIGSUSPENDED) + p_state = PSTATE_SIGSUSP; + else + p_state = '-'; + + if (mproc[pi].mp_parent == pi) + ppid = NO_PID; + else + ppid = mproc[mproc[pi].mp_parent].mp_pid; + + switch (fproc[pi].fp_blocked_on) { + case FP_BLOCKED_ON_NONE: f_state = FSTATE_NONE; break; + case FP_BLOCKED_ON_PIPE: f_state = FSTATE_PIPE; break; + case FP_BLOCKED_ON_LOCK: f_state = FSTATE_LOCK; break; + case FP_BLOCKED_ON_POPEN: f_state = FSTATE_POPEN; break; + case FP_BLOCKED_ON_SELECT: f_state = FSTATE_SELECT; break; + case FP_BLOCKED_ON_DOPEN: f_state = FSTATE_DOPEN; break; + case FP_BLOCKED_ON_OTHER: f_state = FSTATE_TASK; break; + default: f_state = FSTATE_UNKNOWN; + } + + buf_printf(" %lu %lu %lu %c %d %u %u %u %d %c %d %u", + vui.vui_total, /* total memory */ + vui.vui_common, /* common memory */ + vui.vui_shared, 
/* shared memory */ + p_state, /* sleep state */ + ppid, /* parent PID */ + mproc[pi].mp_realuid, /* real UID */ + mproc[pi].mp_effuid, /* effective UID */ + mproc[pi].mp_procgrp, /* process group */ + mproc[pi].mp_nice, /* nice value */ + f_state, /* VFS block state */ + (int) (fproc[pi].fp_blocked_on == FP_BLOCKED_ON_OTHER) + ? fproc[pi].fp_task : NONE, /* block proc */ + fproc[pi].fp_tty /* controlling tty */ + ); + } + + /* Newline at the end of the file. */ + buf_printf("\n"); +} + +/*===========================================================================* + * put_frame * + *===========================================================================*/ +PRIVATE void put_frame(void) +{ + /* If we allocated memory dynamically during a call to get_frame(), + * free it up here. + */ + + if (frame != s_frame) + free(frame); +} + +/*===========================================================================* + * get_frame * + *===========================================================================*/ +PRIVATE int get_frame(int slot, vir_bytes *basep, vir_bytes *sizep, + size_t *nargsp) +{ + /* Get the execution frame from the top of the given process's stack. + * It may be very large, in which case we temporarily allocate memory + * for it (up to a certain size). + */ + vir_bytes base, size; + size_t nargs; + + if (proc[slot].p_nr < 0 || is_zombie(slot)) + return FALSE; + + /* Get the frame base address and size. Limit the size to whatever we + * can handle. If our static buffer is not sufficiently large to store + * the entire frame, allocate memory dynamically. It is then later + * freed by put_frame(). + */ + base = mproc[slot - NR_TASKS].mp_frame_addr; + size = mproc[slot - NR_TASKS].mp_frame_len; + + if (size < sizeof(size_t)) return FALSE; + + if (size > ARG_MAX) size = ARG_MAX; + + if (size > sizeof(s_frame)) { + frame = malloc(size); + + if (frame == NULL) + return FALSE; + } + else frame = s_frame; + + /* Copy in the complete process frame. 
*/ + if (sys_datacopy(proc[slot].p_endpoint, base, + SELF, (vir_bytes) frame, (phys_bytes) size) != OK) { + put_frame(); + + return FALSE; + } + + frame[size] = 0; /* terminate any last string */ + + nargs = * (size_t *) frame; + if (nargs < 1 || sizeof(size_t) + sizeof(char *) * (nargs + 1) > size) { + put_frame(); + + return FALSE; + } + + *basep = base; + *sizep = size; + *nargsp = nargs; + + /* The caller now has to called put_frame() to clean up. */ + return TRUE; +} + +/*===========================================================================* + * pid_cmdline * + *===========================================================================*/ +PRIVATE void pid_cmdline(int slot) +{ + /* Dump the process's command line as it is contained in the process + * itself. Each argument is terminated with a null character. + */ + vir_bytes base, size, ptr; + size_t i, len, nargs; + char **argv; + + if (!get_frame(slot, &base, &size, &nargs)) + return; + + argv = (char **) &frame[sizeof(size_t)]; + + for (i = 0; i < nargs; i++) { + ptr = (vir_bytes) argv[i] - base; + + /* Check for bad pointers. */ + if ((long) ptr < 0L || ptr >= size) + break; + + len = strlen(&frame[ptr]) + 1; + + buf_append(&frame[ptr], len); + } + + put_frame(); +} + +/*===========================================================================* + * pid_environ * + *===========================================================================*/ +PRIVATE void pid_environ(int slot) +{ + /* Dump the process's initial environment as it is contained in the + * process itself. Each entry is terminated with a null character. + */ + vir_bytes base, size, ptr; + size_t nargs, off, len; + char **envp; + + if (!get_frame(slot, &base, &size, &nargs)) + return; + + off = sizeof(size_t) + sizeof(char *) * (nargs + 1); + envp = (char **) &frame[off]; + + for (;;) { + /* Make sure there is no buffer overrun. 
*/ + if (off + sizeof(char *) > size) + break; + + ptr = (vir_bytes) *envp; + + /* Stop at the terminating NULL pointer. */ + if (ptr == 0L) + break; + + ptr -= base; + + /* Check for bad pointers. */ + if ((long) ptr < 0L || ptr >= size) + break; + + len = strlen(&frame[ptr]) + 1; + + buf_append(&frame[ptr], len); + + off += sizeof(char *); + envp++; + } + + put_frame(); +} + +/*===========================================================================* + * dump_regions * + *===========================================================================*/ +PRIVATE int dump_regions(int slot) +{ + /* Print the virtual memory regions of a process. + */ + struct vm_region_info vri[MAX_VRI_COUNT]; + vir_bytes next; + int i, r, seg, count; + + count = 0; + next = 0; + + do { + r = vm_info_region(proc[slot].p_endpoint, vri, MAX_VRI_COUNT, + &next); + + if (r < 0) + return r; + + if (r == 0) + break; + + for (i = 0; i < r; i++) { + switch (vri[i].vri_seg) { + case T: seg = 'T'; break; + case D: seg = 'D'; break; + default: seg = '?'; break; + } + + buf_printf("%c %08lx-%08lx %c%c%c %c\n", + seg, vri[i].vri_addr, + vri[i].vri_addr + vri[i].vri_length, + (vri[i].vri_prot & PROT_READ) ? 'r' : '-', + (vri[i].vri_prot & PROT_WRITE) ? 'w' : '-', + (vri[i].vri_prot & PROT_EXEC) ? 'x' : '-', + (vri[i].vri_flags & MAP_SHARED) ? 's' : 'p'); + + count++; + } + } while (r == MAX_VRI_COUNT); + + return count; +} + +/*===========================================================================* + * dump_segments * + *===========================================================================*/ +PRIVATE void dump_segments(int slot) +{ + /* Print the memory segments of a process. + */ + int i; + + for (i = 0; i < NR_LOCAL_SEGS; i++) { + buf_printf("%c %08lx-%08lx %s -\n", + i == T ? 'T' : 'D', + proc[slot].p_memmap[i].mem_vir << CLICK_SHIFT, + (proc[slot].p_memmap[i].mem_vir + + proc[slot].p_memmap[i].mem_len) << CLICK_SHIFT, + (i == T) ? "r-x" : + (proc[slot].p_memmap[T].mem_len == 0) ? 
"rwx" : "rw-"); + } +} + +/*===========================================================================* + * pid_map * + *===========================================================================*/ +PRIVATE void pid_map(int slot) +{ + /* Print a memory map of the process. Obtain the information from VM if + * possible; otherwise fall back on segments from the kernel. + */ + + /* Zombies have no memory. */ + if (is_zombie(slot)) + return; + + /* Kernel tasks also have no memory. */ + if (proc[slot].p_nr >= 0) { + if (dump_regions(slot) != 0) + return; + } + + /* For kernel tasks, or for processes that have no regions according to + * VM, we assume they are not using virtual memory, and we print their + * segments instead. + */ + dump_segments(slot); +} diff --git a/servers/procfs/proto.h b/servers/procfs/proto.h new file mode 100644 index 000000000..01b62d24f --- /dev/null +++ b/servers/procfs/proto.h @@ -0,0 +1,23 @@ +#ifndef _PROCFS_PROTO_H +#define _PROCFS_PROTO_H + +/* buf.c */ +_PROTOTYPE( void buf_init, (off_t start, size_t len) ); +_PROTOTYPE( void buf_printf, (char *fmt, ...) 
); +_PROTOTYPE( void buf_append, (char *data, size_t len) ); +_PROTOTYPE( size_t buf_get, (char **ptr) ); + +/* tree.c */ +_PROTOTYPE( int init_tree, (void) ); +_PROTOTYPE( int lookup_hook, (struct inode *parent, char *name, + cbdata_t cbdata) ); +_PROTOTYPE( int getdents_hook, (struct inode *inode, cbdata_t cbdata) ); +_PROTOTYPE( int read_hook, (struct inode *inode, off_t offset, + char **ptr, size_t *len, cbdata_t cbdata) ); +_PROTOTYPE( int rdlink_hook, (struct inode *inode, char *ptr, + size_t max, cbdata_t cbdata) ); + +/* util.c */ +_PROTOTYPE( int procfs_getloadavg, (double *loadavg, int nelem) ); + +#endif /* _PROCFS_PROTO_H */ diff --git a/servers/procfs/root.c b/servers/procfs/root.c new file mode 100644 index 000000000..d57bc0a55 --- /dev/null +++ b/servers/procfs/root.c @@ -0,0 +1,131 @@ +/* ProcFS - root.c - by Alen Stojanov and David van Moolenbroek */ + +#include "inc.h" +#include + +FORWARD _PROTOTYPE( void root_hz, (void) ); +FORWARD _PROTOTYPE( void root_uptime, (void) ); +FORWARD _PROTOTYPE( void root_loadavg, (void) ); +FORWARD _PROTOTYPE( void root_kinfo, (void) ); +FORWARD _PROTOTYPE( void root_meminfo, (void) ); +FORWARD _PROTOTYPE( void root_pci, (void) ); + +struct file root_files[] = { + { "hz", REG_ALL_MODE, (data_t) root_hz }, + { "uptime", REG_ALL_MODE, (data_t) root_uptime }, + { "loadavg", REG_ALL_MODE, (data_t) root_loadavg }, + { "kinfo", REG_ALL_MODE, (data_t) root_kinfo }, + { "meminfo", REG_ALL_MODE, (data_t) root_meminfo }, + { "pci", REG_ALL_MODE, (data_t) root_pci }, + { NULL, 0, NULL } +}; + +/*===========================================================================* + * root_hz * + *===========================================================================*/ +PRIVATE void root_hz(void) +{ + /* Print the system clock frequency. 
+ */ + + buf_printf("%lu\n", (long) sys_hz()); +} + +/*===========================================================================* + * root_loadavg * + *===========================================================================*/ +PRIVATE void root_loadavg(void) +{ + /* Print load averages. + */ + double avg[3]; + + if (procfs_getloadavg(avg, 3) != 3) + return; + + buf_printf("%.2lf %.2lf %.2lf\n", avg[0], avg[1], avg[2]); +} + +/*===========================================================================* + * root_uptime * + *===========================================================================*/ +PRIVATE void root_uptime(void) +{ + /* Print the current uptime. + */ + clock_t ticks; + + if (getuptime(&ticks) != OK) + return; + + buf_printf("%.2lf\n", (double) ticks / (double) sys_hz()); +} + +/*===========================================================================* + * root_kinfo * + *===========================================================================*/ +PRIVATE void root_kinfo(void) +{ + /* Print general kernel information. + */ + struct kinfo kinfo; + + if (sys_getkinfo(&kinfo) != OK) + return; + + buf_printf("%u %u\n", kinfo.nr_procs, kinfo.nr_tasks); +} + +/*===========================================================================* + * root_meminfo * + *===========================================================================*/ +PRIVATE void root_meminfo(void) +{ + /* Print general memory information. + */ + struct vm_stats_info vsi; + + if (vm_info_stats(&vsi) != OK) + return; + + buf_printf("%u %lu %lu %lu %lu\n", vsi.vsi_pagesize, + vsi.vsi_total, vsi.vsi_free, vsi.vsi_largest, vsi.vsi_cached); +} + +/*===========================================================================* + * root_pci * + *===========================================================================*/ +PRIVATE void root_pci(void) +{ + /* Print information about PCI devices present in the system. 
+ */ + u16_t vid, did; + u8_t bcr, scr, pifr; + char *slot_name, *dev_name; + int r, devind; + static int first = TRUE; + + /* This should be taken care of behind the scenes by the PCI lib. */ + if (first) { + pci_init(); + first = FALSE; + } + + /* Iterate over all devices, printing info for each of them. */ + r = pci_first_dev(&devind, &vid, &did); + while (r == 1) { + slot_name = pci_slot_name(devind); + dev_name = pci_dev_name(vid, did); + + bcr = pci_attr_r8(devind, PCI_BCR); + scr = pci_attr_r8(devind, PCI_SCR); + pifr = pci_attr_r8(devind, PCI_PIFR); + + buf_printf("%s %x/%x/%x %04X:%04X %s\n", + slot_name ? slot_name : "-", + bcr, scr, pifr, vid, did, + dev_name ? dev_name : ""); + + r = pci_next_dev(&devind, &vid, &did); + } +} diff --git a/servers/procfs/tree.c b/servers/procfs/tree.c new file mode 100644 index 000000000..412878707 --- /dev/null +++ b/servers/procfs/tree.c @@ -0,0 +1,456 @@ +/* ProcFS - tree.c - by Alen Stojanov and David van Moolenbroek */ + +#include "inc.h" + +PUBLIC struct proc proc[NR_PROCS + NR_TASKS]; +PUBLIC struct mproc mproc[NR_PROCS]; +PUBLIC struct fproc fproc[NR_PROCS]; + +PRIVATE int nr_pid_entries; + +/*===========================================================================* + * slot_in_use * + *===========================================================================*/ +PRIVATE int slot_in_use(int slot) +{ + /* Return whether the given slot is in use by a process. + */ + + return (proc[slot].p_rts_flags != RTS_SLOT_FREE || + (slot >= NR_TASKS && + (mproc[slot - NR_TASKS].mp_flags & IN_USE))); +} + +/*===========================================================================* + * check_owner * + *===========================================================================*/ +PRIVATE int check_owner(struct inode *node, int slot) +{ + /* Check if the owner user and group ID of the inode are still in sync + * the current effective user and group ID of the given process. 
+ */ + struct inode_stat stat; + + if (slot < NR_TASKS) return TRUE; + + get_inode_stat(node, &stat); + + return (stat.uid == mproc[slot - NR_TASKS].mp_effuid && + stat.gid == mproc[slot - NR_TASKS].mp_effgid); +} + +/*===========================================================================* + * make_stat * + *===========================================================================*/ +PRIVATE void make_stat(struct inode_stat *stat, int slot, int index) +{ + /* Fill in an inode_stat structure for the given process slot and + * per-pid file index (or NO_INDEX for the process subdirectory root). + */ + + if (index == NO_INDEX) + stat->mode = DIR_ALL_MODE; + else + stat->mode = pid_files[index].mode; + + if (slot < NR_TASKS) { + stat->uid = SUPER_USER; + stat->gid = SUPER_USER; + } else { + stat->uid = mproc[slot - NR_TASKS].mp_effuid; + stat->gid = mproc[slot - NR_TASKS].mp_effgid; + } + + stat->size = 0; + stat->dev = NO_DEV; +} + +/*===========================================================================* + * dir_is_pid * + *===========================================================================*/ +PRIVATE int dir_is_pid(struct inode *node) +{ + /* Return whether the given node is a PID directory. + */ + + return (get_parent_inode(node) == get_root_inode() && + get_inode_index(node) != NO_INDEX); +} + +/*===========================================================================* + * update_tables * + *===========================================================================*/ +PRIVATE int update_tables(void) +{ + /* Get the process tables from the kernel, PM, and VFS. + * Check the magic number in the kernel table entries. 
+ */ + int r, slot; + + if ((r = sys_getproctab(proc)) != OK) return r; + + for (slot = 0; slot < NR_PROCS + NR_TASKS; slot++) { + if (proc[slot].p_magic != PMAGIC) { + printf("PROCFS: system version mismatch!\n"); + + return EINVAL; + } + } + + if ((r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc)) != OK) return r; + + if ((r = getsysinfo(VFS_PROC_NR, SI_PROC_TAB, fproc)) != OK) return r; + + return OK; +} + +/*===========================================================================* + * init_tree * + *===========================================================================*/ +PUBLIC int init_tree(void) +{ + /* Initialize this module, before VTreeFS is started. As part of the + * process, check if we're not compiled against a kernel different from + * the one that is running at the moment. + */ + int i, r; + + if ((r = update_tables()) != OK) + return r; + + /* Get the maximum number of entries that we may add to each PID's + * directory. We could just default to a large value, but why not get + * it right? + */ + for (i = 0; pid_files[i].name != NULL; i++); + + nr_pid_entries = i; + + return OK; +} + +/*===========================================================================* + * out_of_inodes * + *===========================================================================*/ +PRIVATE void out_of_inodes(void) +{ + /* Out of inodes - the NR_INODES value is set too low. We can not do + * much, but we might be able to continue with degraded functionality, + * so do not panic. If the NR_INODES value is not below the *crucial* + * minimum, the symptom of this case will be an incomplete listing of + * the main proc directory. 
+ */ + static int warned = FALSE; + + if (warned == FALSE) { + printf("PROCFS: out of inodes!\n"); + + warned = TRUE; + } +} + +/*===========================================================================* + * construct_pid_dirs * + *===========================================================================*/ +PRIVATE void construct_pid_dirs(void) +{ + /* Regenerate the set of PID directories in the root directory of the + * file system. Add new directories and delete old directories as + * appropriate; leave unchanged those that should remain the same. + */ + struct inode *root, *node; + struct inode_stat stat; + char name[PNAME_MAX+1]; + pid_t pid; + int i; + + root = get_root_inode(); + + for (i = 0; i < NR_PROCS + NR_TASKS; i++) { + /* Do we already have an inode associated with this slot? */ + node = get_inode_by_index(root, i); + + /* If the process slot is not in use, delete the associated + * inode if there was one, and skip this slot entirely. + */ + if (!slot_in_use(i)) { + if (node != NULL) + delete_inode(node); + + continue; + } + + /* Get the process ID. */ + if (i < NR_TASKS) + pid = (pid_t) (i - NR_TASKS); + else + pid = mproc[i - NR_TASKS].mp_pid; + + /* If there is an old entry, see if the pid matches the current + * entry, and the owner is still the same. Otherwise, delete + * the old entry first. We reconstruct the entire subtree even + * if only the owner changed, for security reasons: if a + * process could keep open a file or directory across the owner + * change, it might be able to access information it shouldn't. + */ + if (node != NULL) { + if (pid == (pid_t) get_inode_cbdata(node) && + check_owner(node, i)) + continue; + + delete_inode(node); + } + + /* Add the entry for the process slot. 
*/ + sprintf(name, "%d", pid); + + make_stat(&stat, i, NO_INDEX); + + node = add_inode(root, name, i, &stat, nr_pid_entries, + (cbdata_t) pid); + + if (node == NULL) + out_of_inodes(); + } +} + +/*===========================================================================* + * make_one_pid_entry * + *===========================================================================*/ +PRIVATE void make_one_pid_entry(struct inode *parent, char *name, int slot) +{ + /* Construct one file in a PID directory, if a file with the given name + * should exist at all. + */ + struct inode *node; + struct inode_stat stat; + int i; + + /* Don't readd if it is already there. */ + node = get_inode_by_name(parent, name); + if (node != NULL) + return; + + /* Only add the file if it is a known, registered name. */ + for (i = 0; pid_files[i].name != NULL; i++) { + if (!strcmp(name, pid_files[i].name)) { + make_stat(&stat, slot, i); + + node = add_inode(parent, name, i, &stat, + (index_t) 0, (cbdata_t) 0); + + if (node == NULL) + out_of_inodes(); + + break; + } + } +} + +/*===========================================================================* + * make_all_pid_entries * + *===========================================================================*/ +PRIVATE void make_all_pid_entries(struct inode *parent, int slot) +{ + /* Construct all files in a PID directory. 
+ */ + struct inode *node; + struct inode_stat stat; + int i; + + for (i = 0; pid_files[i].name != NULL; i++) { + node = get_inode_by_index(parent, i); + if (node != NULL) + continue; + + make_stat(&stat, slot, i); + + node = add_inode(parent, pid_files[i].name, i, &stat, + (index_t) 0, (cbdata_t) 0); + + if (node == NULL) + out_of_inodes(); + } +} + +/*===========================================================================* + * construct_pid_entries * + *===========================================================================*/ +PRIVATE void construct_pid_entries(struct inode *parent, char *name) +{ + /* Construct one requested file entry, or all file entries, in a PID + * directory. + */ + int slot; + + slot = get_inode_index(parent); + assert(slot >= 0 && slot < NR_TASKS + NR_PROCS); + + /* If this process is already gone, delete the directory now. */ + if (!slot_in_use(slot)) { + delete_inode(parent); + + return; + } + + /* If a specific file name is being looked up, see if we have to add + * an inode for that file. If the directory contents are being + * retrieved, add all files that have not yet been added. + */ + if (name != NULL) + make_one_pid_entry(parent, name, slot); + else + make_all_pid_entries(parent, slot); +} + +/*===========================================================================* + * pid_read * + *===========================================================================*/ +PRIVATE void pid_read(struct inode *node) +{ + /* Data is requested from one of the files in a PID directory. Call the + * function that is responsible for generating the data for that file. + */ + struct inode *parent; + int slot, index; + + /* Get the slot number of the process. Note that this currently will + * not work for files not in the top-level pid subdirectory. + */ + parent = get_parent_inode(node); + + slot = get_inode_index(parent); + + /* Get this file's index number. 
*/ + index = get_inode_index(node); + + /* Call the handler procedure for the file. */ + ((_PROTOTYPE(void (*), (int))) pid_files[index].data)(slot); +} + +/*===========================================================================* + * pid_link * + *===========================================================================*/ +PRIVATE int pid_link(struct inode *node, char *ptr, int max) +{ + /* The contents of a symbolic link in a PID directory are requested. + * This function is a placeholder for future use. + */ + + /* Nothing yet. */ + strcpy(ptr, ""); + + return OK; +} + +/*===========================================================================* + * lookup_hook * + *===========================================================================*/ +PUBLIC int lookup_hook(struct inode *parent, char *name, cbdata_t cbdata) +{ + /* Path name resolution hook, for a specific parent and name pair. + * If needed, update our own view of the system first; after that, + * determine whether we need to (re)generate certain files. + */ + static clock_t last_update = 0; + clock_t now; + int r; + + /* Update lazily for lookups, as this gets too expensive otherwise. + * Alternative: pull in only PM's table? + */ + if ((r = getuptime(&now)) != OK) + panic(__FILE__, "unable to get uptime", r); + + if (last_update != now) { + update_tables(); + + last_update = now; + } + + /* If the parent is the root directory, we must now reconstruct all + * entries, because some of them might have been garbage collected. + * We must update the entire tree at once; if we update individual + * entries, we risk name collisions. + */ + if (parent == get_root_inode()) { + construct_pid_dirs(); + } + /* If the parent is a process directory, we may need to (re)construct + * the entry being looked up. + */ + else if (dir_is_pid(parent)) { + /* We might now have deleted our current containing directory; + * construct_pid_entries() will take care of this case. 
+ */ + construct_pid_entries(parent, name); + } + + return OK; +} + +/*===========================================================================* + * getdents_hook * + *===========================================================================*/ +PUBLIC int getdents_hook(struct inode *node, cbdata_t cbdata) +{ + /* Directory entry retrieval hook, for potentially all files in a + * directory. Make sure that all files that are supposed to be + * returned, are actually part of the virtual tree. + */ + + if (node == get_root_inode()) { + update_tables(); + + construct_pid_dirs(); + } else if (dir_is_pid(node)) { + construct_pid_entries(node, NULL /*name*/); + } + + return OK; +} + +/*===========================================================================* + * read_hook * + *===========================================================================*/ +PUBLIC int read_hook(struct inode *node, off_t off, char **ptr, + size_t *len, cbdata_t cbdata) +{ + /* Regular file read hook. Call the appropriate callback function to + * generate and return the data. + */ + + buf_init(off, *len); + + /* Populate the buffer with the proper content. */ + if (get_inode_index(node) != NO_INDEX) { + pid_read(node); + } else { + ((_PROTOTYPE(void (*), (void))) cbdata)(); + } + + *len = buf_get(ptr); + + return OK; +} + +/*===========================================================================* + * rdlink_hook * + *===========================================================================*/ +PUBLIC int rdlink_hook(struct inode *node, char *ptr, size_t max, + cbdata_t cbdata) +{ + /* Symbolic link resolution hook. Not used yet. + */ + struct inode *parent; + + /* Get the parent inode. */ + parent = get_parent_inode(node); + + /* If the parent inode is a pid directory, call the pid handler. 
+ */ + if (parent != NULL && dir_is_pid(parent)) + pid_link(node, ptr, max); + + return OK; +} diff --git a/servers/procfs/type.h b/servers/procfs/type.h new file mode 100644 index 000000000..202551f94 --- /dev/null +++ b/servers/procfs/type.h @@ -0,0 +1,70 @@ +#ifndef _PROCFS_TYPE_H +#define _PROCFS_TYPE_H + +typedef void *data_t; /* abstract data type; can hold pointer */ + +/* ProcFS supports two groups of files: dynamic files, which are created within + * process-specific (PID) directories, and static files, which are global. For + * both, the following structure is used to construct the files. + * + * For dynamic files, the rules are simple: only regular files are supported + * (although partial support for symbolic links is already present), and the + * 'data' field must be filled with a pointer to a function of the type: + * + * void (*)(int slot) + * + * The function will be called whenever a read request for the file is made; + * 'slot' contains the kernel slot number of the process being queried (so for + * the PM and VFS process tables, NR_TASKS has to be subtracted from the slot + * number to find the right slot). The function is expected to produce + * appropriate output using the buf_printf() function. + * + * For static files, regular files and directories are supported. For + * directories, the 'data' field must be a pointer to another 'struct file' + * array that specifies the contents of the directory - this directory will + * the be created recursively. For regular files, the 'data' field must point + * to a function of the type: + * + * void (*)(void) + * + * Here too, the function will be called upon a read request, and it is + * supposed to "fill" the file using buf_printf(). Obviously, for static files, + * there is no slot number. + * + * For both static and dynamic files, 'mode' must specify the file type as well + * as the access mode, and in both cases, each array is terminated with an + * entry that has its name set to NULL. 
+ */ +/* The internal link between static/dynamic files/directories and VTreeFS' + * indexes and cbdata values is as follows: + * - Dynamic directories are always PID directories in the root directory. + * They are generated automatically, and are not specified using a "struct + * file" structure. Their index is their slot number, so that getdents() + * calls always return any PID at most once. Their cbdata value is the PID of + * the process associated with that dynamic directory, for the purpose of + * comparing old and new PIDs after updating process tables (without having + * to atoi() the directory's name). + * - Dynamic files are always in such a dynamic directory. Their index is the + * array index into the "struct file" array of pid files (pid_files[]). They + * are indexed at all because they may be deleted at any time due to inode + * shortages, independently of other dynamic files in the same directory, and + * recreating them without an index would again risk possibly inconsistent + * getdents() results, where for example the same file shows up twice. + * VTreeFS currently does not distinguish between indexed and deletable files + * and hence, all dynamic files must be indexed so as to be deletable anyway. + * - Static directories have no index (they are not and must not be deletable), + * and although their cbdata is their associated 'data' field from their + * "struct file" entries, their cbdata value is currently not relied on + * anywhere. Then again, as of this writing, there are no static directories at + * all. + * - Static files have no index either (for the same reason). Their cbdata is + * also their 'data' field from the "struct file" entry creating the file, + * and this is used to actually call the callback function directly.
+ */ +struct file { + char *name; /* file name, maximum length PNAME_MAX */ + mode_t mode; /* file mode, including file type */ + data_t data; /* custom data associated with this file */ +}; + +#endif /* _PROCFS_TYPE_H */ diff --git a/servers/procfs/util.c b/servers/procfs/util.c new file mode 100644 index 000000000..703af706a --- /dev/null +++ b/servers/procfs/util.c @@ -0,0 +1,63 @@ +/* ProcFS - util.c - by Alen Stojanov and David van Moolenbroek */ + +#include "inc.h" + +/*===========================================================================* + * procfs_getloadavg * + *===========================================================================*/ +PUBLIC int procfs_getloadavg(double *loadavg, int nelem) +{ + /* Retrieve system load average information. + */ + struct loadinfo loadinfo; + u32_t system_hz, ticks_per_slot; + int p, unfilled_ticks; + int minutes[3] = { 1, 5, 15 }; + ssize_t l; + + if(nelem < 1) { + errno = ENOSPC; + return -1; + } + + system_hz = sys_hz(); + + if((l=sys_getloadinfo(&loadinfo)) != OK) + return -1; + if(nelem > 3) + nelem = 3; + + /* How many ticks are missing from the newest-filled slot? */ + ticks_per_slot = _LOAD_UNIT_SECS * system_hz; + unfilled_ticks = + ticks_per_slot - (loadinfo.last_clock % ticks_per_slot); + + for(p = 0; p < nelem; p++) { + int h, slots; + double l = 0.0; + int latest = loadinfo.proc_last_slot; + slots = minutes[p] * 60 / _LOAD_UNIT_SECS; + + /* Add up the total number of process ticks for this number + * of minutes (minutes[p]). Start with the newest slot, which + * is latest, and count back for the number of slots that + * correspond to the right number of minutes. Take wraparound + * into account by calculating the index modulo _LOAD_HISTORY, + * which is the number of slots of history kept. 
+ */ + for(h = 0; h < slots; h++) { + int slot; + slot = (latest - h + _LOAD_HISTORY) % _LOAD_HISTORY; + l += (double) loadinfo.proc_load_history[slot]; + } + + /* The load average over this number of minutes is the number + * of process-ticks divided by the number of ticks, not + * counting the number of ticks the last slot hasn't been + * around yet. + */ + loadavg[p] = l / (slots * ticks_per_slot - unfilled_ticks); + } + + return nelem; +}