From: David van Moolenbroek <david@minix3.org>
Date: Tue, 4 Nov 2014 21:33:04 +0000 (+0000)
Subject: Add trace(1): the MINIX3 system call tracer
X-Git-Url: http://zhaoyanbai.com/repos/?a=commitdiff_plain;h=521fa314e2aaec3c192c15f2aaa4c677a544e62a;p=minix.git

Add trace(1): the MINIX3 system call tracer

Change-Id: Ib970c8647409196902ed53d6e9631a1673a4ab2e
---

diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi
index 792effd3f..39570edcb 100644
--- a/distrib/sets/lists/minix/mi
+++ b/distrib/sets/lists/minix/mi
@@ -522,6 +522,7 @@
 ./usr/bin/touch				minix-sys
 ./usr/bin/tput				minix-sys
 ./usr/bin/tr				minix-sys
+./usr/bin/trace				minix-sys
 ./usr/bin/true				minix-sys
 ./usr/bin/truncate			minix-sys
 ./usr/bin/tsort				minix-sys
@@ -2560,6 +2561,7 @@
 ./usr/man/man1/touch.1			minix-sys
 ./usr/man/man1/tput.1			minix-sys
 ./usr/man/man1/tr.1			minix-sys
+./usr/man/man1/trace.1			minix-sys
 ./usr/man/man1/trap.1			minix-sys	obsolete
 ./usr/man/man1/true.1			minix-sys
 ./usr/man/man1/truncate.1		minix-sys
diff --git a/minix/usr.bin/Makefile b/minix/usr.bin/Makefile
index 60d19bf33..660309d14 100644
--- a/minix/usr.bin/Makefile
+++ b/minix/usr.bin/Makefile
@@ -8,5 +8,6 @@ SUBDIR+=	grep
 SUBDIR+=	ministat
 SUBDIR+=	top
 SUBDIR+=	toproto
+SUBDIR+=	trace
 
 .include <bsd.subdir.mk>
diff --git a/minix/usr.bin/trace/Makefile b/minix/usr.bin/trace/Makefile
new file mode 100644
index 000000000..af2d63b86
--- /dev/null
+++ b/minix/usr.bin/trace/Makefile
@@ -0,0 +1,21 @@
+.include <bsd.own.mk>
+
+PROG=	trace
+SRCS=	call.o error.o escape.o format.o ioctl.o kernel.o mem.o output.o \
+	proc.o signal.o trace.o
+.PATH:	${.CURDIR}/service
+SRCS+=	pm.o vfs.o rs.o vm.o ipc.o
+.PATH:	${.CURDIR}/ioctl
+SRCS+=	block.o char.o net.o svrctl.o
+
+CPPFLAGS+=	-D_MINIX_SYSTEM=1 -I${.CURDIR} -I${NETBSDSRCDIR}/minix
+
+error.c: error.awk ${NETBSDSRCDIR}/sys/sys/errno.h
+	${TOOL_AWK} -f ${.ALLSRC} > ${.TARGET}
+
+signal.c: signal.awk ${NETBSDSRCDIR}/sys/sys/signal.h
+	${TOOL_AWK} -f ${.ALLSRC} > ${.TARGET}
+
+CLEANFILES+=	error.c signal.c
+
+.include <bsd.prog.mk>
diff --git a/minix/usr.bin/trace/NOTES b/minix/usr.bin/trace/NOTES
new file mode 100644
index 000000000..91d8cb761
--- /dev/null
+++ b/minix/usr.bin/trace/NOTES
@@ -0,0 +1,255 @@
+Developer notes regarding trace(1), by David van Moolenbroek.
+
+
+OVERALL CODE STRUCTURE
+
+The general tracing engine is in trace.c.  It passes IPC-level system call
+enter and leave events off to call.c, which handles IPC-level system call
+printing and passes off system calls to be interpreted by a service-specific
+system call handler whenever possible.  All the service-specific code is in the
+service/ subdirectory, grouped by destination service.  IOCTLs are a special
+case, which are handled in ioctl.c and passed on to driver-type-grouped IOCTL
+handlers in the ioctl/ subdirectory (this grouping is not strict).  Some of the
+generated output goes through the formatting code in format.c, and all of it
+ends up in output.c.  The remaining source files contain support code.
+
+
+ADDING A SYSTEM CALL HANDLER
+
+In principle, every system call stops the traced process twice: once when the
+system call is started (the call-enter event) and once when the system call
+returns (the call-leave event).  The tracer uses the call-enter event to print
+the request being made, and the call-leave event to print the result of the
+call.  The output format is supposed to mimic largely what the system call
+looks like from a C program, although with additional information where that
+makes sense.  The general output format for system calls is:
+
+  name(parameters) = result
+
+..where "name" is the name of the system call, "parameters" is a list of system
+call parameters, and "result" is the result of the system call.  If possible,
+the part up to and including the equals sign is printed from the call-enter
+event, and the result is printed from the call-leave event.  However, many
+system calls actually pass a pointer to a block of memory that is filled with
+meaningful content as part of the system call.  For that reason, it is also
+possible that the call-enter event stops printing somewhere inside the
+parameters block, and the call-leave event prints the rest of the parameters,
+as well as the equals sign and the result after it.  The place in the printed
+system call where the call-enter printer stops and the call-leave printer is
+supposed to pick up again, is referred to as the "call split".
+
+The tracer has a handler structure for every system call that can be made by
+a user program to any of the MINIX3 services.  This handler structure
+provides three elements: the name of the system call, an "out" function that
+handles printing of the call-enter part of the system call, and an "in"
+function that handles printing of the call-leave part of the system call.  The
+"out" function is expected to print zero or more call parameters, and then
+return a call type, which indicates whether all parameters have been printed
+yet, or not.  In fact, there are three call types, shown here with an example
+which has a "|" pipe symbol added to indicate the call split:
+
+  CT_DONE:       write(5, "foo", 3) = |3
+  CT_NOTDONE:    read(5, |"foo", 1024) = 3
+  CT_NORETURN:   execve("foo", ["foo"], [])| = -1 [ENOENT]
+
+The CT_DONE call type indicates that the handler is done printing all the
+parameters during the call-enter event, and the call split will be after the
+equals sign.  The CT_NOTDONE call type indicates that the handler is not done
+printing all parameters yet, thus yielding a call split in the middle of the
+parameters block (or even right after the opening parenthesis).  The no-return
+(CT_NORETURN) call type is used for a small number of functions that do not
+return on success.  Currently, these are the exit(), execve(), and sigreturn()
+system calls.  For these calls, no result will be printed at all, unless such
+a call fails, in which case a failure result is printed after all.  The call
+split is such that the entire parameters block is printed upon entering the
+call, but the equals sign and result are printed only if the call does return.
+
+Now more about the handler structure for the system call.  First of all, each
+system call has a name, which must be a static string.  It may be supplied
+either as a string, or as a function that returns a name string.  The latter is
+for cases where one message-level system call is used to implement multiple
+C-level system calls (such as setitimer() and getitimer() both going through
+PM_ITIMER).  The name function has the following prototype:
+
+  const char *svc_syscall_name(const message *m_out);
+
+..where "m_out" is a local copy of the request message, which the name function
+can use to decide what string to return for the system call.  As a sidenote,
+in the future, the system call name will be used to implement call filtering.
+
+An "out" printer function has the following prototype:
+
+  int svc_syscall_out(struct trace_proc *proc, const message *m_out);
+
+Here, "proc" is a pointer to the process structure containing information about
+the process making the system call; proc->pid returns the process PID, but the
+function should not access any other fields of this structure directly.
+Instead, many of the output primitive and helper functions (which are all
+prefixed with "put_") take this pointer as part of the call.  "m_out" is a
+local copy of the request message, and the printer may access its fields as it
+sees fit.
+
+The printer function should simply print parameters.  The call name and the
+opening parenthesis are printed by the main output routine.
+
+All simple call parameters should be printed using the put_field() and
+put_value() functions.  The former prints a parameter or field name as flat
+text; the latter is a printf-like interface to the former.  By default, call
+parameters are simply printed as "value", but if printing all names is enabled,
+call parameters are printed as "name=value".  Thus, all parameters should be
+given a name, even if this name does not show up by default.  Either way, these
+two functions take care of deciding whether to print the name, as well as of
+printing separators between the parameters.  More about printing more complex
+parameters (such as structures) in a bit.
+
+The out printer function must return one of the three CT_ call type values.  If
+it returns CT_DONE, the main output routine will immediately print the closing
+parenthesis and equals sign.  If it returns CT_NORETURN, a closing parenthesis
+will be printed.  If it returns CT_NOTDONE, only a parameter field separator
+(that is, a comma and a space) will be printed--after all, it can be assumed
+that more parameters will be printed later.
+
+An "in" printer function has the following prototype:
+
+  void svc_syscall_in(struct trace_proc *proc, const message *m_out,
+          const message *m_in, int failed);
+
+Again, "proc" is the traced process of which its current system call has now
+returned.  "m_out" is again the request message, guaranteed to be unchanged
+since the "out" call.  "m_in" is the reply message from the service.  "failed"
+is either 0 to indicate that the call appears to have succeeded, or PF_FAILED
+to indicate that the call definitely failed.  If PF_FAILED is set, the call
+has failed either at the IPC level or at the system call level (or for another,
+less common reason).  In that case, the contents of "m_in" may be garbage and
+"m_in" must not be used at all.
+
+For CT_NOTDONE type calls, the in printer function should first print the
+remaining parameters.  Here especially, it is important to consider that the
+entire call may fail.  In that case, the parameters of which the contents were
+still going to be printed may also contain garbage, since they were never
+filled.  The expected behavior is to print such parameters as pointer or "&.."
+or something else to indicate that their actual contents are not valid.
+
+Either way, once a CT_NOTDONE type call function is done printing the remaining
+parameters, it must call put_equals(proc) to print the closing parenthesis of
+the call and the equals sign.  CT_NORETURN calls must also use put_equals(proc)
+to print the equals sign.
+
+Then comes the result part.  If the call failed, the in printer function *must*
+use put_result(proc) to print the failure result.  This call not only takes
+care of converting negative error codes from m_in->m_type into "-1 [ECODE]" but
+also prints appropriate failure codes for IPC-level and other exceptional
+failures.  Only if the system call did not fail, may the in printer function
+choose to not call put_result(proc), which on success simply prints
+m_in->m_type as an integer.  Similarly, if the system call succeeded, the in
+printer function may print extended results after the primary result, generally
+in parentheses.  For example, getpid() and getppid() share the same system call
+and thus the tracer prints both return values, one as the primary result of the
+actual call and one in parentheses with a clarifying name as extended result:
+
+  getpid() = 3 (ppid=1)
+
+It should now be clear that printing extended results makes no sense if the
+system call failed.
+
+Besides put_equals and put_result, the following more or less generic support
+functions are available to print the various parts of the requests and replies.
+
+  put_field - output a parameter, structure field, and so on; this function
+              should be used for just about every actual value
+  put_value - printf-like version of put_field
+  put_text  - output plain text; for call handlers, this should be used only to
+              add things right after a put_field call, never on its own
+  put_fmt   - printf-like version of put_text, should generally not be used
+              from call handlers at all
+  put_open  - open a nested block of fields, surrounded by parentheses,
+              brackets, or something like that; this is used for structures,
+              arrays, and any other similar nontrivial case of nesting
+  put_close - close a previously opened block of fields; the nesting depth is
+              actually tracked (to keep per-level separators etc), so each
+              put_open call must have a corresponding put_close call
+  put_open_struct  - perform several tasks necessary to start printing the
+                     fields of a structure; note that this function may fail!
+  put_close_struct - end successful printing of a structure
+  put_ptr   - print a pointer in the traced process
+  put_buf   - print a buffer or string
+  put_flags - print a bitwise flags field
+  put_tail  - helper function for printing the continuation part of an array
+
+Many of these support functions take a flags field which takes PF_-prefixed
+flags to modify the output they generate.  The value of 'failed' in the in
+printer function may actually be passed (bitwise-OR'ed in) as the PF_FAILED
+flag to these support functions, and they will do the right thing.  For
+example, a call to put_open_struct with the PF_FAILED flag will end up simply
+printing the pointer to the structure, and not allow printing of the contents
+of the structure.
+
+The above support functions are documented (at a basic level) within the code,
+but in many cases, it may be useful to look up how they are used in practice by
+the existing handlers.  The same goes for various less clear cases; while there
+is basic support for printing structures, support for printing arrays must be
+coded fully by hand, as has been done for many places.  A serious attempt has
+been made to make the output consistent across the board (mainly thanks to the
+output format of strace, on which the output of this tracer has been based,
+sometimes very strictly and sometimes more loosely, but that aside) so it is
+always advisable to follow the ways of the existing handlers.  Also keep in
+mind that there are already printer functions for several generic structures,
+and these should be used whenever possible (e.g., see the put_fd() comment).
+
+Finally, the default_out and default_in functions may be used as printer
+functions for calls with no parameters, and for functions which need no more
+than put_result() to print their system call result, respectively.
+
+
+INTERNALS: MULTIPROCESS OUTPUT AND PREEMPTION
+
+Things get interesting when multiple processes are traced at once.  Due to the
+nature of process scheduling, system calls may end up being preempted between
+the call-enter and call-leave phases.  This means that the output of a system
+call has to be suspended to give way to an event from another traced process.
+Such preemption may occur with literally all calls; not just "blocking" calls.
+
+The tracer goes to some lengths to aid the user in following the output in
+the light of preemption.  The most important aspect is that the output of the
+call-enter phase is recorded, so that in the case of preemption, the call-leave
+phase can start by replaying the record.  As a result, the user gets to see the
+whole system call on a single line, instead of just the second half.  Such
+system call resumptions are marked with a "*" in their prefix, to show that
+the call was not just entered.  The output therefore looks like this:
+
+      2| syscall() = <..>
+      3| othercall() = 0
+      2|*syscall() = 0
+
+Signals that arrive during a call will cause a resumption of the call as well.
+As a result, a call may be resumed multiple times:
+
+      2| syscall() = <..>
+      3| othercall() = 0
+      2|*syscall() = ** SIGUSR1 ** ** SIGUSR2 ** <..>
+      3| othercall() = -1 [EBUSY]
+      2|*syscall() = ** SIGHUP ** <..>
+      3| othercall() = 0
+      2|*syscall() = 0
+
+This entire scenario shows one single system call from process 2.
+
+In the current implementation, the output that should be recorded and/or cause
+the "<..>" preemption marker, as well as the cases where the recorded text must
+be replayed, are marked by the code explicitly.  Replay takes place in three
+cases: upon the call-leave event (obviously), upon receiving a signal (as shown
+above), and when it is required that a suspended no-return call is shown as
+completed before continuing with other output.  The last case applies to exit()
+and execve(), and both are documented in the code quite extensively.  Generally
+speaking, in all output lines where no recording or replay actions are
+performed, the recording will not be replayed but also not removed.  This
+allows for intermediate lines for that process in the output.  Practically
+speaking, future support for job control could even print when a process gets
+stopped and continued, for that process, while preempting the output for the
+ongoing system call for that same process.
+
+It is possible that the output of the call-enter phase exhausts the recording
+buffer for its process.  In this case, a new, shorter text is generated upon
+process resumption.  There are many other aspects to proper output formatting
+in the light of preemption, but most of them should be documented as part of
+the code reasonably well.
diff --git a/minix/usr.bin/trace/call.c b/minix/usr.bin/trace/call.c
new file mode 100644
index 000000000..441432992
--- /dev/null
+++ b/minix/usr.bin/trace/call.c
@@ -0,0 +1,686 @@
+
+#include "inc.h"
+
+#include <minix/com.h>
+#include <minix/callnr.h>
+#include <minix/endpoint.h>
+
+static const struct calls *call_table[] = {
+	&pm_calls,
+	&vfs_calls,
+	&rs_calls,
+	&vm_calls,
+	&ipc_calls,
+};
+
+/*
+ * Find a call handler for the given endpoint, call number pair.  Return NULL
+ * if no call handler for this call exists.
+ */
+static const struct call_handler *
+find_handler(endpoint_t endpt, int call_nr)
+{
+	int i, index;
+
+	for (i = 0; i < COUNT(call_table); i++) {
+		if (call_table[i]->endpt != ANY &&
+		    call_table[i]->endpt != endpt)
+			continue;
+
+		if (call_nr < call_table[i]->base)
+			continue;
+
+		index = call_nr - call_table[i]->base;
+
+		if (index >= call_table[i]->count)
+			continue;
+
+		if (call_table[i]->map[index].outfunc == NULL)
+			continue;
+
+		return &call_table[i]->map[index];
+	}
+
+	return NULL;
+}
+
+/*
+ * Print an endpoint.
+ */
+void
+put_endpoint(struct trace_proc * proc, const char * name, endpoint_t endpt)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (endpt) {
+		TEXT(ASYNCM);
+		TEXT(IDLE);
+		TEXT(CLOCK);
+		TEXT(SYSTEM);
+		TEXT(KERNEL);
+		TEXT(PM_PROC_NR);
+		TEXT(VFS_PROC_NR);
+		TEXT(RS_PROC_NR);
+		TEXT(MEM_PROC_NR);
+		TEXT(SCHED_PROC_NR);
+		TEXT(TTY_PROC_NR);
+		TEXT(DS_PROC_NR);
+		TEXT(VM_PROC_NR);
+		TEXT(PFS_PROC_NR);
+		TEXT(ANY);
+		TEXT(NONE);
+		TEXT(SELF);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", endpt);
+}
+
+/*
+ * Print a message structure.  The source field will be printed only if the
+ * PF_ALT flag is given.
+ */
+static void
+put_message(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	message m;
+
+	if (!put_open_struct(proc, name, flags, addr, &m, sizeof(m)))
+		return;
+
+	if (flags & PF_ALT)
+		put_endpoint(proc, "m_source", m.m_source);
+
+	put_value(proc, "m_type", "%x", m.m_type);
+
+	put_close_struct(proc, FALSE /*all*/);
+}
+
+/*
+ * Print the call's equals sign, which also implies that the parameters part of
+ * the call has been fully printed and the corresponding closing parenthesis
+ * may have to be printed, if it has not been printed already.
+ */
+void
+put_equals(struct trace_proc * proc)
+{
+
+	/*
+	 * Do not allow multiple equals signs on a single line.  This check is
+	 * protection against badly written handlers.  It does not work for the
+	 * no-return type, but such calls are rare and less error prone anyway.
+	 */
+	assert((proc->call_flags & (CF_DONE | CF_NORETURN)) != CF_DONE);
+
+	/*
+	 * We allow (and in fact force) handlers to call put_equals in order to
+	 * indicate that the call's parameters block has ended, so we must end
+	 * the block here, if we hadn't done so before.
+	 */
+	if (!(proc->call_flags & CF_DONE)) {
+		put_close(proc, ") ");
+
+		proc->call_flags |= CF_DONE;
+	}
+
+	put_align(proc);
+	put_text(proc, "= ");
+
+	format_set_sep(proc, NULL);
+}
+
+/*
+ * Print the primary result of a call, after the equals sign.  It is always
+ * possible that this is an IPC-level or other low-level error, in which case
+ * this takes precedence, which is why this function must be called to print
+ * the result if the call failed in any way at all; it may or may not be used
+ * if the call succeeded.  For regular call results, default MINIX3/POSIX
+ * semantics are used: if the return value is negative, the actual call failed
+ * with -1 and the negative return value is the call's error code.  The caller
+ * may consider other cases a failure (e.g., waitpid() returning 0), but
+ * negative return values *not* signifying an error are currently not supported
+ * since they are not present in MINIX3.
+ */
+void
+put_result(struct trace_proc * proc)
+{
+	const char *errname;
+	int value;
+
+	/* This call should always be preceded by a put_equals call. */
+	assert(proc->call_flags & CF_DONE);
+
+	/*
+	 * If we failed to copy in the result register or message, print a
+	 * basic error and nothing else.
+	 */
+	if (proc->call_flags & (CF_REG_ERR | CF_MSG_ERR)) {
+		put_text(proc, "<fault>");
+
+		return;
+	}
+
+	/*
+	 * If we are printing a system call rather than an IPC call, and an
+	 * error occurred at the IPC level, prefix the output with "<ipc>" to
+	 * indicate the IPC failure.  If we are printing an IPC call, an IPC-
+	 * level result is implied, so we do not print this.
+	 */
+	if (proc->call_handler != NULL && (proc->call_flags & CF_IPC_ERR))
+		put_text(proc, "<ipc> ");
+
+	value = proc->call_result;
+
+	if (value >= 0)
+		put_fmt(proc, "%d", value);
+	else if (!valuesonly && (errname = get_error_name(-value)) != NULL)
+		put_fmt(proc, "-1 [%s]", errname);
+	else
+		put_fmt(proc, "-1 [%d]", -value);
+
+	format_set_sep(proc, " ");
+}
+
+/*
+ * The default enter-call (out) printer, which prints no parameters and is thus
+ * immediately done with printing parameters.
+ */
+int
+default_out(struct trace_proc * __unused proc, const message * __unused m_out)
+{
+
+	return CT_DONE;
+}
+
+/*
+ * The default leave-call (in) printer, which simply prints the call result,
+ * possibly preceded by an equals sign if none was printed yet.  For obvious
+ * reasons, if the handler's out printer returned CT_NOTDONE, this default
+ * printer must not be used.
+ */
+void
+default_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * __unused m_in, int __unused failed)
+{
+
+	if ((proc->call_flags & (CF_DONE | CF_NORETURN)) != CF_DONE)
+		put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Prepare a sendrec call, by copying in the request message, determining
+ * whether it is one of the calls that the tracing engine should know about,
+ * searching for a handler for the call, and returning a name for the call.
+ */
+static const char *
+sendrec_prepare(struct trace_proc * proc, endpoint_t endpt, vir_bytes addr,
+	int * trace_class)
+{
+	const char *name;
+	int r;
+
+	r = mem_get_data(proc->pid, addr, &proc->m_out, sizeof(proc->m_out));
+
+	if (r == 0) {
+		if (endpt == PM_PROC_NR) {
+			if (proc->m_out.m_type == PM_EXEC)
+				*trace_class = TC_EXEC;
+			else if (proc->m_out.m_type == PM_SIGRETURN)
+				*trace_class = TC_SIGRET;
+		}
+
+		proc->call_handler = find_handler(endpt, proc->m_out.m_type);
+	} else
+		proc->call_handler = NULL;
+
+	if (proc->call_handler != NULL) {
+		if (proc->call_handler->namefunc != NULL)
+			name = proc->call_handler->namefunc(&proc->m_out);
+		else
+			name = proc->call_handler->name;
+
+		assert(name != NULL);
+	} else
+		name = "ipc_sendrec";
+
+	return name;
+}
+
+/*
+ * Print the outgoing (request) part of a sendrec call.  If we found a call
+ * handler for the call, let the handler generate output.  Otherwise, print the
+ * sendrec call at the kernel IPC level.  Return the resulting call flags.
+ */
+static unsigned int
+sendrec_out(struct trace_proc * proc, endpoint_t endpt, vir_bytes addr)
+{
+
+	if (proc->call_handler != NULL) {
+		return proc->call_handler->outfunc(proc, &proc->m_out);
+	} else {
+		put_endpoint(proc, "src_dest", endpt);
+		/*
+		 * We have already copied in the message, but if we used m_out
+		 * and PF_LOCADDR here, a copy failure would cause "&.." to be
+		 * printed rather than the actual message address.
+		 */
+		put_message(proc, "m_ptr", 0, addr);
+
+		return CT_DONE;
+	}
+}
+
+/*
+ * Print the incoming (reply) part of a sendrec call.  Copy in the reply
+ * message, determine whether the call is considered to have failed, and let
+ * the call handler do the rest.  If no call handler was found, print an
+ * IPC-level result.
+ */
+static void
+sendrec_in(struct trace_proc * proc, int failed)
+{
+	message m_in;
+
+	if (failed) {
+		/* The call failed at the IPC level. */
+		memset(&m_in, 0, sizeof(m_in)); /* not supposed to be used */
+		assert(proc->call_flags & CF_IPC_ERR);
+	} else if (mem_get_data(proc->pid, proc->m_addr, &m_in,
+	    sizeof(m_in)) != 0) {
+		/* The reply message is somehow unavailable to us. */
+		memset(&m_in, 0, sizeof(m_in)); /* not supposed to be used */
+		proc->call_result = EGENERIC; /* not supposed to be used */
+		proc->call_flags |= CF_MSG_ERR;
+		failed = PF_FAILED;
+	} else {
+		/* The result is for the actual call. */
+		proc->call_result = m_in.m_type;
+		failed = (proc->call_result < 0) ? PF_FAILED : 0;
+	}
+
+	if (proc->call_handler != NULL)
+		proc->call_handler->infunc(proc, &proc->m_out, &m_in, failed);
+	else
+		put_result(proc);
+}
+
+/*
+ * Perform preparations for printing a system call.  Return the name to use
+ * for the call.  The trace class in 'trace_class' is updated only for sendrec
+ * calls that need special treatment; otherwise it is left unchanged.
+ */
+static const char *
+call_prepare(struct trace_proc * proc, reg_t reg[3], int * trace_class)
+{
+
+	switch (proc->call_type) {
+	case SENDREC:
+		return sendrec_prepare(proc, (endpoint_t)reg[1],
+		    (vir_bytes)reg[2], trace_class);
+
+	case SEND:
+		return "ipc_send";
+
+	case SENDNB:
+		return "ipc_sendnb";
+
+	case RECEIVE:
+		return "ipc_receive";
+
+	case NOTIFY:
+		return "ipc_notify";
+
+	case SENDA:
+		return "ipc_senda";
+
+	case MINIX_KERNINFO:
+		return "minix_kerninfo";
+
+	default:
+		/*
+		 * It would be nice to include the call number here, but we
+		 * must return a string that will last until the entire call is
+		 * finished.  Adding another buffer to the trace_proc structure
+		 * is an option, but it seems overkill..
+		 */
+		return "ipc_unknown";
+	}
+}
+
+/*
+ * Print the outgoing (request) part of a system call.  Return the resulting
+ * call flags.
+ */
+static unsigned int
+call_out(struct trace_proc * proc, reg_t reg[3])
+{
+
+	switch (proc->call_type) {
+	case SENDREC:
+		proc->m_addr = (vir_bytes)reg[2];
+
+		return sendrec_out(proc, (endpoint_t)reg[1],
+		    (vir_bytes)reg[2]);
+
+	case SEND:
+	case SENDNB:
+		put_endpoint(proc, "dest", (endpoint_t)reg[1]);
+		put_message(proc, "m_ptr", 0, (vir_bytes)reg[2]);
+
+		return CT_DONE;
+
+	case RECEIVE:
+		proc->m_addr = (vir_bytes)reg[2];
+
+		put_endpoint(proc, "src", (endpoint_t)reg[1]);
+
+		return CT_NOTDONE;
+
+	case NOTIFY:
+		put_endpoint(proc, "dest", (endpoint_t)reg[1]);
+
+		return CT_DONE;
+
+	case SENDA:
+		put_ptr(proc, "table", (vir_bytes)reg[2]);
+		put_value(proc, "count", "%zu", (size_t)reg[1]);
+
+		return CT_DONE;
+
+	case MINIX_KERNINFO:
+	default:
+		return CT_DONE;
+	}
+}
+
+/*
+ * Print the incoming (reply) part of a call.
+ */
+static void
+call_in(struct trace_proc * proc, int failed)
+{
+
+	switch (proc->call_type) {
+	case SENDREC:
+		sendrec_in(proc, failed);
+
+		break;
+
+	case RECEIVE:
+		/* Print the source as well. */
+		put_message(proc, "m_ptr", failed | PF_ALT, proc->m_addr);
+		put_equals(proc);
+		put_result(proc);
+
+		break;
+
+	case MINIX_KERNINFO:
+		/*
+		 * We do not have a platform-independent means to access the
+		 * secondary IPC return value, so we cannot print the receive
+		 * status or minix_kerninfo address.
+		 */
+		/* FALLTHROUGH */
+	default:
+		put_result(proc);
+
+		break;
+	}
+}
+
+/*
+ * Determine whether to skip printing the given call, based on its name.
+ */
+static int
+call_hide(const char * __unused name)
+{
+
+	/*
+	 * TODO: add support for such filtering, with an strace-like -e command
+	 * line option.  For now, we filter nothing, although calls may still
+	 * be hidden as the result of a register retrieval error.
+	 */
+	return FALSE;
+}
+
+/*
+ * The given process entered a system call.  Return the trace class of the
+ * call: TC_EXEC for an execve() call, TC_SIGRET for a sigreturn() call, or
+ * TC_NORMAL for a call that requires no exceptions in the trace engine.
+ */
+int
+call_enter(struct trace_proc * proc, int show_stack)
+{
+	const char *name;
+	reg_t reg[3];
+	int trace_class, type;
+
+	/* Get the IPC-level type and parameters of the system call. */
+	if (kernel_get_syscall(proc->pid, reg) < 0) {
+		/*
+		 * If obtaining the details of the system call failed, even
+		 * though we know the process is stopped on a system call, we
+		 * are going to assume that the process got killed somehow.
+		 * Thus, the best we can do is ignore the system call entirely,
+		 * and hope that the next thing we hear about this process is
+		 * its termination.  At worst, we ignore a serious error..
+		 */
+		proc->call_flags = CF_HIDE;
+
+		return TC_NORMAL;
+	}
+
+	/*
+	 * Obtain the call name that is to be used for this call, and decide
+	 * whether we want to print this call at all.
+	 */
+	proc->call_type = (int)reg[0];
+	trace_class = TC_NORMAL;
+
+	name = call_prepare(proc, reg, &trace_class);
+
+	proc->call_name = name;
+
+	if (call_hide(name)) {
+		proc->call_flags = CF_HIDE;
+
+		return trace_class;
+	}
+
+	/* Only print a stack trace if we are printing the call itself. */
+	if (show_stack)
+		kernel_put_stacktrace(proc);
+
+	/*
+	 * Start a new line, start recording, and print the call name and
+	 * opening parenthesis.
+	 */
+	put_newline();
+
+	format_reset(proc);
+
+	record_start(proc);
+
+	put_text(proc, name);
+	put_open(proc, NULL, PF_NONAME, "(", ", ");
+
+	/*
+	 * Print the outgoing part of the call, that is, some or all of its
+	 * parameters.  This call returns flags indicating how far printing
+	 * got, and may be one of the following combinations:
+	 * - CT_NOTDONE (0) if printing parameters is not yet complete; after
+	 *   the call split, the in handler must print the rest itself;
+	 * - CT_DONE (CF_DONE) if printing parameters is complete, and we
+	 *   should now print the closing parenthesis and equals sign;
+	 * - CT_NORETURN (CF_DONE|CF_NORETURN) if printing parameters is
+	 *   complete, but we should not print the equals sign, because the
+	 *   call is expected not to return (the no-return call type).
+	 */
+	type = call_out(proc, reg);
+	assert(type == CT_NOTDONE || type == CT_DONE || type == CT_NORETURN);
+
+	/*
+	 * Print whatever the handler told us to print for now.
+	 */
+	if (type & CF_DONE) {
+		if (type & CF_NORETURN) {
+			put_close(proc, ")");
+
+			put_space(proc);
+
+			proc->call_flags |= type;
+		} else {
+			/*
+			 * The equals sign is printed implicitly for the
+			 * CT_DONE type only.  For CT_NORETURN and CT_NOTDONE,
+			 * the "in" handler has to do it explicitly.
+			 */
+			put_equals(proc);
+		}
+	} else {
+		/*
+		 * If at least one parameter was printed, print the separator
+		 * now.  We know that another parameter will follow (otherwise
+		 * the caller would have returned CT_DONE), and this way the
+		 * output looks better.
+		 */
+		format_push_sep(proc);
+	}
+
+	/*
+	 * We are now at the call split; further printing will be done once the
+	 * call returns, through call_leave.  Stop recording; if the call gets
+	 * suspended and later resumed, we should replay everything up to here.
+	 */
+#if DEBUG
+	put_text(proc, "|"); /* warning, this may push a space */
+#endif
+
+	record_stop(proc);
+
+	output_flush();
+
+	return trace_class;
+}
+
+/*
+ * The given process left a system call, or if skip is set, the leave phase of
+ * the current system call should be ended.
+ */
+void
+call_leave(struct trace_proc * proc, int skip)
+{
+	reg_t retreg;
+	int hide, failed;
+
+	/* If the call is skipped, it must be a no-return type call. */
+	assert(!skip || (proc->call_flags & (CF_NORETURN | CF_HIDE)));
+
+	/*
+	 * Start by replaying the current call, if necessary.  If the call was
+	 * suspended and we are about to print the "in" part, this is obviously
+	 * needed.  If the call is hidden, replaying will be a no-op, since
+	 * nothing was recorded for this call.  The special case is a skipped
+	 * call (which, as established above, must be a no-return call, e.g.
+	 * exec), for which replaying has the effect that if the call was
+	 * previously suspended, it will now be replayed, without suspension:
+	 *
+	 *       2| execve("./test", ["./test"], [..(12)]) <..>
+	 *       3| sigsuspend([]) = <..>
+	 * [A]   2| execve("./test", ["./test"], [..(12)])
+	 *       2| ---
+	 *       2| Tracing test (pid 2)
+	 *
+	 * The [A] line is the result of replaying the skipped call.
+	 */
+	call_replay(proc);
+
+	hide = (proc->call_flags & CF_HIDE);
+
+	if (!hide && !skip) {
+		/* Get the IPC-level result of the call. */
+		if (kernel_get_retreg(proc->pid, &retreg) < 0) {
+			/* This should never happen.  Deal with it anyway. */
+			proc->call_flags |= CF_REG_ERR;
+			failed = PF_FAILED;
+		} else if ((proc->call_result = (int)retreg) < 0) {
+			proc->call_flags |= CF_IPC_ERR;
+			failed = PF_FAILED;
+		} else
+			failed = 0;
+
+		/*
+		 * Print the incoming part of the call, that is, possibly some
+		 * or all of its parameters and the call's closing parenthesis
+		 * (if CT_NOTDONE), and the equals sign (if not CT_DONE), then
+		 * the call result.
+		 */
+		call_in(proc, failed);
+	}
+
+	if (!hide) {
+		/*
+		 * The call is complete now, so clear the recording.  This also
+		 * implies that no suspension marker will be printed anymore.
+		 */
+		record_clear(proc);
+
+		put_newline();
+	}
+
+	/*
+	 * Unless the call is hidden (in which case only CF_HIDE is set), its
+	 * parameters must be done by now.  This guards against bad handlers.
+	 */
+	assert(hide || (proc->call_flags & CF_DONE));
+
+	proc->call_name = NULL;
+	proc->call_flags = 0;
+}
+
+/*
+ * Replay the recorded text, if any, for the enter phase of the given process.
+ * If there is no recorded text, start a new line anyway.
+ */
+void
+call_replay(struct trace_proc * proc)
+{
+
+	/*
+	 * We get TRUE if the recorded call should be replayed, but the
+	 * recorded text for the call did not fit in the recording buffer.
+	 * In that case, we have to come up with a replacement text for the
+	 * call up to the call split.
+	 */
+	if (record_replay(proc) == TRUE) {
+		/*
+		 * We basically place a "<..>" suspension marker in the
+		 * parameters part of the call, and use its call name and flags
+		 * for the rest.  There is a trailing space in all cases.
+		 */
+		put_fmt(proc, "%s(<..>%s", proc->call_name,
+		    !(proc->call_flags & CF_DONE) ? "," :
+		    ((proc->call_flags & CF_NORETURN) ? ")" : ") ="));
+		put_space(proc);
+	}
+}
+
+/*
+ * Return the human-readable name of the call currently being made by the given
+ * process.  The process is guaranteed to be in a call, although the call may
+ * be hidden.  Under no circumstances may this function return a NULL pointer.
+ */
+const char *
+call_name(struct trace_proc * proc)
+{
+
+	assert(proc->call_name != NULL);
+
+	return proc->call_name;
+}
diff --git a/minix/usr.bin/trace/error.awk b/minix/usr.bin/trace/error.awk
new file mode 100644
index 000000000..747616230
--- /dev/null
+++ b/minix/usr.bin/trace/error.awk
@@ -0,0 +1,28 @@
+# Derived from libc errlist.awk.  Writes C source to standard output: a table
+# of error names indexed by error number, and a get_error_name() lookup.
+BEGIN {
+	printf("/* This file is automatically generated by error.awk */\n\n");
+	printf("#include \"inc.h\"\n\n");
+	printf("static const char *const errors[] = {\n");
+}
+/^#define/ {
+	name = $2;
+	if (name == "ELAST")		# no table entry for ELAST
+		next;
+	number = $3;
+	if (number == "(_SIGN")		# value follows as the fourth field
+		number = $4;
+	if (number < 0 || number == "EAGAIN")	# e.g. defined as EAGAIN
+		next;
+	printf("\t[%s] = \"%s\",\n", name, name);
+}
+END {
+	printf("};\n\n");
+	printf("const char *\nget_error_name(int err)\n{\n\n");
+	printf("\tif (err >= 0 && err < sizeof(errors) / sizeof(errors[0]) &&\n");
+	printf("\t    errors[err] != NULL)\n");
+	printf("\t\treturn errors[err];\n");
+	printf("\telse\n");
+	printf("\t\treturn NULL;\n");
+	printf("}\n");
+}
diff --git a/minix/usr.bin/trace/escape.c b/minix/usr.bin/trace/escape.c
new file mode 100644
index 000000000..7f00ab87d
--- /dev/null
+++ b/minix/usr.bin/trace/escape.c
@@ -0,0 +1,48 @@
+
+#include "inc.h"
+
+static const char *const escape[256] = {	/* indexed by byte value */
+	"\\0",   "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07",
+	"\\x08", "\\t",   "\\n",   "\\x0B", "\\x0C", "\\r",   "\\x0E", "\\x0F",
+	"\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
+	"\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
+	" ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
+	"(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
+	"0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
+	"8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
+	"@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
+	"H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
+	"P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
+	"X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
+	"`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
+	"h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
+	"p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
+	"x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
+	"\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
+	"\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
+	"\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
+	"\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
+	"\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
+	"\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
+	"\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
+	"\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
+	"\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
+	"\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
+	"\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
+	"\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
+	"\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
+	"\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
+	"\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
+	"\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
+};
+
+/*
+ * Return the printable, escaped representation of the given character.  The
+ * double quote and the backslash are escaped so quoted output is unambiguous.
+ */
+const char *
+get_escape(char c)
+{
+	/* Go through unsigned char: a signed char could index negatively. */
+	return escape[(unsigned int)(unsigned char)c];
+}
diff --git a/minix/usr.bin/trace/format.c b/minix/usr.bin/trace/format.c
new file mode 100644
index 000000000..10a6b3eb2
--- /dev/null
+++ b/minix/usr.bin/trace/format.c
@@ -0,0 +1,426 @@
+
+#include "inc.h"
+
+#include <stdarg.h>
+
+/*
+ * The size of the formatting buffer.  This bounds the output of the variadic
+ * functions (put_fmt and put_value), which silently truncate anything longer.
+ * Printer functions that deal with potentially large or even unbounded output
+ * should therefore generate it in smaller chunks, so that nothing printed as
+ * a unit ever comes close to reaching this limit.
+ */
+#define FORMAT_BUFSZ	4096
+
+/*
+ * The buffer used for all intermediate copying and/or formatting, shared by
+ * put_fmt, put_value, and put_buf.  Only one function may use it at any time.
+ */
+static char formatbuf[FORMAT_BUFSZ];
+
+/*
+ * Return the line formatting state of the given process to its initial state.
+ */
+void
+format_reset(struct trace_proc * proc)
+{
+
+	proc->depth = -1;		/* back at the basic line level */
+	proc->next_sep = NULL;		/* and no separator is pending */
+}
+
+/*
+ * Set the separator to be printed before the next field of the given process,
+ * replacing any previous one.  NULL means that no separator will be printed.
+ */
+void
+format_set_sep(struct trace_proc * proc, const char * sep)
+{
+
+	proc->next_sep = sep;
+}
+
+/*
+ * Print the pending separator of the given process, if any, and clear it.
+ */
+void
+format_push_sep(struct trace_proc * proc)
+{
+	const char *pending = proc->next_sep;
+
+	if (pending == NULL)
+		return;
+	put_text(proc, pending);
+	proc->next_sep = NULL;
+}
+
+/*
+ * Print a field, e.g. a parameter or a field from a structure, separated from
+ * other fields at the same nesting depth as appropriate.  The field name, if
+ * not NULL, is printed only if the enclosing block (or allnames) asks for it,
+ * and never at the basic line level.  The given text starts the field; the
+ * caller may keep printing text for the same field with e.g. put_text(), so
+ * the text may even be an empty string.
+ */
+void
+put_field(struct trace_proc * proc, const char * name, const char * text)
+{
+
+	/*
+	 * At depth -1 (the basic line level), names are not used, and there
+	 * is no depths[] entry to consult: depths[-1] must never be read.
+	 */
+	if (proc->depth < 0)
+		name = NULL;
+
+	format_push_sep(proc);
+
+	if (name != NULL && (proc->depths[proc->depth].name || allnames)) {
+		put_text(proc, name);
+		put_text(proc, "=");
+	}
+	put_text(proc, text);
+
+	format_set_sep(proc,
+	    (proc->depth >= 0) ? proc->depths[proc->depth].sep : NULL);
+}
+
+/*
+ * Increase the nesting depth with a new block of fields, enclosed within
+ * parentheses, brackets, etcetera.  The given name, which may be NULL, is the
+ * name of the entire nested block.  If the PF_NONAME flag is given, the names
+ * of the fields within the block are not printed, although this may be
+ * overridden by setting the allnames variable.  The given string is the block
+ * opening string (e.g., an opening parenthesis).  The given separator is used
+ * to separate the fields within the nested block, and should generally be
+ * ", " to maintain output consistency.
+ */
+void
+put_open(struct trace_proc * proc, const char * name, int flags,
+	const char * string, const char * sep)
+{
+	int level;
+
+	/* The opening string is itself a field of the enclosing block. */
+	put_field(proc, name, string);
+
+	level = ++proc->depth;
+	assert(proc->depth < MAX_DEPTH);
+
+	proc->depths[level].name = !(flags & PF_NONAME);
+	proc->depths[level].sep = sep;
+	format_set_sep(proc, NULL);	/* nothing precedes the first field */
+}
+
+/*
+ * Decrease the nesting depth by ending a nested block of fields.  The given
+ * string is the closing parenthesis, bracket, etcetera.
+ */
+void
+put_close(struct trace_proc * proc, const char * string)
+{
+	const char *outer_sep = NULL;
+
+	assert(proc->depth >= 0);
+
+	put_text(proc, string);
+
+	/* Resume the enclosing block's separator; the line level has none. */
+	if (--proc->depth >= 0)
+		outer_sep = proc->depths[proc->depth].sep;
+
+	format_set_sep(proc, outer_sep);
+}
+
+/*
+ * Version of put_text with variadic arguments.  The given process may be NULL.
+ */
+void
+put_fmt(struct trace_proc * proc, const char * fmt, ...)
+{
+	va_list ap;
+	/* Text that does not fit in formatbuf is silently truncated. */
+	va_start(ap, fmt);
+	(void)vsnprintf(formatbuf, sizeof(formatbuf), fmt, ap);
+	va_end(ap);
+
+	put_text(proc, formatbuf);
+}
+
+/*
+ * Version of put_field taking a printf-style format for the field's text.
+ */
+void
+put_value(struct trace_proc * proc, const char * name, const char * fmt, ...)
+{
+	va_list ap;
+	/* Text that does not fit in formatbuf is silently truncated. */
+	va_start(ap, fmt);
+	(void)vsnprintf(formatbuf, sizeof(formatbuf), fmt, ap);
+	va_end(ap);
+
+	put_field(proc, name, formatbuf);
+}
+
+/*
+ * Start printing a structure.  Normally, this copies 'size' bytes from address
+ * 'addr' into the local buffer 'ptr', opens a nested block named 'name' (which
+ * may be NULL) with an opening curly bracket, and returns TRUE to tell the
+ * caller to print the structure's fields.  With PF_LOCADDR in 'flags', 'addr'
+ * is a local address and an intraprocess copy is made; otherwise the data are
+ * copied in from the traced process.  No copy is made and FALSE is returned if
+ * 'flags' contains PF_FAILED, 'addr' is zero, or valuesonly exceeds one; then,
+ * "&.." (for local addresses) or a pointer is printed instead.  If the remote
+ * copy fails, a pointer is printed and FALSE is returned as well.
+ */
+int
+put_open_struct(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr, void * ptr, size_t size)
+{
+	int local = (flags & PF_LOCADDR);
+
+	if ((flags & PF_FAILED) || valuesonly > 1 || addr == 0) {
+		if (local)
+			put_field(proc, name, "&..");
+		else
+			put_ptr(proc, name, addr);
+
+		return FALSE;
+	}
+
+	if (local)
+		memcpy(ptr, (void *) addr, size);
+	else if (mem_get_data(proc->pid, addr, ptr, size) < 0) {
+		put_ptr(proc, name, addr);
+
+		return FALSE;
+	}
+
+	put_open(proc, name, flags, "{", ", ");
+
+	return TRUE;
+}
+
+/*
+ * End printing a structure, closing the block with a curly bracket.  This must
+ * be called only to match a call to put_open_struct that returned TRUE.  If
+ * the given 'all' flag is zero, a ".." field is printed first, to show the
+ * user that some fields of the structure have not been printed.
+ */
+void
+put_close_struct(struct trace_proc * proc, int all)
+{
+
+	if (!all)
+		put_field(proc, NULL, "..");
+
+	put_close(proc, "}");
+}
+
+/*
+ * Print a pointer.  A zero address prints as NULL, unless in valuesonly mode.
+ */
+void
+put_ptr(struct trace_proc * proc, const char * name, vir_bytes addr)
+{
+
+	if (addr != 0 || valuesonly)
+		put_value(proc, name, "&0x%lx", addr);
+	else
+		put_field(proc, name, "NULL");
+}
+
+/*
+ * Print the contents of a buffer, at address 'addr' and of 'size' bytes, as a
+ * quoted and escaped field using name 'name' (which may be NULL).  If the
+ * PF_FAILED flag is given, the buffer address is printed instead, since it is
+ * assumed that the actual buffer contains garbage; the same happens for a zero
+ * address, a negative size, and in valuesonly mode.  If the PF_LOCADDR flag is
+ * given, the address is a local one and no interprocess copies are performed.
+ * If the PF_STRING flag is given, the buffer is expected to contain a null
+ * terminator within its size, and the string is printed only up to there.
+ * Normally, the output is cut off beyond a number of bytes which depends on
+ * the verbosity level; if the PF_FULL flag is given, all of it is printed no
+ * matter its size (used mainly for path names, which become useless if cut).
+ */
+void
+put_buf(struct trace_proc * proc, const char * name, int flags, vir_bytes addr,
+	ssize_t size)
+{
+	size_t len, off, max, chunk, i;
+	int cutoff;
+	char *p;
+
+	if ((flags & PF_FAILED) || valuesonly || addr == 0 || size < 0) {
+		if (flags & PF_LOCADDR)
+			put_field(proc, name, "&..");
+		else
+			put_ptr(proc, name, addr);
+
+		return;
+	}
+
+	if (size == 0) {
+		put_field(proc, name, "\"\"");
+
+		return;
+	}
+
+	/*
+	 * TODO: the maximum says nothing about the size of the printed text.
+	 * Escaped-character printing can make the output much longer.  Does it
+	 * make more sense to apply a limit after the escape transformation?
+	 */
+	if (verbose == 0) max = 32;
+	else if (verbose == 1) max = 256;
+	else max = SIZE_MAX;
+
+	/*
+	 * If the output is cut off, we put two dots after the closing quote.
+	 * For non-string buffers, the output is cut off if the size exceeds
+	 * our limit or we run into a copying error somewhere in the middle.
+	 * For strings, the output is cut off unless we find a null terminator.
+	 */
+	cutoff = !!(flags & PF_STRING);
+	len = (size_t)size;
+	if (!(flags & PF_FULL) && len > max) {
+		len = max;
+		cutoff = TRUE;
+	}
+
+	for (off = 0; off < len; off += chunk) {
+		chunk = len - off;
+		if (chunk > sizeof(formatbuf) - 1)
+			chunk = sizeof(formatbuf) - 1;
+
+		/*
+		 * Fetch the next chunk, from either the traced process or
+		 * local memory.  Both sources advance by the current offset.
+		 */
+		if (!(flags & PF_LOCADDR)) {
+			if (mem_get_data(proc->pid, addr + off, formatbuf,
+			    chunk) < 0) {
+				if (off == 0) {
+					put_ptr(proc, name, addr);
+
+					return;
+				}
+
+				cutoff = TRUE;
+				break;
+			}
+		} else
+			memcpy(formatbuf, (void *)(addr + off), chunk);
+
+		if (off == 0)
+			put_field(proc, name, "\"");
+
+		/* In strings, look for the terminating null character. */
+		if ((flags & PF_STRING) &&
+		    (p = memchr(formatbuf, '\0', chunk)) != NULL) {
+			chunk = (size_t)(p - formatbuf);
+			cutoff = FALSE;
+		}
+
+		/* Print the buffer contents using escaped characters. */
+		for (i = 0; i < chunk; i++)
+			put_text(proc, get_escape(formatbuf[i]));
+
+		/* Stop if we found the end of the string. */
+		if ((flags & PF_STRING) && !cutoff)
+			break;
+	}
+
+	if (cutoff)
+		put_text(proc, "\"..");
+	else
+		put_text(proc, "\"");
+}
+
+/*
+ * Print a flags field, using known flag names.  The name of the whole field is
+ * given as 'name' and may be NULL.  The caller must supply an array of known
+ * flags as 'fp' (with 'num' entries).  Each entry in the array has a mask, a
+ * value, and a name.  If the given flags 'value', bitwise-ANDed with the mask
+ * of an entry, yields the value of that entry, then the name is printed.  This
+ * means that certain zero bits may also be printed as actual flags, and that
+ * an entry with an all-bits-set mask can print a flag name for a zero value,
+ * for example F_OK for access().  See the FLAG macros and their usage for
+ * examples.  All matching flag names are printed with a "|" separator, and if
+ * after evaluating all 'num' entries in 'fp' there are still bits in 'value'
+ * for which nothing has been printed, the remaining bits will be printed with
+ * the 'fmt' format string for an integer (generally "%d" should be used).
+ */
+void
+put_flags(struct trace_proc * proc, const char * name, const struct flags * fp,
+	unsigned int num, const char * fmt, unsigned int value)
+{
+	unsigned int left;
+	int first;
+
+	if (valuesonly) {	/* numbers only: skip the names */
+		put_value(proc, name, fmt, value);
+
+		return;
+	}
+
+	put_field(proc, name, "");
+
+	for (first = TRUE, left = value; num > 0; fp++, num--) {
+		if ((value & fp->mask) == fp->value) {
+			if (first)
+				first = FALSE;
+			else
+				put_text(proc, "|");
+			put_text(proc, fp->name);
+			/* Clear rather than subtract: entries may overlap. */
+			left &= ~fp->value;
+		}
+	}
+
+	if (left != 0) {
+		if (first)
+			first = FALSE;
+		else
+			put_text(proc, "|");
+
+		put_fmt(proc, fmt, left);
+	}
+
+	/*
+	 * If nothing has been printed so far, simply print a zero.  Ignoring
+	 * the given format in this case is intentional: a simple 0 looks
+	 * better than 0x0 or 00 etc.
+	 */
+	if (first)
+		put_text(proc, "0");
+}
+
+/*
+ * Print a tail field at the end of an array, standing in for the elements that
+ * are not printed.  The given 'count' value is the total number of elements in
+ * the array, or 0 to indicate that an error occurred.  The given 'printed'
+ * value is the number of elements printed so far.  If some elements have been
+ * printed already, the number of remaining ones is shown as "..(+N)".  If none
+ * have been printed, the (total) number is shown as "..(N)".  An error is
+ * shown as "..(?)".
+ *
+ * The rules for printing an array are as follows.  In principle, arrays should
+ * be enclosed in "[]".  However, if a copy error occurs immediately, a pointer
+ * to the array should be printed instead.  An empty array should be printed as
+ * "[]" (not "[..(0)]").  If a copy error occurs in the middle of the array,
+ * put_tail should be used with count == 0.  Only if not all fields in the
+ * array are printed, put_tail should be used with count > 0.  The value of
+ * 'printed' is typically the result of an arbitrary limit set based on the
+ * verbosity level.
+ */
+void
+put_tail(struct trace_proc * proc, unsigned int count, unsigned int printed)
+{
+	const char *sign = (printed > 0) ? "+" : "";
+
+	if (count != 0)
+		put_value(proc, NULL, "..(%s%u)", sign, count - printed);
+	else
+		put_field(proc, NULL, "..(?)");
+}
diff --git a/minix/usr.bin/trace/inc.h b/minix/usr.bin/trace/inc.h
new file mode 100644
index 000000000..2c84069c0
--- /dev/null
+++ b/minix/usr.bin/trace/inc.h
@@ -0,0 +1,22 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ptrace.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/com.h>
+#include <minix/callnr.h>
+#include <minix/endpoint.h>
+#include <machine/stackframe.h>
+
+#include "proc.h"
+#include "type.h"
+#include "proto.h"
diff --git a/minix/usr.bin/trace/ioctl.c b/minix/usr.bin/trace/ioctl.c
new file mode 100644
index 000000000..ed8931baf
--- /dev/null
+++ b/minix/usr.bin/trace/ioctl.c
@@ -0,0 +1,226 @@
+
+#include "inc.h"
+
+#include <sys/ioctl.h>
+
+static char ioctlbuf[IOCPARM_MASK];	/* local copy of an IOCTL argument */
+
+static const struct {
+	const char *(*name)(unsigned long);	/* name of request, or NULL */
+	int (*arg)(struct trace_proc *, unsigned long, void *, int);
+	int is_svrctl;				/* TRUE for the SVRCTL name space */
+} ioctl_table[] = {	/* all entries are tried in order by put_ioctl_req */
+	{ block_ioctl_name,	block_ioctl_arg,	FALSE	},
+	{ char_ioctl_name,	char_ioctl_arg,		FALSE	},
+	{ net_ioctl_name,	net_ioctl_arg,		FALSE	},
+	{ svrctl_name,		svrctl_arg,		TRUE	},
+};
+
+/*
+ * Print an IOCTL request code: by name if known, else decoded, else as number.
+ * The submodule that knows the name is saved in 'proc' for argument printing.
+ */
+void
+put_ioctl_req(struct trace_proc * proc, const char * name, unsigned long req,
+	int is_svrctl)
+{
+	const char *text;
+	size_t size;
+	unsigned int group, cmd;
+	int i, r, w, big;
+
+	proc->ioctl_index = -1;
+
+	if (valuesonly > 1) {
+		put_value(proc, name, "0x%lx", req);
+
+		return;
+	}
+
+	/*
+	 * Lookups are bruteforce across the IOCTL submodules; they're all
+	 * checked.  We could use the group letter but that would create more
+	 * issues than it solves.  Our hope is that at least the compiler is
+	 * smart about looking up particular codes in each switch statement,
+	 * although in the worst case, it's a full O(n) lookup.
+	 */
+	for (i = 0; !valuesonly && i < COUNT(ioctl_table); i++) {
+		/* IOCTLs and SVRCTLs are considered different name spaces. */
+		if (ioctl_table[i].is_svrctl != is_svrctl)
+			continue;
+
+		if ((text = ioctl_table[i].name(req)) != NULL) {
+			put_field(proc, name, text);
+
+			proc->ioctl_index = i;
+
+			return;
+		}
+	}
+
+	r = _MINIX_IOCTL_IOR(req);
+	w = _MINIX_IOCTL_IOW(req);
+	big = _MINIX_IOCTL_BIG(req);
+	size = (size_t)(big ? _MINIX_IOCTL_SIZE_BIG(req) : IOCPARM_LEN(req));
+	group = big ? 0 : IOCGROUP(req);
+	cmd = req & 0xff; /* shockingly there is no macro for this.. */
+
+	/*
+	 * Not sure why an entire bit is wasted on IOC_VOID (legacy reasons?),
+	 * but since the redundancy is there, we might as well check whether
+	 * this is a valid IOCTL request.  Also, the group must be a printable
+	 * character (32..126).  If either check fails, print just a number.
+	 */
+	if (((req & IOC_VOID) && (r || w || big || size > 0)) ||
+	    (!(req & IOC_VOID) && ((!r && !w) || size == 0)) ||
+	    (!big && (group < 32 || group >= 127))) {
+		put_value(proc, name, "0x%lx", req);
+
+		return;
+	}
+
+	if (big) {
+		/* For big IOCTLs, "R" comes before "W" (old MINIX style). */
+		put_value(proc, name, "_IO%s%s_BIG(%u,%zu)",
+		    r ? "R" : "", w ? "W" : "", cmd, size);
+	} else {
+		/* For normal IOCTLs, "W" comes before "R" (NetBSD style). */
+		put_value(proc, name, "_IO%s%s('%c',%u,%zu)",
+		    w ? "W" : "", r ? "R" : "", group, cmd, size);
+	}
+}
+
+/*
+ * Print the supplied (out) part of an IOCTL argument, as applicable.  This
+ * assumes that put_ioctl_req() was already called for the same IOCTL, so that
+ * proc->ioctl_index is set.  Return CT_NOTDONE if the handler wants nothing
+ * printed for the out direction (leaving it to the in side), else CT_DONE.
+ */
+int
+put_ioctl_arg_out(struct trace_proc * proc, const char * name,
+	unsigned long req, vir_bytes addr, int is_svrctl)
+{
+	size_t size;
+	int dir, all;
+
+	dir = (_MINIX_IOCTL_IOW(req) ? IF_OUT : 0) |
+	    (_MINIX_IOCTL_IOR(req) ? IF_IN : 0);
+
+	if (dir == 0)
+		proc->ioctl_index = -1; /* no argument to print at all */
+
+	/* No support for printing big-IOCTL contents just yet. */
+	if (valuesonly > 1 || _MINIX_IOCTL_BIG(req) ||
+	    proc->ioctl_index == -1) {
+		put_ptr(proc, name, addr);
+
+		return CT_DONE;
+	}
+
+	assert(proc->ioctl_index >= 0);
+	assert(proc->ioctl_index < COUNT(ioctl_table));
+	assert(ioctl_table[proc->ioctl_index].is_svrctl == is_svrctl);
+	/* A NULL data pointer asks the handler which directions it prints. */
+	proc->ioctl_flags =
+	    ioctl_table[proc->ioctl_index].arg(proc, req, NULL, dir);
+
+	if (proc->ioctl_flags == 0) { /* no argument printing for this IOCTL */
+		put_ptr(proc, name, addr);
+
+		proc->ioctl_index = -1; /* forget about the IOCTL handler */
+
+		return CT_DONE;
+	}
+
+	/*
+	 * If this triggers, the IOCTL handler returns a direction that is not
+	 * part of the actual IOCTL, and the handler should be fixed.
+	 */
+	if (proc->ioctl_flags & ~dir) {
+		output_flush(); /* show the IOCTL name for debugging */
+
+		assert(0);
+	}
+
+	if (!(proc->ioctl_flags & IF_OUT))
+		return CT_NOTDONE;
+
+	size = IOCPARM_LEN(req);
+
+	if (size > sizeof(ioctlbuf) ||
+	    mem_get_data(proc->pid, addr, ioctlbuf, size) < 0) {
+		put_ptr(proc, name, addr);
+
+		/* There's no harm in trying the _in side later anyhow.. */
+		return CT_DONE;
+	}
+
+	put_open(proc, name, 0, "{", ", ");
+	/* The handler prints the fields, and tells whether it printed all. */
+	all = ioctl_table[proc->ioctl_index].arg(proc, req, ioctlbuf, IF_OUT);
+
+	if (!all)
+		put_field(proc, NULL, "..");
+
+	put_close(proc, "}");
+
+	return CT_DONE;
+}
+
+/*
+ * Print the returned (in) part of an IOCTL argument, as applicable, along with
+ * the call result.  This function assumes that it is preceded by a call to
+ * put_ioctl_arg_out for this process.
+ */
+void
+put_ioctl_arg_in(struct trace_proc * proc, const char * name, int failed,
+	unsigned long req, vir_bytes addr, int is_svrctl)
+{
+	size_t size;
+	int all;
+
+	if (valuesonly > 1 || _MINIX_IOCTL_BIG(req) ||
+	    proc->ioctl_index == -1) {
+		put_result(proc);
+
+		return;
+	}
+
+	assert(proc->ioctl_index >= 0);
+	assert(proc->ioctl_index < COUNT(ioctl_table));
+	assert(ioctl_table[proc->ioctl_index].is_svrctl == is_svrctl);
+	assert(proc->ioctl_flags != 0);
+	/* With an out part, the result goes before any returned data. */
+	if (proc->ioctl_flags & IF_OUT)
+		put_result(proc);
+	if (!(proc->ioctl_flags & IF_IN))
+		return;
+
+	size = IOCPARM_LEN(req);
+
+	if (failed || size > sizeof(ioctlbuf) ||
+	    mem_get_data(proc->pid, addr, ioctlbuf, size) < 0) {
+		if (!(proc->ioctl_flags & IF_OUT)) {
+			put_ptr(proc, name, addr);
+			put_equals(proc);
+			put_result(proc);
+		} else if (!failed)	/* the data could not be obtained */
+			put_field(proc, NULL, "{..}");
+
+		return;
+	}
+
+	put_open(proc, name, 0, "{", ", ");
+	/* The handler prints the fields, and tells whether it printed all. */
+	all = ioctl_table[proc->ioctl_index].arg(proc, req, ioctlbuf, IF_IN);
+
+	if (!all)
+		put_field(proc, NULL, "..");
+
+	put_close(proc, "}");
+
+	if (!(proc->ioctl_flags & IF_OUT)) {
+		put_equals(proc);
+		put_result(proc);
+	}
+}
diff --git a/minix/usr.bin/trace/ioctl/block.c b/minix/usr.bin/trace/ioctl/block.c
new file mode 100644
index 000000000..bca7d32d1
--- /dev/null
+++ b/minix/usr.bin/trace/ioctl/block.c
@@ -0,0 +1,229 @@
+
+#include "inc.h"
+
+#include <sys/ioctl.h>
+#include <minix/partition.h>
+#include <sys/vm.h>
+#include <sys/mtio.h>
+
+const char *
+block_ioctl_name(unsigned long req)
+{
+	/* NAME(x) presumably expands to: case x: return "x"; (TODO confirm) */
+	switch (req) {
+	NAME(BIOCTRACEBUF);
+	NAME(BIOCTRACECTL);
+	NAME(BIOCTRACEGET);	/* big IOCTL, not printing argument */
+	NAME(DIOCSETP);
+	NAME(DIOCGETP);
+	NAME(DIOCEJECT);	/* no argument */
+	NAME(DIOCTIMEOUT);
+	NAME(DIOCOPENCT);
+	NAME(DIOCFLUSH);	/* no argument */
+	NAME(DIOCGETWC);
+	NAME(DIOCSETWC);
+	NAME(FBDCADDRULE);
+	NAME(FBDCDELRULE);
+	NAME(FBDCGETRULE);
+	NAME(MIOCRAMSIZE);
+	NAME(MTIOCGET);		/* TODO: print argument */
+	NAME(MTIOCTOP);		/* TODO: print argument */
+	NAME(VNDIOCCLR);
+	NAME(VNDIOCGET);
+	NAME(VNDIOCSET);
+	}
+	/* Not a request code known to this submodule. */
+	return NULL;
+}
+
+static const struct flags fbd_flags[] = {	/* for fbd_rule.flags */
+	FLAG(FBD_FLAG_READ),
+	FLAG(FBD_FLAG_WRITE),
+};
+
+static void
+put_fbd_action(struct trace_proc * proc, const char * name, int action)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (action) {
+		TEXT(FBD_ACTION_CORRUPT);
+		TEXT(FBD_ACTION_ERROR);
+		TEXT(FBD_ACTION_MISDIR);
+		TEXT(FBD_ACTION_LOSTTORN);
+		}
+	}
+	/* Print the action number if no name was set for it above. */
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", action);
+}
+
+static const struct flags vnd_flags[] = {	/* for vnd_ioctl.vnd_flags */
+	FLAG(VNDIOF_HASGEOM),
+	FLAG(VNDIOF_READONLY),
+	FLAG(VNDIOF_FORCE),
+};
+
+int
+block_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr,
+	int dir)
+{
+	struct part_geom *part;
+	struct fbd_rule *rule;
+	struct vnd_ioctl *vnd;
+	struct vnd_user *vnu;
+	int i;
+	/* NULL ptr: return what to print (IF_ flags); else print for 'dir'. */
+	switch (req) {
+	case BIOCTRACEBUF:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_value(proc, NULL, "%zu", *(size_t *)ptr);
+		return IF_ALL;
+
+	case BIOCTRACECTL:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		i = *(int *)ptr;
+		if (!valuesonly && i == BTCTL_START)
+			put_field(proc, NULL, "BTCTL_START");
+		else if (!valuesonly && i == BTCTL_STOP)
+			put_field(proc, NULL, "BTCTL_STOP");
+		else
+			put_value(proc, NULL, "%d", i);
+		return IF_ALL;
+
+	case DIOCSETP:
+		if ((part = (struct part_geom *)ptr) == NULL)
+			return IF_OUT;
+
+		put_value(proc, "base", "%"PRIu64, part->base);
+		put_value(proc, "size", "%"PRIu64, part->size);
+		return IF_ALL;
+
+	case DIOCGETP:
+		if ((part = (struct part_geom *)ptr) == NULL)
+			return IF_IN;
+
+		put_value(proc, "base", "%"PRIu64, part->base);
+		put_value(proc, "size", "%"PRIu64, part->size);
+		if (verbose > 0) {
+			put_value(proc, "cylinders", "%u", part->cylinders);
+			put_value(proc, "heads", "%u", part->heads);
+			put_value(proc, "sectors", "%u", part->sectors);
+			return IF_ALL;
+		} else
+			return 0;	/* not all fields were printed */
+
+	case DIOCTIMEOUT:
+		/* Print the old timeout only if verbosity is high enough. */
+		if (ptr == NULL)
+			return IF_OUT | ((verbose > 0) ? IF_IN : 0);
+
+		/* Same action for out and in. */
+		put_value(proc, NULL, "%d", *(int *)ptr);
+		return IF_ALL;
+
+	case DIOCOPENCT:
+		if (ptr == NULL)
+			return IF_IN;
+
+		put_value(proc, NULL, "%d", *(int *)ptr);
+		return IF_ALL;
+
+	case DIOCSETWC:
+	case DIOCGETWC:
+		if (ptr == NULL)
+			return dir; /* out or in, depending on the request */
+
+		put_value(proc, NULL, "%d", *(int *)ptr);
+		return IF_ALL;
+
+	case FBDCDELRULE:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_value(proc, NULL, "%d", *(fbd_rulenum_t *)ptr);
+		return IF_ALL;
+
+	case FBDCGETRULE:
+		if ((rule = (struct fbd_rule *)ptr) == NULL)
+			return IF_OUT | IF_IN;
+
+		if (dir == IF_OUT) {
+			put_value(proc, "num", "%d", rule->num);
+			return IF_ALL;
+		}
+
+		/*
+		 * The returned result is the same as what is passed to the
+		 * add request, so we can use the same code to print both.
+		 */
+		/* FALLTHROUGH */
+	case FBDCADDRULE:
+		if ((rule = (struct fbd_rule *)ptr) == NULL)
+			return IF_OUT;
+
+		if (rule->start != 0 || rule->end != 0 || verbose > 0) {
+			put_value(proc, "start", "%"PRIu64, rule->start);
+			put_value(proc, "end", "%"PRIu64, rule->end);
+		}
+		if (rule->flags != (FBD_FLAG_READ | FBD_FLAG_WRITE) ||
+		    verbose > 0)
+			put_flags(proc, "flags", fbd_flags, COUNT(fbd_flags),
+			    "0x%x", rule->flags);
+		if (rule->skip != 0 || verbose > 0)
+			put_value(proc, "skip", "%u", rule->skip);
+		if (rule->count != 0 || verbose > 0)
+			put_value(proc, "count", "%u", rule->count);
+		put_fbd_action(proc, "action", rule->action);
+
+		return 0; /* TODO: optionally print the union fields */
+
+	case MIOCRAMSIZE:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_value(proc, NULL, "%"PRIu32, *(u32_t *)ptr);
+		return IF_ALL;
+
+	case VNDIOCSET:
+		if ((vnd = (struct vnd_ioctl *)ptr) == NULL)
+			return IF_OUT | IF_IN;
+
+		if (dir == IF_OUT) {
+			put_value(proc, "vnd_fildes", "%d", vnd->vnd_fildes);
+			put_flags(proc, "vnd_flags", vnd_flags,
+			    COUNT(vnd_flags), "0x%x", vnd->vnd_flags);
+			return 0; /* TODO: print geometry if given */
+		} else {
+			put_value(proc, "vnd_size", "%"PRIu64, vnd->vnd_size);
+			return IF_ALL;
+		}
+
+	case VNDIOCCLR:
+		if ((vnd = (struct vnd_ioctl *)ptr) == NULL)
+			return IF_OUT;
+
+		put_flags(proc, "vnd_flags", vnd_flags, COUNT(vnd_flags),
+		    "0x%x", vnd->vnd_flags);
+		return IF_ALL;
+
+	case VNDIOCGET:
+		if ((vnu = (struct vnd_user *)ptr) == NULL)
+			return IF_IN;
+
+		put_value(proc, "vnu_unit", "%d", vnu->vnu_unit);
+		put_dev(proc, "vnu_dev", vnu->vnu_dev);
+		put_value(proc, "vnu_ino", "%"PRId64, vnu->vnu_ino);
+		return IF_ALL;
+
+	default:
+		return 0;	/* no argument printing for this request */
+	}
+}
diff --git a/minix/usr.bin/trace/ioctl/char.c b/minix/usr.bin/trace/ioctl/char.c
new file mode 100644
index 000000000..5d940427b
--- /dev/null
+++ b/minix/usr.bin/trace/ioctl/char.c
@@ -0,0 +1,509 @@
+
+#include "inc.h"
+
+#include <sys/ioctl.h>
+#include <minix/i2c.h>
+#include <minix/fb.h>
+#include <minix/sound.h>
+#include <sys/termios.h>
+#include <sys/time.h>
+#include <sys/kbdio.h>
+#include <minix/keymap.h>
+#include <sys/vm.h>
+#include <sys/fcntl.h>
+
+/*
+ * Return the symbolic name of the given character-device IOCTL request code,
+ * or NULL if the request is not one of those listed here.  The trailing
+ * comments mark requests that take no argument, and requests for which the
+ * argument is not (yet) printed.
+ * NOTE(review): NAME() is defined outside this file; presumably it expands to
+ * a case label that returns the request's name as a string - confirm.
+ */
+const char *
+char_ioctl_name(unsigned long req)
+{
+
+	switch (req) {
+	NAME(MINIX_I2C_IOCTL_EXEC);
+	NAME(FBIOGET_VSCREENINFO);
+	NAME(FBIOPUT_VSCREENINFO);
+	NAME(FBIOGET_FSCREENINFO);	/* TODO: print argument */
+	NAME(FBIOPAN_DISPLAY);
+	NAME(DSPIORATE);
+	NAME(DSPIOSTEREO);
+	NAME(DSPIOSIZE);
+	NAME(DSPIOBITS);
+	NAME(DSPIOSIGN);
+	NAME(DSPIOMAX);
+	NAME(DSPIORESET);		/* no argument */
+	NAME(DSPIOFREEBUF);
+	NAME(DSPIOSAMPLESINBUF);
+	NAME(DSPIOPAUSE);		/* no argument */
+	NAME(DSPIORESUME);		/* no argument */
+	NAME(MIXIOGETVOLUME);
+	NAME(MIXIOGETINPUTLEFT);
+	NAME(MIXIOGETINPUTRIGHT);
+	NAME(MIXIOGETOUTPUT);
+	NAME(MIXIOSETVOLUME);
+	NAME(MIXIOSETINPUTLEFT);
+	NAME(MIXIOSETINPUTRIGHT);
+	NAME(MIXIOSETOUTPUT);
+	NAME(TIOCEXCL);			/* no argument */
+	NAME(TIOCNXCL);			/* no argument */
+	NAME(TIOCFLUSH);
+	NAME(TIOCGETA);
+	NAME(TIOCSETA);
+	NAME(TIOCSETAW);
+	NAME(TIOCSETAF);
+	NAME(TIOCGETD);
+	NAME(TIOCSETD);
+	NAME(TIOCGLINED);
+	NAME(TIOCSLINED);
+	NAME(TIOCSBRK);			/* no argument */
+	NAME(TIOCCBRK);			/* no argument */
+	NAME(TIOCSDTR);			/* no argument */
+	NAME(TIOCCDTR);			/* no argument */
+	NAME(TIOCGPGRP);
+	NAME(TIOCSPGRP);
+	NAME(TIOCOUTQ);
+	NAME(TIOCSTI);
+	NAME(TIOCNOTTY);		/* no argument */
+	NAME(TIOCPKT);
+	NAME(TIOCSTOP);			/* no argument */
+	NAME(TIOCSTART);		/* no argument */
+	NAME(TIOCMSET);			/* TODO: print argument */
+	NAME(TIOCMBIS);			/* TODO: print argument */
+	NAME(TIOCMBIC);			/* TODO: print argument */
+	NAME(TIOCMGET);			/* TODO: print argument */
+	NAME(TIOCREMOTE);
+	NAME(TIOCGWINSZ);
+	NAME(TIOCSWINSZ);
+	NAME(TIOCUCNTL);
+	NAME(TIOCSTAT);
+	NAME(TIOCGSID);
+	NAME(TIOCCONS);
+	NAME(TIOCSCTTY);		/* no argument */
+	NAME(TIOCEXT);
+	NAME(TIOCSIG);			/* no argument */
+	NAME(TIOCDRAIN);		/* no argument */
+	NAME(TIOCGFLAGS);		/* TODO: print argument */
+	NAME(TIOCSFLAGS);		/* TODO: print argument */
+	NAME(TIOCDCDTIMESTAMP);		/* TODO: print argument */
+	NAME(TIOCRCVFRAME);		/* TODO: print argument */
+	NAME(TIOCXMTFRAME);		/* TODO: print argument */
+	NAME(TIOCPTMGET);		/* TODO: print argument */
+	NAME(TIOCGRANTPT);		/* no argument */
+	NAME(TIOCPTSNAME);		/* TODO: print argument */
+	NAME(TIOCSQSIZE);
+	NAME(TIOCGQSIZE);
+	NAME(TIOCSFON);			/* big IOCTL, not printing argument */
+	NAME(KIOCBELL);
+	NAME(KIOCSLEDS);
+	NAME(KIOCSMAP);			/* not worth interpreting */
+	NAME(TIOCMAPMEM);
+	NAME(TIOCUNMAPMEM);
+	}
+
+	/* Not a request known to this table. */
+	return NULL;
+}
+
+/*
+ * Print an I2C operation code under the given field name: as its symbolic
+ * name when one is known and symbolic output is enabled, and as a plain
+ * decimal number otherwise.
+ */
+static void
+put_i2c_op(struct trace_proc * proc, const char *name, i2c_op_t op)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		switch (op) {
+		TEXT(I2C_OP_READ);
+		TEXT(I2C_OP_READ_WITH_STOP);
+		TEXT(I2C_OP_WRITE);
+		TEXT(I2C_OP_WRITE_WITH_STOP);
+		TEXT(I2C_OP_READ_BLOCK);
+		TEXT(I2C_OP_WRITE_BLOCK);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%d", op);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/*
+ * Print a sound mixer device identifier under the given field name, using
+ * its symbolic name where known and symbolic output is enabled, and a plain
+ * decimal number otherwise.
+ */
+static void
+put_sound_device(struct trace_proc * proc, const char * name, int device)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		switch (device) {
+		TEXT(Master);
+		TEXT(Dac);
+		TEXT(Fm);
+		TEXT(Cd);
+		TEXT(Line);
+		TEXT(Mic);
+		TEXT(Speaker);
+		TEXT(Treble);
+		TEXT(Bass);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%d", device);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/*
+ * Print a sound channel state under the given field name: "ON" or "OFF"
+ * when the value matches one of those constants and symbolic output is
+ * enabled, and the plain decimal value in all other cases.
+ */
+static void
+put_sound_state(struct trace_proc * proc, const char * name, int state)
+{
+	const char *label = NULL;
+
+	if (!valuesonly) {
+		if (state == ON)
+			label = "ON";
+		else if (state == OFF)
+			label = "OFF";
+	}
+
+	if (label == NULL)
+		put_value(proc, name, "%d", state);
+	else
+		put_field(proc, name, label);
+}
+
+/* Flag names for the TIOCFLUSH argument, as printed by char_ioctl_arg(). */
+static const struct flags flush_flags[] = {
+	FLAG(FREAD),
+	FLAG(FWRITE),
+};
+
+/* Flag names for the c_iflag field of struct termios. */
+static const struct flags tc_iflags[] = {
+	FLAG(IGNBRK),
+	FLAG(BRKINT),
+	FLAG(IGNPAR),
+	FLAG(PARMRK),
+	FLAG(INPCK),
+	FLAG(ISTRIP),
+	FLAG(INLCR),
+	FLAG(IGNCR),
+	FLAG(ICRNL),
+	FLAG(IXON),
+	FLAG(IXOFF),
+	FLAG(IXANY),
+	FLAG(IMAXBEL),
+};
+
+/* Flag names for the c_oflag field of struct termios. */
+static const struct flags tc_oflags[] = {
+	FLAG(OPOST),
+	FLAG(ONLCR),
+	FLAG(OXTABS),
+	FLAG(ONOEOT),
+	FLAG(OCRNL),
+	FLAG(ONOCR),
+	FLAG(ONLRET),
+};
+
+/*
+ * Flag names for the c_cflag field of struct termios.  The character size is
+ * a multi-bit field under CSIZE rather than a single bit.
+ * NOTE(review): FLAG_MASK(mask, value) is defined elsewhere; presumably the
+ * entry matches when the bits selected by 'mask' equal 'value' - confirm.
+ */
+static const struct flags tc_cflags[] = {
+	FLAG(CIGNORE),
+	FLAG_MASK(CSIZE, CS5),
+	FLAG_MASK(CSIZE, CS6),
+	FLAG_MASK(CSIZE, CS7),
+	FLAG_MASK(CSIZE, CS8),
+	FLAG(CSTOPB),
+	FLAG(CREAD),
+	FLAG(PARENB),
+	FLAG(PARODD),
+	FLAG(HUPCL),
+	FLAG(CLOCAL),
+	FLAG(CRTSCTS),
+	FLAG(CDTRCTS),
+	FLAG(MDMBUF),
+};
+
+/*
+ * Flag names for the c_lflag field of struct termios.  This is the one
+ * termios flag field that char_ioctl_arg() prints even without verbosity.
+ */
+static const struct flags tc_lflags[] = {
+	FLAG(ECHOKE),
+	FLAG(ECHOE),
+	FLAG(ECHOK),
+	FLAG(ECHO),
+	FLAG(ECHONL),
+	FLAG(ECHOPRT),
+	FLAG(ECHOCTL),
+	FLAG(ISIG),
+	FLAG(ICANON),
+	FLAG(ALTWERASE),
+	FLAG(IEXTEN),
+	FLAG(EXTPROC),
+	FLAG(TOSTOP),
+	FLAG(FLUSHO),
+	FLAG(NOKERNINFO),
+	FLAG(PENDIN),
+	FLAG(NOFLSH),
+};
+
+/*
+ * Print a TTY line discipline number under the given field name, using its
+ * symbolic name where known and symbolic output is enabled, and a plain
+ * decimal number otherwise.
+ */
+static void
+put_tty_disc(struct trace_proc * proc, const char * name, int disc)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		switch (disc) {
+		TEXT(TTYDISC);
+		TEXT(TABLDISC);
+		TEXT(SLIPDISC);
+		TEXT(PPPDISC);
+		TEXT(STRIPDISC);
+		TEXT(HDLCDISC);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%d", disc);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/* Flag names for the kl_bits field of struct kio_leds (KIOCSLEDS). */
+static const struct flags kbd_leds[] = {
+	FLAG(KBD_LEDS_NUM),
+	FLAG(KBD_LEDS_CAPS),
+	FLAG(KBD_LEDS_SCROLL),
+};
+
+/*
+ * Print the argument of a character-device IOCTL request.  The 'ptr'
+ * argument is dereferenced directly, so when it is not NULL it must point to
+ * a copy of the argument in the tracer's own memory.
+ * NOTE(review): the calling convention is set by the caller, which is not
+ * part of this file.  Judging from the code below: a call with a NULL 'ptr'
+ * returns the direction flag(s) (IF_OUT and/or IF_IN) for which the argument
+ * should be printed; a call with a non-NULL 'ptr' prints the argument for
+ * direction 'dir' and returns IF_ALL if all fields of interest were printed,
+ * or 0 if some fields were left out.  Confirm against the caller.
+ */
+int
+char_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr,
+	int dir)
+{
+	minix_i2c_ioctl_exec_t *iie;
+	struct fb_var_screeninfo *fbvs;
+	struct volume_level *level;
+	struct inout_ctrl *inout;
+	struct termios *tc;
+	struct winsize *ws;
+	struct kio_bell *bell;
+	struct kio_leds *leds;
+	struct mapreqvm *mapreq;
+
+	switch (req) {
+	case MINIX_I2C_IOCTL_EXEC:
+		if ((iie = (minix_i2c_ioctl_exec_t *)ptr) == NULL)
+			return IF_OUT; /* we print only the request for now */
+
+		put_i2c_op(proc, "iie_op", iie->iie_op);
+		put_value(proc, "iie_addr", "0x%04x", iie->iie_addr);
+		return 0; /* TODO: print command/data/result */
+
+	case FBIOGET_VSCREENINFO:
+		if ((fbvs = (struct fb_var_screeninfo *)ptr) == NULL)
+			return IF_IN;
+
+		put_value(proc, "xres", "%"PRIu32, fbvs->xres);
+		put_value(proc, "yres", "%"PRIu32, fbvs->yres);
+		put_value(proc, "xres_virtual", "%"PRIu32, fbvs->xres_virtual);
+		put_value(proc, "yres_virtual", "%"PRIu32, fbvs->yres_virtual);
+		put_value(proc, "xoffset", "%"PRIu32, fbvs->xoffset);
+		put_value(proc, "yoffset", "%"PRIu32, fbvs->yoffset);
+		put_value(proc, "bits_per_pixel", "%"PRIu32,
+		    fbvs->bits_per_pixel);
+		return 0;
+
+	case FBIOPUT_VSCREENINFO:
+	case FBIOPAN_DISPLAY:
+		if ((fbvs = (struct fb_var_screeninfo *)ptr) == NULL)
+			return IF_OUT;
+
+		/* Only the offsets are printed for these two requests. */
+		put_value(proc, "xoffset", "%"PRIu32, fbvs->xoffset);
+		put_value(proc, "yoffset", "%"PRIu32, fbvs->yoffset);
+		return 0;
+
+	case DSPIORATE:
+	case DSPIOSTEREO:
+	case DSPIOSIZE:
+	case DSPIOBITS:
+	case DSPIOSIGN:
+	case DSPIOMAX:
+	case DSPIOFREEBUF:
+	case DSPIOSAMPLESINBUF:
+		/* All of these take a single unsigned integer. */
+		if (ptr == NULL)
+			return dir;
+
+		put_value(proc, NULL, "%u", *(unsigned int *)ptr);
+		return IF_ALL;
+
+	case MIXIOGETVOLUME:
+		if ((level = (struct volume_level *)ptr) == NULL)
+			return dir;
+
+		/* The device goes out; the levels come back in. */
+		if (dir == IF_OUT)
+			put_sound_device(proc, "device", level->device);
+		else {
+			put_value(proc, "left", "%d", level->left);
+			put_value(proc, "right", "%d", level->right);
+		}
+		return IF_ALL;
+
+	case MIXIOSETVOLUME:
+		/* Print the corrected volume levels only with verbosity on. */
+		if ((level = (struct volume_level *)ptr) == NULL)
+			return IF_OUT | ((verbose > 0) ? IF_IN : 0);
+
+		if (dir == IF_OUT)
+			put_sound_device(proc, "device", level->device);
+		put_value(proc, "left", "%d", level->left);
+		put_value(proc, "right", "%d", level->right);
+		return IF_ALL;
+
+	case MIXIOGETINPUTLEFT:
+	case MIXIOGETINPUTRIGHT:
+	case MIXIOGETOUTPUT:
+		if ((inout = (struct inout_ctrl *)ptr) == NULL)
+			return dir;
+
+		/* The device goes out; the channel states come back in. */
+		if (dir == IF_OUT)
+			put_sound_device(proc, "device", inout->device);
+		else {
+			put_sound_state(proc, "left", inout->left);
+			put_sound_state(proc, "right", inout->right);
+		}
+		return IF_ALL;
+
+	case MIXIOSETINPUTLEFT:
+	case MIXIOSETINPUTRIGHT:
+	case MIXIOSETOUTPUT:
+		if ((inout = (struct inout_ctrl *)ptr) == NULL)
+			return IF_OUT;
+
+		put_sound_device(proc, "device", inout->device);
+		put_sound_state(proc, "left", inout->left);
+		put_sound_state(proc, "right", inout->right);
+		return IF_ALL;
+
+	case TIOCFLUSH:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_flags(proc, NULL, flush_flags, COUNT(flush_flags), "0x%x",
+		    *(int *)ptr);
+		return IF_ALL;
+
+	case TIOCGETA:
+	case TIOCSETA:
+	case TIOCSETAW:
+	case TIOCSETAF:
+		if ((tc = (struct termios *)ptr) == NULL)
+			return dir;
+
+		/*
+		 * These are fairly common IOCTLs, so printing everything by
+		 * default would create a lot of noise.  By default we limit
+		 * ourselves to printing the field that contains what I
+		 * consider to be the most important flag: ICANON.
+		 * TODO: see if we can come up with a decent format for
+		 * selectively printing (relatively important) flags.
+		 */
+		if (verbose > 0) {
+			put_flags(proc, "c_iflag", tc_iflags, COUNT(tc_iflags),
+			    "0x%x", tc->c_iflag);
+			put_flags(proc, "c_oflag", tc_oflags, COUNT(tc_oflags),
+			    "0x%x", tc->c_oflag);
+			put_flags(proc, "c_cflag", tc_cflags, COUNT(tc_cflags),
+			    "0x%x", tc->c_cflag);
+		}
+		put_flags(proc, "c_lflag", tc_lflags, COUNT(tc_lflags), "0x%x",
+			tc->c_lflag);
+		if (verbose > 0) {
+			put_value(proc, "c_ispeed", "%d", tc->c_ispeed);
+			put_value(proc, "c_ospeed", "%d", tc->c_ospeed);
+		}
+		return 0; /* TODO: print the c_cc fields */
+
+	case TIOCGETD:
+	case TIOCSETD:
+		if (ptr == NULL)
+			return dir;
+
+		put_tty_disc(proc, NULL, *(int *)ptr);
+		return IF_ALL;
+
+	case TIOCGLINED:
+	case TIOCSLINED:
+		if (ptr == NULL)
+			return dir;
+
+		/* The argument is a fixed-size line discipline name buffer. */
+		put_buf(proc, NULL, PF_LOCADDR | PF_STRING, (vir_bytes)ptr,
+		    sizeof(linedn_t));
+		return IF_ALL;
+
+	case TIOCGPGRP:
+	case TIOCSPGRP:
+	case TIOCOUTQ:
+	case TIOCPKT:
+	case TIOCREMOTE:
+	case TIOCUCNTL:
+	case TIOCSTAT:		/* argument seems unused? */
+	case TIOCGSID:
+	case TIOCCONS:		/* argument seems unused? */
+	case TIOCEXT:
+	case TIOCSQSIZE:
+	case TIOCGQSIZE:
+		/* Print a simple integer. */
+		if (ptr == NULL)
+			return dir;
+
+		put_value(proc, NULL, "%d", *(int *)ptr);
+		return IF_ALL;
+
+	case TIOCSTI:
+		if (ptr == NULL)
+			return dir;
+
+		/*
+		 * For the numeric form, read the byte as unsigned char: plain
+		 * char may be signed, in which case bytes of 0x80 and up would
+		 * be sign-extended and show up as huge "%u" values.
+		 */
+		if (!valuesonly)
+			put_value(proc, NULL, "'%s'",
+			    get_escape(*(char *)ptr));
+		else
+			put_value(proc, NULL, "%u",
+			    (unsigned int)*(unsigned char *)ptr);
+		return IF_ALL;
+
+	case TIOCGWINSZ:
+	case TIOCSWINSZ:
+		if ((ws = (struct winsize *)ptr) == NULL)
+			return dir;
+
+		/* This is a stupid order, but we follow the struct layout. */
+		put_value(proc, "ws_row", "%u", ws->ws_row);
+		put_value(proc, "ws_col", "%u", ws->ws_col);
+		if (verbose > 0) {
+			put_value(proc, "ws_xpixel", "%u", ws->ws_xpixel);
+			put_value(proc, "ws_ypixel", "%u", ws->ws_ypixel);
+		}
+		return (verbose > 0) ? IF_ALL : 0;
+
+	case KIOCBELL:
+		if ((bell = (struct kio_bell *)ptr) == NULL)
+			return IF_OUT;
+
+		put_value(proc, "kb_pitch", "%u", bell->kb_pitch);
+		put_value(proc, "kb_volume", "%lu", bell->kb_volume);
+		put_struct_timeval(proc, "kb_duration", PF_LOCADDR,
+		    (vir_bytes)&bell->kb_duration);
+
+		return IF_ALL;
+
+	case KIOCSLEDS:
+		if ((leds = (struct kio_leds *)ptr) == NULL)
+			return IF_OUT;
+
+		put_flags(proc, "kl_bits", kbd_leds, COUNT(kbd_leds), "0x%x",
+		    leds->kl_bits);
+		return IF_ALL;
+
+	case TIOCMAPMEM:
+		if ((mapreq = (struct mapreqvm *)ptr) == NULL)
+			return dir;
+
+		/* This structure has more fields, but they're all unused.. */
+		if (dir == IF_OUT) {
+			put_value(proc, "phys_offset", "%"PRIu64,
+			    (uint64_t)mapreq->phys_offset); /* future compat */
+			put_value(proc, "size", "%zu", mapreq->size);
+		} else
+			put_ptr(proc, "vaddr_ret", (vir_bytes)mapreq->vaddr);
+		return IF_ALL;
+
+	case TIOCUNMAPMEM:
+		if ((mapreq = (struct mapreqvm *)ptr) == NULL)
+			return IF_OUT;
+
+		put_ptr(proc, "vaddr", (vir_bytes)mapreq->vaddr);
+		put_value(proc, "size", "%zu", mapreq->size);
+		return IF_ALL;
+
+	default:
+		/* Unknown request, or one without a printable argument. */
+		return 0;
+	}
+}
diff --git a/minix/usr.bin/trace/ioctl/net.c b/minix/usr.bin/trace/ioctl/net.c
new file mode 100644
index 000000000..3a4f42847
--- /dev/null
+++ b/minix/usr.bin/trace/ioctl/net.c
@@ -0,0 +1,565 @@
+
+#include "inc.h"
+
+#include <sys/ioctl.h>
+#include <sys/ucred.h>
+#include <net/gen/in.h>
+#include <net/gen/ether.h>
+#include <net/gen/eth_io.h>
+#include <net/gen/arp_io.h>
+#include <net/gen/ip_io.h>
+#include <net/gen/route.h>
+#include <net/gen/tcp.h>
+#include <net/gen/tcp_io.h>
+#include <net/gen/udp.h>
+#include <net/gen/udp_io.h>
+#include <net/gen/udp_io_hdr.h>
+#include <net/gen/psip_io.h>
+#include <arpa/inet.h>
+
+/*
+ * Return the symbolic name of the given network-related IOCTL request code,
+ * or NULL if the request is not one of those listed here.  The trailing
+ * comments mark requests that take no argument, and requests for which the
+ * argument is not (yet) printed.
+ * NOTE(review): NAME() is defined outside this file; presumably it expands to
+ * a case label that returns the request's name as a string - confirm.
+ */
+const char *
+net_ioctl_name(unsigned long req)
+{
+
+	switch (req) {
+	NAME(FIONREAD);
+	NAME(NWIOSETHOPT);	/* TODO: print argument */
+	NAME(NWIOGETHOPT);	/* TODO: print argument */
+	NAME(NWIOGETHSTAT);	/* TODO: print argument */
+	NAME(NWIOARPGIP);	/* TODO: print argument */
+	NAME(NWIOARPGNEXT);	/* TODO: print argument */
+	NAME(NWIOARPSIP);	/* TODO: print argument */
+	NAME(NWIOARPDIP);	/* TODO: print argument */
+	NAME(NWIOSIPCONF2);	/* TODO: print argument */
+	NAME(NWIOSIPCONF);	/* TODO: print argument */
+	NAME(NWIOGIPCONF2);	/* TODO: print argument */
+	NAME(NWIOGIPCONF);	/* TODO: print argument */
+	NAME(NWIOSIPOPT);
+	NAME(NWIOGIPOPT);
+	NAME(NWIOGIPOROUTE);	/* TODO: print argument */
+	NAME(NWIOSIPOROUTE);	/* TODO: print argument */
+	NAME(NWIODIPOROUTE);	/* TODO: print argument */
+	NAME(NWIOGIPIROUTE);	/* TODO: print argument */
+	NAME(NWIOSIPIROUTE);	/* TODO: print argument */
+	NAME(NWIODIPIROUTE);	/* TODO: print argument */
+	NAME(NWIOSTCPCONF);
+	NAME(NWIOGTCPCONF);
+	NAME(NWIOTCPCONN);
+	NAME(NWIOTCPLISTEN);
+	NAME(NWIOTCPATTACH);	/* TODO: print argument */
+	NAME(NWIOTCPSHUTDOWN);	/* no argument */
+	NAME(NWIOSTCPOPT);
+	NAME(NWIOGTCPOPT);
+	NAME(NWIOTCPPUSH);	/* no argument */
+	NAME(NWIOTCPLISTENQ);
+	NAME(NWIOGTCPCOOKIE);
+	NAME(NWIOTCPACCEPTTO);
+	NAME(NWIOTCPGERROR);
+	NAME(NWIOSUDPOPT);
+	NAME(NWIOGUDPOPT);
+	NAME(NWIOUDPPEEK);	/* TODO: print argument */
+	NAME(NWIOSPSIPOPT);	/* TODO: print argument */
+	NAME(NWIOGPSIPOPT);	/* TODO: print argument */
+	NAME(NWIOGUDSFADDR);
+	NAME(NWIOSUDSTADDR);
+	NAME(NWIOSUDSADDR);
+	NAME(NWIOGUDSADDR);
+	NAME(NWIOGUDSPADDR);
+	NAME(NWIOSUDSTYPE);
+	NAME(NWIOSUDSBLOG);
+	NAME(NWIOSUDSCONN);
+	NAME(NWIOSUDSSHUT);
+	NAME(NWIOSUDSPAIR);
+	NAME(NWIOSUDSACCEPT);
+	NAME(NWIOSUDSCTRL);
+	NAME(NWIOGUDSCTRL);
+	NAME(NWIOGUDSSOTYPE);
+	NAME(NWIOGUDSPEERCRED);
+	NAME(NWIOGUDSSNDBUF);
+	NAME(NWIOSUDSSNDBUF);
+	NAME(NWIOGUDSRCVBUF);
+	NAME(NWIOSUDSRCVBUF);
+	}
+
+	/* Not a request known to this table. */
+	return NULL;
+}
+
+/*
+ * Flag names for the nwio_flags field of nwio_ipopt_t (NWIOSIPOPT and
+ * NWIOGIPOPT).  The access mode is a multi-bit field under NWIO_ACC_MASK.
+ * NOTE(review): FLAG_ZERO() and FLAG_MASK() are defined elsewhere; presumably
+ * the former names the all-zero value and the latter matches when the bits
+ * selected by the mask equal the given value - confirm.
+ */
+static const struct flags ipopt_flags[] = {
+	FLAG_ZERO(NWIO_NOFLAGS),
+	FLAG_MASK(NWIO_ACC_MASK, NWIO_EXCL),
+	FLAG_MASK(NWIO_ACC_MASK, NWIO_SHARED),
+	FLAG_MASK(NWIO_ACC_MASK, NWIO_COPY),
+	FLAG(NWIO_EN_LOC),
+	FLAG(NWIO_DI_LOC),
+	FLAG(NWIO_EN_BROAD),
+	FLAG(NWIO_DI_BROAD),
+	FLAG(NWIO_REMSPEC),
+	FLAG(NWIO_REMANY),
+	FLAG(NWIO_PROTOSPEC),
+	FLAG(NWIO_PROTOANY),
+	FLAG(NWIO_HDR_O_SPEC),
+	FLAG(NWIO_HDR_O_ANY),
+	FLAG(NWIO_RWDATONLY),
+	FLAG(NWIO_RWDATALL),
+};
+
+/*
+ * Print an IPv4 address that is stored in network byte order.  In
+ * values-only mode the address is shown as one hexadecimal number in host
+ * byte order; otherwise it is shown as text between square brackets.
+ */
+static void
+put_ipaddr(struct trace_proc * proc, const char * name, ipaddr_t ipaddr)
+{
+	struct in_addr in;
+
+	if (valuesonly) {
+		put_value(proc, name, "0x%08x", ntohl(ipaddr));
+
+		return;
+	}
+
+	in.s_addr = ipaddr;
+
+	/* Is this an acceptable encapsulation? */
+	put_value(proc, name, "[%s]", inet_ntoa(in));
+}
+
+/*
+ * Print an IP protocol number under the given field name, using its
+ * symbolic name where known and symbolic output is enabled, and a plain
+ * unsigned decimal number otherwise.
+ */
+static void
+put_ipproto(struct trace_proc * proc, const char * name, ipproto_t proto)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		switch (proto) {
+		TEXT(IPPROTO_ICMP);
+		TEXT(IPPROTO_TCP);
+		TEXT(IPPROTO_UDP);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%u", proto);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/*
+ * Flag names for the nwtc_flags field of nwio_tcpconf_t (NWIOSTCPCONF and
+ * NWIOGTCPCONF).  The access mode and the local port mode are multi-bit
+ * fields under NWTC_ACC_MASK and NWTC_LOCPORT_MASK, respectively.
+ */
+static const struct flags tcpconf_flags[] = {
+	FLAG_ZERO(NWTC_NOFLAGS),
+	FLAG_MASK(NWTC_ACC_MASK, NWTC_EXCL),
+	FLAG_MASK(NWTC_ACC_MASK, NWTC_SHARED),
+	FLAG_MASK(NWTC_ACC_MASK, NWTC_COPY),
+	FLAG_MASK(NWTC_LOCPORT_MASK, NWTC_LP_UNSET),
+	FLAG_MASK(NWTC_LOCPORT_MASK, NWTC_LP_SET),
+	FLAG_MASK(NWTC_LOCPORT_MASK, NWTC_LP_SEL),
+	FLAG(NWTC_SET_RA),
+	FLAG(NWTC_UNSET_RA),
+	FLAG(NWTC_SET_RP),
+	FLAG(NWTC_UNSET_RP),
+};
+
+/*
+ * Print a port number: 'port' is converted with ntohs(), so it is expected
+ * in network byte order, and is printed as an unsigned decimal number.
+ */
+#define put_port(proc, name, port) \
+	put_value(proc, name, "%u", ntohs(port))
+
+/*
+ * Flag names for the nwtcl_flags field of nwio_tcpcl_t (NWIOTCPCONN and
+ * NWIOTCPLISTEN).
+ */
+static const struct flags tcpcl_flags[] = {
+	FLAG_ZERO(TCF_DEFAULT),
+	FLAG(TCF_ASYNCH),
+};
+
+/*
+ * Flag names for the nwto_flags field of nwio_tcpopt_t (NWIOSTCPOPT and
+ * NWIOGTCPOPT).
+ */
+static const struct flags tcpopt_flags[] = {
+	FLAG_ZERO(NWTO_NOFLAG),
+	FLAG(NWTO_SND_URG),
+	FLAG(NWTO_SND_NOTURG),
+	FLAG(NWTO_RCV_URG),
+	FLAG(NWTO_RCV_NOTURG),
+	FLAG(NWTO_BSD_URG),
+	FLAG(NWTO_NOTBSD_URG),
+	FLAG(NWTO_DEL_RST),
+	FLAG(NWTO_BULK),
+	FLAG(NWTO_NOBULK),
+};
+
+/*
+ * Flag names for the nwuo_flags field of nwio_udpopt_t (NWIOSUDPOPT and
+ * NWIOGUDPOPT).  The access mode and the local port mode are multi-bit
+ * fields under NWUO_ACC_MASK and NWUO_LOCPORT_MASK, respectively.
+ */
+static const struct flags udpopt_flags[] = {
+	FLAG_ZERO(NWUO_NOFLAGS),
+	FLAG_MASK(NWUO_ACC_MASK, NWUO_EXCL),
+	FLAG_MASK(NWUO_ACC_MASK, NWUO_SHARED),
+	FLAG_MASK(NWUO_ACC_MASK, NWUO_COPY),
+	FLAG_MASK(NWUO_LOCPORT_MASK, NWUO_LP_SET),
+	FLAG_MASK(NWUO_LOCPORT_MASK, NWUO_LP_SEL),
+	FLAG_MASK(NWUO_LOCPORT_MASK, NWUO_LP_ANY),
+	FLAG(NWUO_EN_LOC),
+	FLAG(NWUO_DI_LOC),
+	FLAG(NWUO_EN_BROAD),
+	FLAG(NWUO_DI_BROAD),
+	FLAG(NWUO_RP_SET),
+	FLAG(NWUO_RP_ANY),
+	FLAG(NWUO_RA_SET),
+	FLAG(NWUO_RA_ANY),
+	FLAG(NWUO_RWDATONLY),
+	FLAG(NWUO_RWDATALL),
+	FLAG(NWUO_EN_IPOPT),
+	FLAG(NWUO_DI_IPOPT),
+};
+
+/*
+ * Print a socket address family under the given field name, using its
+ * symbolic name where known and symbolic output is enabled, and a plain
+ * decimal number otherwise.
+ */
+static void
+put_family(struct trace_proc * proc, const char * name, int family)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		/* TODO: add all the other protocols */
+		switch (family) {
+		TEXT(AF_UNSPEC);
+		TEXT(AF_LOCAL);
+		TEXT(AF_INET);
+		TEXT(AF_INET6);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%d", family);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/*
+ * Names for the socket type argument of NWIOSUDSTYPE and NWIOGUDSSOTYPE: the
+ * type proper is matched under ~SOCK_FLAGS_MASK, and the remaining entries
+ * are individual flag bits.
+ */
+static const struct flags sock_type[] = {
+	FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_STREAM),
+	FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_DGRAM),
+	FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RAW),
+	FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RDM),
+	FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_SEQPACKET),
+	FLAG(SOCK_CLOEXEC),
+	FLAG(SOCK_NONBLOCK),
+	FLAG(SOCK_NOSIGPIPE),
+};
+
+/*
+ * Print the 'how' argument of a socket shutdown request under the given
+ * field name, using its symbolic name where known and symbolic output is
+ * enabled, and a plain decimal number otherwise.
+ */
+static void
+put_shutdown_how(struct trace_proc * proc, const char * name, int how)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		switch (how) {
+		TEXT(SHUT_RD);
+		TEXT(SHUT_WR);
+		TEXT(SHUT_RDWR);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%d", how);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/*
+ * Print a struct uucred located at the given address.  The user ID is always
+ * printed; the group ID and the group list are added with verbosity on, and
+ * the group count only at a verbosity level above one.
+ */
+static void
+put_struct_uucred(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct uucred uc;
+	int detailed;
+
+	/* Nothing more to do if the structure could not be opened. */
+	if (!put_open_struct(proc, name, flags, addr, &uc, sizeof(uc)))
+		return;
+
+	detailed = (verbose > 0);
+
+	put_value(proc, "cr_uid", "%u", uc.cr_uid);
+
+	if (detailed) {
+		put_value(proc, "cr_gid", "%u", uc.cr_gid);
+
+		if (verbose > 1)
+			put_value(proc, "cr_ngroups", "%d", uc.cr_ngroups);
+
+		/* The group array is part of our local copy. */
+		put_groups(proc, "cr_groups", PF_LOCADDR,
+		    (vir_bytes)&uc.cr_groups, uc.cr_ngroups);
+	}
+
+	put_close_struct(proc, detailed);
+}
+
+/*
+ * Print a control message type under the given field name, using its
+ * symbolic name where known and symbolic output is enabled, and a plain
+ * decimal number otherwise.  The caller invokes this only for messages at
+ * the SOL_SOCKET level.
+ */
+static void
+put_cmsg_type(struct trace_proc * proc, const char * name, int type)
+{
+	const char *text = NULL;
+
+	/* Names are looked up only when not restricted to raw values. */
+	if (!valuesonly) {
+		switch (type) {
+		TEXT(SCM_RIGHTS);
+		TEXT(SCM_CREDS);
+		TEXT(SCM_TIMESTAMP);
+		}
+	}
+
+	/* No name found (or none wanted): fall back to the number. */
+	if (text == NULL) {
+		put_value(proc, name, "%d", type);
+
+		return;
+	}
+
+	put_field(proc, name, text);
+}
+
+/*
+ * Print the list of control messages stored in the given msg_control
+ * structure, which must be a copy in the tracer's own memory.  Each message
+ * is printed as a structure with its level and (for SOL_SOCKET) its type,
+ * followed by the contents for the types we know: the file descriptors of
+ * SCM_RIGHTS and the credentials of SCM_CREDS.  Anything else is shown as
+ * "..".
+ *
+ * All length fields in the buffer were filled in by the traced process and
+ * can therefore not be trusted.  A control message with a length that is
+ * smaller than its own header or that extends beyond the end of the control
+ * data ends the list, and is shown as "..".
+ */
+static void
+put_msg_control(struct trace_proc * proc, struct msg_control * ptr)
+{
+	struct msghdr msg;
+	struct cmsghdr *cmsg;
+	char *end;
+	size_t len, nfds, i;
+
+	/* A total length beyond our buffer cannot be parsed at all. */
+	if (ptr->msg_controllen > sizeof(ptr->msg_control)) {
+		put_field(proc, NULL, "..");
+
+		return;
+	}
+
+	put_open(proc, NULL, PF_NONAME, "[", ", ");
+
+	/* Set up a message header so that we can use the CMSG macros. */
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_control = ptr->msg_control;
+	msg.msg_controllen = ptr->msg_controllen;
+
+	end = (char *)ptr->msg_control + ptr->msg_controllen;
+
+	/*
+	 * TODO: decide if we need a verbosity-based limit here.  The argument
+	 * in favor of printing everything is that upon receipt, SCM_RIGHTS
+	 * actually creates new file descriptors, which is pretty essential in
+	 * terms of figuring out what is happening in a process.  In addition,
+	 * these calls should be sufficiently rare that the lengthy output is
+	 * not really disruptive for the general output flow.
+	 */
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+	    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		/*
+		 * Validate the message length before using it.  Too small a
+		 * value would make the data length below wrap around, and
+		 * a zero length may keep CMSG_NXTHDR from ever advancing.
+		 * Too large a value would have us read past the control data.
+		 */
+		if (cmsg->cmsg_len < CMSG_LEN(0) ||
+		    cmsg->cmsg_len > (size_t)(end - (char *)cmsg)) {
+			put_field(proc, NULL, "..");
+
+			break;
+		}
+
+		put_open(proc, NULL, 0, "{", ", ");
+
+		if (verbose > 0)
+			put_value(proc, "cmsg_len", "%u", cmsg->cmsg_len);
+		if (!valuesonly && cmsg->cmsg_level == SOL_SOCKET)
+			put_field(proc, "cmsg_level", "SOL_SOCKET");
+		else
+			put_value(proc, "cmsg_level", "%d", cmsg->cmsg_level);
+		if (cmsg->cmsg_level == SOL_SOCKET)
+			put_cmsg_type(proc, "cmsg_type", cmsg->cmsg_type);
+
+		/* The number of data bytes following the header. */
+		len = cmsg->cmsg_len - CMSG_LEN(0);
+
+		/* Print the contents of the messages that we know. */
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_RIGHTS) {
+			nfds = len / sizeof(int);
+
+			put_open(proc, NULL, PF_NONAME, "[", ", ");
+			for (i = 0; i < nfds; i++)
+				put_fd(proc, NULL,
+				    ((int *)CMSG_DATA(cmsg))[i]);
+			put_close(proc, "]");
+		} else if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_CREDS &&
+		    len >= sizeof(struct uucred)) {
+			/* Only print credentials that are fully present. */
+			put_struct_uucred(proc, NULL, PF_LOCADDR,
+			    (vir_bytes)CMSG_DATA(cmsg));
+		} else if (len > 0)
+			put_field(proc, NULL, "..");
+
+		put_close(proc, "}");
+	}
+
+	put_close(proc, "]");
+}
+
+/*
+ * Print the argument of a network-related IOCTL request.  The 'ptr' argument
+ * is dereferenced directly, so when it is not NULL it must point to a copy
+ * of the argument in the tracer's own memory.
+ * NOTE(review): the calling convention is set by the caller, which is not
+ * part of this file.  Judging from the code below: a call with a NULL 'ptr'
+ * returns the direction flag(s) (IF_OUT and/or IF_IN) for which the argument
+ * should be printed; a call with a non-NULL 'ptr' prints the argument for
+ * direction 'dir' and returns IF_ALL if all fields of interest were printed,
+ * or 0 if some fields were left out.  Confirm against the caller.
+ */
+int
+net_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, int dir)
+{
+	const char *text;
+	nwio_ipopt_t *ipopt;
+	nwio_tcpconf_t *nwtc;
+	nwio_tcpcl_t *nwtcl;
+	nwio_tcpopt_t *nwto;
+	tcp_cookie_t *cookie;
+	nwio_udpopt_t *nwuo;
+	struct sockaddr_un *sun;
+	int i;
+
+	switch (req) {
+	case FIONREAD:
+		/*
+		 * Arguably this does not belong here, but as of writing, the
+		 * network services are the only ones actually implementing
+		 * support for this IOCTL, and we don't have a more suitable
+		 * place to put it either.
+		 */
+		if (ptr == NULL)
+			return IF_IN;
+
+		put_value(proc, NULL, "%d", *(int *)ptr);
+		return IF_ALL;
+
+	case NWIOSIPOPT:
+	case NWIOGIPOPT:
+		if ((ipopt = (nwio_ipopt_t *)ptr) == NULL)
+			return dir;
+
+		put_flags(proc, "nwio_flags", ipopt_flags, COUNT(ipopt_flags),
+		    "0x%x", ipopt->nwio_flags);
+
+		/* Print optional fields only if the flags say they are set. */
+		if (ipopt->nwio_flags & NWIO_REMSPEC)
+			put_ipaddr(proc, "nwio_rem", ipopt->nwio_rem);
+		if (ipopt->nwio_flags & NWIO_PROTOSPEC)
+			put_ipproto(proc, "nwio_proto", ipopt->nwio_proto);
+
+		return 0; /* TODO: the remaining fields */
+
+	case NWIOSTCPCONF:
+	case NWIOGTCPCONF:
+		if ((nwtc = (nwio_tcpconf_t *)ptr) == NULL)
+			return dir;
+
+		put_flags(proc, "nwtc_flags", tcpconf_flags,
+		    COUNT(tcpconf_flags), "0x%x", nwtc->nwtc_flags);
+
+		/* The local address cannot be set, just retrieved. */
+		if (req == NWIOGTCPCONF)
+			put_ipaddr(proc, "nwtc_locaddr", nwtc->nwtc_locaddr);
+
+		if ((nwtc->nwtc_flags & NWTC_LOCPORT_MASK) == NWTC_LP_SET)
+			put_port(proc, "nwtc_locport", nwtc->nwtc_locport);
+
+		if (nwtc->nwtc_flags & NWTC_SET_RA)
+			put_ipaddr(proc, "nwtc_remaddr", nwtc->nwtc_remaddr);
+
+		if (nwtc->nwtc_flags & NWTC_SET_RP)
+			put_port(proc, "nwtc_remport", nwtc->nwtc_remport);
+
+		return IF_ALL;
+
+	case NWIOTCPCONN:
+	case NWIOTCPLISTEN:
+		if ((nwtcl = (nwio_tcpcl_t *)ptr) == NULL)
+			return dir;
+
+		put_flags(proc, "nwtcl_flags", tcpcl_flags,
+		    COUNT(tcpcl_flags), "0x%x", nwtcl->nwtcl_flags);
+
+		/* We pretend the unused nwtcl_ttl field does not exist. */
+		return IF_ALL;
+
+	case NWIOSTCPOPT:
+	case NWIOGTCPOPT:
+		if ((nwto = (nwio_tcpopt_t *)ptr) == NULL)
+			return dir;
+
+		put_flags(proc, "nwto_flags", tcpopt_flags,
+		    COUNT(tcpopt_flags), "0x%x", nwto->nwto_flags);
+		return IF_ALL;
+
+	case NWIOTCPLISTENQ:
+	case NWIOSUDSBLOG:
+		/* Both take a single integer on the way out. */
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_value(proc, NULL, "%d", *(int *)ptr);
+		return IF_ALL;
+
+	case NWIOGTCPCOOKIE:
+	case NWIOTCPACCEPTTO:
+		if ((cookie = (tcp_cookie_t *)ptr) == NULL)
+			return dir;
+
+		/* The secret is printed only with verbosity on. */
+		put_value(proc, "tc_ref", "%"PRIu32, cookie->tc_ref);
+		if (verbose > 0)
+			put_buf(proc, "tc_secret", PF_LOCADDR,
+			    (vir_bytes)&cookie->tc_secret,
+			    sizeof(cookie->tc_secret));
+		return (verbose > 0) ? IF_ALL : 0;
+
+	case NWIOTCPGERROR:
+		if (ptr == NULL)
+			return IF_IN;
+
+		/* Print the error by name if we know it, or else by number. */
+		i = *(int *)ptr;
+		if (!valuesonly && (text = get_error_name(i)) != NULL)
+			put_field(proc, NULL, text);
+		else
+			put_value(proc, NULL, "%d", i);
+		return IF_ALL;
+
+	case NWIOSUDPOPT:
+	case NWIOGUDPOPT:
+		if ((nwuo = (nwio_udpopt_t *)ptr) == NULL)
+			return dir;
+
+		put_flags(proc, "nwuo_flags", udpopt_flags,
+		    COUNT(udpopt_flags), "0x%x", nwuo->nwuo_flags);
+
+		/* The local address cannot be set, just retrieved. */
+		if (req == NWIOGUDPOPT)
+			put_ipaddr(proc, "nwuo_locaddr", nwuo->nwuo_locaddr);
+
+		if ((nwuo->nwuo_flags & NWUO_LOCPORT_MASK) == NWUO_LP_SET)
+			put_port(proc, "nwuo_locport", nwuo->nwuo_locport);
+
+		if (nwuo->nwuo_flags & NWUO_RA_SET)
+			put_ipaddr(proc, "nwuo_remaddr", nwuo->nwuo_remaddr);
+
+		if (nwuo->nwuo_flags & NWUO_RP_SET)
+			put_port(proc, "nwuo_remport", nwuo->nwuo_remport);
+
+		return IF_ALL;
+
+	case NWIOGUDSFADDR:
+	case NWIOSUDSTADDR:
+	case NWIOSUDSADDR:
+	case NWIOGUDSADDR:
+	case NWIOGUDSPADDR:
+	case NWIOSUDSCONN:
+	case NWIOSUDSACCEPT:
+		if ((sun = (struct sockaddr_un *)ptr) == NULL)
+			return dir;
+
+		put_family(proc, "sun_family", sun->sun_family);
+
+		/* This could be extended to a generic sockaddr printer.. */
+		if (sun->sun_family == AF_LOCAL) {
+			put_buf(proc, "sun_path", PF_LOCADDR | PF_PATH,
+			    (vir_bytes)&sun->sun_path, sizeof(sun->sun_path));
+			return IF_ALL; /* skipping sun_len, it's unused */
+		} else
+			return 0;
+
+	case NWIOSUDSTYPE:
+	case NWIOGUDSSOTYPE:
+		if (ptr == NULL)
+			return dir;
+
+		put_flags(proc, NULL, sock_type, COUNT(sock_type), "0x%x",
+		    *(int *)ptr);
+		return IF_ALL;
+
+	case NWIOSUDSSHUT:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_shutdown_how(proc, NULL, *(int *)ptr);
+		return IF_ALL;
+
+	case NWIOSUDSPAIR:
+		if (ptr == NULL)
+			return IF_OUT;
+
+		put_dev(proc, NULL, *(dev_t *)ptr);
+		return IF_ALL;
+
+	case NWIOSUDSCTRL:
+		/* Same argument as NWIOGUDSCTRL, but printed on the way out. */
+		if (ptr == NULL)
+			return IF_OUT;
+
+		/* FALLTHROUGH */
+	case NWIOGUDSCTRL:
+		if (ptr == NULL)
+			return IF_IN;
+
+		put_msg_control(proc, (struct msg_control *)ptr);
+		return IF_ALL;
+
+	case NWIOGUDSPEERCRED:
+		if (ptr == NULL)
+			return IF_IN;
+
+		put_struct_uucred(proc, NULL, PF_LOCADDR, (vir_bytes)ptr);
+		return IF_ALL;
+
+	case NWIOGUDSSNDBUF:
+	case NWIOSUDSSNDBUF:
+	case NWIOGUDSRCVBUF:
+	case NWIOSUDSRCVBUF:
+		/* The buffer sizes are passed as size_t values. */
+		if (ptr == NULL)
+			return dir;
+
+		put_value(proc, NULL, "%zu", *(size_t *)ptr);
+		return IF_ALL;
+
+	default:
+		/* Unknown request, or one without a printable argument. */
+		return 0;
+	}
+}
diff --git a/minix/usr.bin/trace/ioctl/svrctl.c b/minix/usr.bin/trace/ioctl/svrctl.c
new file mode 100644
index 000000000..8708ed221
--- /dev/null
+++ b/minix/usr.bin/trace/ioctl/svrctl.c
@@ -0,0 +1,63 @@
+
+#include "inc.h"
+
+#include <sys/svrctl.h>
+
+/*
+ * Return the symbolic name of the given svrctl request code, or NULL if the
+ * request is not one of those listed here.
+ * NOTE(review): NAME() is defined outside this file; presumably it expands to
+ * a case label that returns the request's name as a string - confirm.
+ */
+const char *
+svrctl_name(unsigned long req)
+{
+
+	switch (req) {
+	NAME(PMSETPARAM);
+	NAME(PMGETPARAM);
+	NAME(VFSGETPARAM);
+	NAME(VFSSETPARAM);
+	}
+
+	/* Not a request known to this table. */
+	return NULL;
+}
+
+/*
+ * Print the argument of a svrctl request.  All requests handled here take a
+ * struct sysgetenv, which itself must be in the tracer's memory; the key and
+ * value buffers it points to are printed through put_buf() without the
+ * PF_LOCADDR flag.
+ */
+int
+svrctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, int dir)
+{
+	struct sysgetenv *sge = (struct sysgetenv *)ptr;
+	int valflags;
+
+	switch (req) {
+	case PMSETPARAM:
+	case VFSSETPARAM:
+		/* Setting a parameter: key and value both go out. */
+		if (sge == NULL)
+			return IF_OUT;
+
+		put_buf(proc, "key", PF_STRING, (vir_bytes)sge->key,
+		    sge->keylen);
+		put_buf(proc, "value", PF_STRING, (vir_bytes)sge->val,
+		    sge->vallen);
+		return IF_ALL;
+
+	case PMGETPARAM:
+	case VFSGETPARAM:
+		if (sge == NULL)
+			return IF_OUT | IF_IN;
+
+		/*
+		 * So far this is the only IOCTL case where the output depends
+		 * on one of the values in the input: if the given key is NULL,
+		 * PM provides the entire system environment in return, which
+		 * means we cannot just print a single string.  We rely on PM
+		 * not changing the key field, which (while true) is an
+		 * assumption.  With the current (simple) model we would have
+		 * to save the provided key pointer somewhere otherwise.
+		 */
+		if (dir != IF_OUT) {
+			valflags = (sge->key != NULL) ? PF_STRING : 0;
+
+			put_buf(proc, "value", valflags, (vir_bytes)sge->val,
+			    sge->vallen);
+			return IF_ALL;
+		}
+
+		put_buf(proc, "key", PF_STRING, (vir_bytes)sge->key,
+		    sge->keylen);
+		return IF_ALL;
+
+	default:
+		return 0;
+	}
+}
diff --git a/minix/usr.bin/trace/kernel.c b/minix/usr.bin/trace/kernel.c
new file mode 100644
index 000000000..195123049
--- /dev/null
+++ b/minix/usr.bin/trace/kernel.c
@@ -0,0 +1,307 @@
+/*
+ * This file, and only this file, should contain all the ugliness needed to
+ * obtain values from the kernel.  It has to be recompiled every time the
+ * layout of the kernel "struct proc" and/or "struct priv" structures changes.
+ * In addition, this file contains the platform-dependent code related to
+ * interpreting the registers exposed by the kernel.
+ *
+ * As a quick note, some functions return TRUE/FALSE, and some return 0/-1.
+ * The former convention is used for functions that return a boolean value;
+ * the latter is used for functions that set errno in all cases of failure,
+ * and where the caller may conceivably use errno as a result.
+ *
+ * On a related note, relevant here and elsewhere: we define _MINIX_SYSTEM but
+ * not _SYSTEM, which means that we should not get negative error numbers.
+ */
+
+#include "inc.h"
+
+#include <machine/archtypes.h>
+#include <minix/timers.h>
+#include "kernel/proc.h"
+#include "kernel/priv.h"
+#if defined(__i386__)
+#include "kernel/arch/i386/include/archconst.h" /* for the KTS_ constants */
+#endif
+
+#include <minix/param.h>
+
+extern struct minix_kerninfo *_minix_kerninfo;
+
+/*
+ * Working area.  By obtaining values from the kernel into these local process
+ * structures, and then returning them, we gain a little robustness against
+ * changes in data types of the fields we need.
+ */
+static struct proc kernel_proc;
+static struct priv kernel_priv;
+
+/*
+ * Check whether our notion of the kernel process structure layout matches that
+ * of the kernel, by comparing magic values.  This can be done only once we
+ * have attached to a process.  Return TRUE if everything seems alright; FALSE
+ * otherwise.
+ */
+int
+kernel_check(pid_t pid)
+{
+	int r;
+
+	r = mem_get_user(pid, offsetof(struct proc, p_magic),
+	    &kernel_proc.p_magic, sizeof(kernel_proc.p_magic));
+	/* A failed read counts as a mismatch, just like a wrong value. */
+	return (r < 0) ? FALSE : (kernel_proc.p_magic == PMAGIC);
+}
+
+/*
+ * Obtain the kernel name for the given (stopped) process.  Return 0 on
+ * success, with the (possibly truncated) name stored in the 'name' buffer
+ * which is of 'size' bytes; the name will be null-terminated.  Note that the
+ * name may contain any suffixes as set by the kernel.  Return -1 on failure,
+ * with errno set as appropriate.
+ */
+int
+kernel_get_name(pid_t pid, char * name, size_t size)
+{
+	const size_t off = offsetof(struct proc, p_name);
+
+	if (mem_get_user(pid, off, kernel_proc.p_name,
+	    sizeof(kernel_proc.p_name)) < 0)
+		return -1;
+	strlcpy(name, kernel_proc.p_name, size); /* truncates if needed */
+	return 0;
+}
+
+/*
+ * Check whether the given process, which we have just attached to, is a system
+ * service.  PM does not prevent us from attaching to most system services,
+ * even though this utility only supports tracing user programs.  Unlike a few
+ * other routines in this file, this function can not use ProcFS to obtain its
+ * result, because the given process may actually be VFS or ProcFS itself!
+ * Return TRUE if the given process is a system service; FALSE if not.
+ */
+int
+kernel_is_service(pid_t pid)
+{
+	const size_t mask = sizeof(long) - 1;
+	size_t priv_off;
+
+	/*
+	 * With T_GETUSER, the priv structure is located right after the proc
+	 * structure, rounded up to a long boundary if padding is needed.
+	 */
+	priv_off = (sizeof(struct proc) + mask) & ~mask;
+	priv_off += offsetof(struct priv, s_id);
+
+	if (mem_get_user(pid, priv_off, &kernel_priv.s_id,
+	    sizeof(kernel_priv.s_id)) < 0)
+		return FALSE; /* the process may be gone; no danger then */
+
+	return (kernel_priv.s_id != USER_PRIV_ID);
+}
+
+/*
+ * For the given process, which must be stopped on entering a system call,
+ * retrieve the three register values describing the system call.  Return 0 on
+ * success, or -1 on failure with errno set as appropriate.
+ */
+int
+kernel_get_syscall(pid_t pid, reg_t reg[3])
+{
+	/* The p_defer field must be exactly three registers in size. */
+	assert(sizeof(kernel_proc.p_defer) == sizeof(reg_t) * 3);
+
+	if (mem_get_user(pid, offsetof(struct proc, p_defer),
+	    &kernel_proc.p_defer, sizeof(kernel_proc.p_defer)) < 0)
+		return -1;
+	/* Return the three values in r1, r2, r3 order. */
+	reg[0] = kernel_proc.p_defer.r1;
+	reg[1] = kernel_proc.p_defer.r2;
+	reg[2] = kernel_proc.p_defer.r3;
+	return 0;
+}
+
+/*
+ * Retrieve the value of the primary return register for the given process,
+ * which must be stopped on leaving a system call.  This register contains the
+ * IPC-level result of the system call.  Return 0 on success, or -1 on failure
+ * with errno set as appropriate.
+ */
+int
+kernel_get_retreg(pid_t pid, reg_t * retreg)
+{
+	size_t reg_off;
+
+	/*
+	 * The p_reg field used to be required to come first in the proc
+	 * structure.  That is no longer strictly the case, so we play safe
+	 * and use its real offset within the structure.
+	 */
+	reg_off = offsetof(struct proc, p_reg) +
+	    offsetof(struct stackframe_s, retreg);
+
+	if (mem_get_user(pid, reg_off, &kernel_proc.p_reg.retreg,
+	    sizeof(kernel_proc.p_reg.retreg)) < 0)
+		return -1;
+	*retreg = kernel_proc.p_reg.retreg;
+	return 0;
+}
+
+/*
+ * Return the stack top for user processes.  This is needed for execve(), since
+ * the supplied frame contains pointers prepared for the new location of the
+ * frame, which is at the stack top of the process after the execve().
+ */
+vir_bytes
+kernel_get_stacktop(void)
+{
+	/* The value is taken from the kinfo part of _minix_kerninfo. */
+	return _minix_kerninfo->kinfo->user_sp;
+}
+
+/*
+ * For the given stopped process, get its program counter (pc), stack pointer
+ * (sp), and optionally its frame pointer (fp).  The given fp pointer may be
+ * NULL, in which case the frame pointer is not obtained.  The given pc and sp
+ * pointers must not be NULL, and this is intentional: obtaining fp may require
+ * obtaining sp first.  Return 0 on success, or -1 on failure with errno set
+ * as appropriate.  This functionality is not essential for tracing processes,
+ * and may not be supported on all platforms, in part or full.  In particular,
+ * on some platforms, a zero (= invalid) frame pointer may be returned on
+ * success, indicating that obtaining frame pointers is not supported.
+ */
+int
+kernel_get_context(pid_t pid, reg_t * pc, reg_t * sp, reg_t * fp)
+{
+	size_t off;
+
+	off = offsetof(struct proc, p_reg); /* p_reg need not come first */
+
+	if (mem_get_user(pid, off + offsetof(struct stackframe_s, pc),
+	    &kernel_proc.p_reg.pc, sizeof(kernel_proc.p_reg.pc)) < 0)
+		return -1;
+	if (mem_get_user(pid, off + offsetof(struct stackframe_s, sp),
+	    &kernel_proc.p_reg.sp, sizeof(kernel_proc.p_reg.sp)) < 0)
+		return -1;
+
+	*pc = kernel_proc.p_reg.pc;
+	*sp = kernel_proc.p_reg.sp;
+
+	if (fp == NULL)
+		return 0;	/* the caller does not want the frame pointer */
+
+#if defined(__i386__)
+	if (mem_get_user(pid, offsetof(struct proc, p_seg) +
+	    offsetof(struct segframe, p_kern_trap_style),
+	    &kernel_proc.p_seg.p_kern_trap_style,
+	    sizeof(kernel_proc.p_seg.p_kern_trap_style)) < 0)
+		return -1;
+
+	/* This is taken from the kernel i386 exception code. */
+	switch (kernel_proc.p_seg.p_kern_trap_style) {
+	case KTS_SYSENTER:
+	case KTS_SYSCALL:
+		/* Copy one register: the size of *fp, not of the pointer. */
+		if (mem_get_data(pid, *sp + 16, fp, sizeof(*fp)) < 0)
+			return -1;
+		break;
+
+	default:
+		if (mem_get_user(pid, off + offsetof(struct stackframe_s, fp),
+		    &kernel_proc.p_reg.fp, sizeof(kernel_proc.p_reg.fp)) < 0)
+			return -1;
+		*fp = kernel_proc.p_reg.fp;
+	}
+#else
+	*fp = 0; /* not supported; this is not a failure (*pc is valid) */
+#endif
+	return 0;
+}
+
+/*
+ * Given a frame pointer, obtain the next program counter and frame pointer.
+ * Return 0 if successful, or -1 on failure with errno set appropriately.  The
+ * functionality is not essential for tracing processes, and may not be
+ * supported on all platforms.  Thus, on some platforms, this function may
+ * always fail.
+ */
+static int
+kernel_get_nextframe(pid_t pid, reg_t fp, reg_t * next_pc, reg_t * next_fp)
+{
+#ifdef __i386__
+	void *frame[2];	/* [0] = next frame pointer, [1] = next pc */
+
+	/* Copy in the two words that the given frame pointer points at. */
+	if (mem_get_data(pid, (vir_bytes)fp, &frame, sizeof(frame)) < 0)
+		return -1;
+	*next_pc = (reg_t)frame[1];
+	*next_fp = (reg_t)frame[0];
+	return 0;
+#else
+	/* There is no support for this platform (yet). */
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+
+/*
+ * Print a stack trace for the given process, which is known to be stopped on
+ * entering a system call.  This function does not really belong here, but
+ * without a doubt it is going to have to be fully rewritten to support
+ * anything other than i386.
+ *
+ * Getting symbol names is currently an absolute nightmare.  Not just because
+ * of shared libraries, but also since ProcFS does not offer a /proc/NNN/exe,
+ * so that we cannot reliably determine the binary being executed: not for
+ * processes being attached to, and not for exec calls using a relative path.
+ */
+void
+kernel_put_stacktrace(struct trace_proc * proc)
+{
+	reg_t pc, sp, fp, fp_min, fp_max;
+	unsigned int nframes, limit;
+
+	if (kernel_get_context(proc->pid, &pc, &sp, &fp) < 0)
+		return;
+
+	/*
+	 * A small default limit such as 6 would look nicer, but is not useful
+	 * enough in practice for programs of moderate size.  As of writing,
+	 * 15 entries take up about two lines on an 80-column terminal.
+	 */
+	limit = (verbose == 0) ? 15 : (verbose == 1) ? 31 : UINT_MAX;
+
+	/*
+	 * Formatting is kept to the bare minimum, so that the lines can be
+	 * fed directly to tools such as addr2line.
+	 */
+	put_newline();
+	put_fmt(proc, "  0x%x", pc);
+
+	/* The range of frame pointers seen so far: just the first one. */
+	fp_min = fp_max = fp;
+
+	for (nframes = 1; fp != 0 && nframes < limit; nframes++) {
+		if (kernel_get_nextframe(proc->pid, fp, &pc, &fp) < 0)
+			break;
+
+		put_fmt(proc, " 0x%x", pc);
+
+		/*
+		 * A frame pointer within the range seen so far ends the trace.
+		 * Among other things, this keeps us from looping forever on
+		 * one and the same frame pointer.
+		 */
+		if (fp >= fp_min && fp <= fp_max)
+			break;
+		if (fp < fp_min)
+			fp_min = fp;
+		else
+			fp_max = fp;
+	}
+
+	/* A nonzero frame pointer means the trace was not completed. */
+	if (fp != 0)
+		put_text(proc, " ..");
+	put_newline();
+}
diff --git a/minix/usr.bin/trace/mem.c b/minix/usr.bin/trace/mem.c
new file mode 100644
index 000000000..e0b67270b
--- /dev/null
+++ b/minix/usr.bin/trace/mem.c
@@ -0,0 +1,61 @@
+
+#include "inc.h"
+
+/*
+ * Retrieve 'len' bytes from the memory of the traced process 'pid' at address
+ * 'addr' and put the result in the buffer pointed to by 'ptr'.  Return 0 on
+ * success, or otherwise -1 with errno set appropriately.
+ */
+int
+mem_get_data(pid_t pid, vir_bytes addr, void * ptr, size_t len)
+{
+	struct ptrace_range pr;	/* request descriptor handed to ptrace() */
+
+	if (len == 0) return 0;	/* nothing to copy */
+
+	pr.pr_space = TS_DATA;	/* presumably: the data address space */
+	pr.pr_addr = addr;	/* source address in the traced process */
+	pr.pr_size = len;	/* number of bytes to copy */
+	pr.pr_ptr = ptr;	/* destination buffer, local to this process */
+
+	return ptrace(T_GETRANGE, pid, &pr, 0);
+}
+
+/*
+ * Retrieve 'len' bytes from the kernel structure memory of the traced process
+ * 'pid' at offset 'addr' and put the result in the buffer pointed to by 'ptr'.
+ * Return 0 on success, or otherwise -1 with errno set appropriately.
+ */
+int
+mem_get_user(pid_t pid, vir_bytes addr, void * ptr, size_t len)
+{
+	long data;
+	char *p;
+	size_t off, chunk;
+
+	if (len == 0) return 0;
+
+	/* Align down to a word; 'off' is the start offset within it. */
+	off = addr & (sizeof(data) - 1);
+	addr -= off;
+
+	p = ptr;
+	while (len > 0) {
+		/* Any value may be valid data, so rely on errno. */
+		errno = 0;
+		data = ptrace(T_GETUSER, pid, (void *)addr, 0);
+		if (errno != 0) return -1;
+		/* Use the part of the word from 'off' on, up to 'len' bytes. */
+		chunk = sizeof(data) - off;
+		if (chunk > len)
+			chunk = len;
+
+		memcpy(p, (char *)&data + off, chunk);
+		p += chunk;
+		addr += sizeof(data);	/* next word; keeps 'addr' aligned */
+		len -= chunk;
+		off = 0;
+	}
+
+	return 0;
+}
diff --git a/minix/usr.bin/trace/output.c b/minix/usr.bin/trace/output.c
new file mode 100644
index 000000000..f87416378
--- /dev/null
+++ b/minix/usr.bin/trace/output.c
@@ -0,0 +1,516 @@
+
+#include "inc.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * The maximum number of bytes that may be buffered before writing the buffered
+ * output to the underlying file.  This is a performance optimization only.
+ * Writing more than this number of bytes at once will be handled correctly.
+ */
+#define OUTPUT_BUFSZ	512
+
+static int out_fd;		/* file descriptor that output is written to */
+static char out_buf[OUTPUT_BUFSZ];	/* buffer for pending output */
+static int out_len;		/* number of bytes pending in out_buf */
+static int out_err;		/* TRUE once a write error has occurred */
+
+static pid_t last_pid; /* not a trace_proc pointer; it could become invalid! */
+static unsigned int line_off;	/* characters printed on the current line */
+static unsigned int prefix_off;	/* size of the prefix on the current line */
+static int print_pid;		/* next line gets a prefix regardless */
+static int print_susp;		/* print "<..>" before the next newline */
+static int add_space;		/* a soft space is pending on this line */
+
+/*
+ * Initialize the output channel.  Called before any other output functions,
+ * but after a child process (to be traced) has already been spawned.  If the
+ * given file string is not NULL, it is the path to a file that is to be used
+ * to write output to.  If it is NULL, output is written to standard error.
+ */
+int
+output_init(const char * file)
+{
+
+	/* Start out with an empty output buffer and no write error. */
+	out_err = FALSE;
+	out_len = 0;
+
+	/* No output has been printed on any line yet. */
+	line_off = 0;
+	prefix_off = 0;
+	last_pid = 0;
+	print_pid = FALSE;
+	print_susp = FALSE;
+	add_space = FALSE;
+
+	/*
+	 * We deal with write errors ourselves, so writing to a closed pipe
+	 * must not result in a signal.  O_NOSIGPIPE would do the job as well,
+	 * except that it would also affect other processes writing to the
+	 * same file object, even after our own termination.
+	 */
+	signal(SIGPIPE, SIG_IGN);
+
+	/* Without an output file, simply write to standard error. */
+	if (file == NULL) {
+		out_fd = STDERR_FILENO;
+		return 0;
+	}
+
+	/*
+	 * Create the output file with a restrictive mode.  Traces may contain
+	 * sensitive information (security-wise and otherwise), and the user
+	 * may not always pay attention to where the file ends up.  The file
+	 * descriptor is never closed explicitly.
+	 */
+	out_fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND,
+	    0600);
+	if (out_fd < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Write the given data to the given file descriptor, taking into account the
+ * possibility of partial writes and write errors.
+ */
+static void
+write_fd(int fd, const char *buf, size_t len)
+{
+	ssize_t r;
+
+	/* If we got a write error before, do not try to write more. */
+	if (out_err)
+		return;
+
+	/* Write all output, in chunks if we have to. */
+	while (len > 0) {
+		r = write(fd, buf, len);
+
+		/*
+		 * A write error (and that includes EOF) causes the program to
+		 * terminate with an error code.  For obvious reasons we cannot
+		 * print an error about this.  Do not even report to standard
+		 * error if the output was redirected, because that may mess
+		 * with the actual programs being run right now.
+		 */
+		if (r <= 0) {
+			out_err = TRUE;
+			break;
+		}
+
+		buf += r;	/* continue after a partial write */
+		len -= r;
+	}
+}
+
+/*
+ * Return TRUE iff an output error occurred and the program should terminate.
+ */
+int
+output_error(void)
+{
+	/* The flag is raised by write_fd() when a write() call fails. */
+	return out_err;
+}
+
+/*
+ * Print the given null-terminated string to the output channel.  Return the
+ * number of characters printed, for alignment purposes.  In the future, this
+ * number may end up being different from the number of bytes given to print,
+ * due to multibyte encoding or colors or whatnot.
+ */
+static unsigned int
+output_write(const char * text)
+{
+	const size_t len = strlen(text);
+
+	if (out_len + len > sizeof(out_buf)) {
+		/* The text does not fit: write out what we have so far. */
+		write_fd(out_fd, out_buf, out_len);
+		out_len = 0;
+
+		/*
+		 * Text that cannot fit in the buffer at all bypasses the
+		 * buffer altogether.
+		 */
+		if (len > sizeof(out_buf)) {
+			write_fd(out_fd, text, len);
+			return len;
+		}
+	}
+
+	/* At this point the text is known to fit in the buffer. */
+	memcpy(out_buf + out_len, text, len);
+	out_len += len;
+
+	return len;
+}
+
+/*
+ * Flush any pending output to the output channel.
+ */
+void
+output_flush(void)
+{
+	/* With nothing buffered, there is nothing to write. */
+	if (out_len <= 0)
+		return;
+
+	write_fd(out_fd, out_buf, out_len);
+	out_len = 0;
+}
+
+/*
+ * Print a PID prefix for the given process, or an info prefix if no process
+ * (NULL) is given.  Prefixes are only relevant when multiple processes are
+ * traced.  As long as there are multiple processes, each line is prefixed with
+ * the PID of the process.  As soon as the number of processes has been reduced
+ * back to one, one more line is prefixed with the PID of the remaining process
+ * (with a "'" instead of a "|") to help the user identify which process is
+ * left.  In addition, whenever a preempted call is about to be resumed, a "*"
+ * is printed instead of a space, so as to show that it is a continuation of a
+ * previous line.  An example of all these cases:
+ *
+ *   fork() = 3
+ *       3| Tracing test (pid 3)
+ *       3| fork() = 0
+ *       3| read(0, <..>
+ *       2| waitpid(-1, <..>
+ *    INFO| This is an example info line.
+ *       3|*read(0, "", 1024) = 0
+ *       3| exit(1)
+ *       3| Process exited normally with code 1
+ *       2'*waitpid(-1, W_EXITED(1), 0) = 3
+ *   exit(0)
+ *   Process exited normally with code 0
+ */
+static void
+put_prefix(struct trace_proc * proc, int resuming)
+{
+	unsigned int nprocs;
+	char buf[32];
+
+	assert(line_off == 0);
+
+	nprocs = proc_count();
+
+	/* TODO: add a command line option for always printing the pid. */
+	if (!print_pid && nprocs <= 1 && proc != NULL) {
+		/* No prefix is printed; a resumed call always has one. */
+		assert(!resuming);
+
+		prefix_off = 0;
+	} else {
+		/*
+		 * TODO: for now we count on PIDs having no more than five
+		 * digits, but that is bound to change at some point.  This
+		 * can be dealt with in several ways, none of them great.
+		 */
+		if (proc != NULL)
+			snprintf(buf, sizeof(buf), "%5d%c%c", proc->pid,
+			    (nprocs > 1) ? '|' : '\'', resuming ? '*' : ' ');
+		else
+			snprintf(buf, sizeof(buf), "%5s| ", "INFO");
+
+		line_off = output_write(buf);
+		prefix_off = line_off;
+		last_pid = (proc == NULL) ? 0 : proc->pid;
+	}
+
+	/* Remember whether the next line should get prefixed regardless. */
+	print_pid = (nprocs > 1 || proc == NULL);
+}
+
+/*
+ * Add a string to the end of the text recording for the given process.
+ * This is used only to record the call-enter output of system calls.
+ */
+static void
+record_add(struct trace_proc * proc, const char * text)
+{
+	size_t len, room;
+
+	assert(proc->recording);
+
+	/* Do not record any more once the buffer has been exhausted. */
+	if (proc->outlen == sizeof(proc->outbuf))
+		return;
+
+	room = sizeof(proc->outbuf) - proc->outlen - 1;
+	len = strlen(text);
+	/* A nonempty recording buffer is always null terminated. */
+	if (len >= room) {
+		proc->outlen = sizeof(proc->outbuf); /* buffer exhausted */
+		return;
+	}
+	strcpy(&proc->outbuf[proc->outlen], text);
+	proc->outlen += len;
+}
+
+/*
+ * Start recording text for the given process.  Since this marks the start of
+ * a call, remember to print a preemption marker when the call gets preempted.
+ */
+void
+record_start(struct trace_proc * proc)
+{
+	/* From here on, put_text() and put_space() record their output. */
+	proc->recording = TRUE;
+	/* Make put_newline() add the "<..>" marker to an unfinished line. */
+	print_susp = TRUE;
+}
+
+/*
+ * Stop recording text for the given process.
+ */
+void
+record_stop(struct trace_proc * proc)
+{
+	/* The text recorded so far is left in place for record_replay(). */
+	proc->recording = FALSE;
+}
+
+/*
+ * Clear recorded text for the given process.  This also marks the end of the
+ * entire call, so no longer print a suspension marker before the next newline.
+ */
+void
+record_clear(struct trace_proc * proc)
+{
+
+	assert(!proc->recording);
+	proc->outlen = 0;
+	/* Drop the marker only if the current line is for this process. */
+	if (proc->pid == last_pid)
+		print_susp = FALSE;
+}
+
+/*
+ * Replay the record for the given process on a new line, if the current line
+ * does not already have output for this process.  If it does, do nothing.
+ * If the process has no recorded output, just start a new line.  Return TRUE
+ * iff the caller must print its own replay text due to a recording overflow.
+ */
+int
+record_replay(struct trace_proc * proc)
+{
+	int space;	/* TRUE iff the recorded text ends with a space */
+
+	assert(!proc->recording);
+
+	/*
+	 * If there is output on the current line, and it is for the current
+	 * process, we must assume that it is the original, recorded text, and
+	 * thus, we should do nothing.  If output on the current line is for
+	 * another process, we must force a new line before replaying.
+	 */
+	if (line_off > 0) {
+		if (proc->pid == last_pid)
+			return FALSE;
+
+		put_newline();
+	}
+
+	/*
+	 * If there is nothing to replay, do nothing further.  This case may
+	 * occur when printing signals, in which case the caller still expects
+	 * a new line to be started.  This line must not be prefixed with a
+	 * "resuming" marker though--after all, nothing is being resumed here.
+	 */
+	if (proc->outlen == 0)
+		return FALSE;
+
+	/*
+	 * If there is text to replay, then this does mean we are in effect
+	 * resuming the recorded call, even if it is just to print a signal.
+	 * Thus, we must print a prefix that shows the call is being resumed.
+	 * Similarly, unless the recording is cleared before a newline, we must
+	 * suspend the line again, too.
+	 */
+	put_prefix(proc, TRUE /*resuming*/);
+
+	print_susp = TRUE;
+
+	/*
+	 * If the recording buffer was exhausted during recording, the caller
+	 * must generate the replay text instead.
+	 */
+	if (proc->outlen == sizeof(proc->outbuf))
+		return TRUE;
+
+	/*
+	 * Replay the recording.  If it ends with a space, turn it into a soft
+	 * space, because the recording may be followed immediately by a
+	 * newline; an example of this is the exit() exception.
+	 */
+	space = proc->outbuf[proc->outlen - 1] == ' ';
+	if (space)
+		proc->outbuf[proc->outlen - 1] = 0;
+	/* The text is printed without its trailing space, if it had one. */
+	put_text(proc, proc->outbuf);
+
+	if (space) {
+		put_space(proc);
+
+		/* Restore the space, in case another replay takes place. */
+		proc->outbuf[proc->outlen - 1] = ' ';
+	}
+
+	return FALSE;
+}
+
+/*
+ * Start a new line, and adjust the local state accordingly.  If nothing has
+ * been printed on the current line yet, this function is a no-op.  Otherwise,
+ * the output so far may have to be marked as preempted with the "<..>"
+ * preemption marker.
+ */
+void
+put_newline(void)
+{
+
+	if (line_off == 0)
+		return;
+
+	if (print_susp) {
+		if (add_space)
+			(void)output_write(" "); /* pending soft space first */
+		/* The preemption marker itself. */
+		(void)output_write("<..>");
+	}
+
+#if DEBUG
+	(void)output_write("|");
+#endif
+
+	(void)output_write("\n");
+	output_flush();
+	/* Reset all per-line state for the line that follows. */
+	line_off = 0;
+	add_space = FALSE;
+	print_susp = FALSE;
+	last_pid = 0;
+}
+
+/*
+ * Print a string as part of the output associated with a process.  If the
+ * current line contains output for another process, a newline will be printed
+ * first.  If the current line contains output for the same process, then the
+ * text will simply continue on the same line.  If the current line is empty,
+ * a process PID prefix may have to be printed first.  Either way, after this
+ * operation, the current line will contain text for the given process.  If
+ * requested, the text may also be recorded for the process, for later replay.
+ * As an exception, proc may be NULL when printing general information lines.
+ */
+void
+put_text(struct trace_proc * proc, const char * text)
+{
+	const pid_t pid = (proc != NULL) ? proc->pid : 0;
+
+	/*
+	 * If the current line holds unterminated output that is not ours,
+	 * start a new line first.  As a result, a line that is not associated
+	 * with a process has to be printed in one go.  This could be fixed,
+	 * but so far it has not been a problem.
+	 */
+	if (line_off > 0 && (proc == NULL || pid != last_pid))
+		put_newline();
+
+	/* A line that is (now) empty may need to start with a prefix. */
+	if (line_off == 0)
+		put_prefix(proc, FALSE /*resuming*/);
+
+	/* Record the given text if recording is enabled for the process. */
+	if (proc != NULL && proc->recording)
+		record_add(proc, text);
+
+	/*
+	 * Print a previously delayed space, if any.  Such soft spaces are
+	 * never part of the text to be saved here; they are supported for
+	 * one single case, which is described at put_space().
+	 */
+	if (add_space) {
+		add_space = FALSE;
+		line_off += output_write(" ");
+	}
+
+	/* Finally, print the actual text. */
+	line_off += output_write(text);
+
+	/* The current line now holds output for this process. */
+	last_pid = pid;
+}
+
+/*
+ * Add a space to the output for the given process, but only if and once more
+ * text is printed for the process afterwards.  The aim is to ensure that no
+ * lines ever end with a space, to prevent needless line wrapping on terminals.
+ * The space may have to be remembered for the current line (for preemption,
+ * which does not have a process pointer to work with) as well as recorded for
+ * later replay, if recording is enabled.  Consider the following example:
+ *
+ * [A]   3| execve(..) <..>
+ *       2| getpid(0) = 2 (ppid=1)
+ * [B]   3| execve(..) = -1 [ENOENT]
+ * [A]   3| exit(1) <..>
+ *       2| getpid(0) = 2 (ppid=1)
+ *       3| exit(1)
+ *       3| Process exited normally with code 1
+ *
+ * On the [A] lines, the space between the call's closing parenthesis and the
+ * "<..>" preemption marker is the result of add_space being set to TRUE; on
+ * the [B] line, the space between the closing parenthesis and the equals sign
+ * is the result of the space being recorded.
+ */
+void
+put_space(struct trace_proc * proc)
+{
+
+	/* A space may only follow earlier output for the same process. */
+	assert(proc->pid == last_pid);
+
+	/* Record the space, for the case that the call is preempted. */
+	if (proc->recording)
+		record_add(proc, " ");
+
+	/* Delay printing the space, for the case that it is not. */
+	add_space = TRUE;
+}
+
+/*
+ * Indent the remainders of the text on the line for this process, such that
+ * similar remainders are similarly aligned.  In particular, the remainder is
+ * the equals sign of a call, and everything after it.  Of course, alignment
+ * can only be used if the call has not already printed beyond the alignment
+ * position.  Also, the prefix must not be counted toward the alignment, as it
+ * is possible that a line without prefix may be preempted and later continued
+ * with prefix.  All things considered, the result would look like this:
+ *
+ *   getuid()                      = 1 (euid=1)
+ *   setuid(0)                     = -1 [EPERM]
+ *   write(2, "Permission denied\n", 18) = 18
+ *   fork()                        = 3
+ *       3| Tracing test (pid 3)
+ *       3| fork()                        = 0
+ *       3| exit(0)
+ *       3| Process exited normally with code 0
+ *       2' waitpid(-1, W_EXITED(0), 0)   = 3
+ *
+ */
+void put_align(struct trace_proc * __unused proc)
+{
+	/* Currently a no-op: the body consists of a comment only. */
+	/*
+	 * TODO: add actual support for this.  The following code works,
+	 * although not so efficiently.  The difficulty is the default
+	 * configuration and corresponding options.
+
+	while (line_off - prefix_off < 20)
+		put_text(proc, " ");
+
+	 */
+}
diff --git a/minix/usr.bin/trace/proc.c b/minix/usr.bin/trace/proc.c
new file mode 100644
index 000000000..665ea9f12
--- /dev/null
+++ b/minix/usr.bin/trace/proc.c
@@ -0,0 +1,97 @@
+
+#include "inc.h"
+
+static TAILQ_HEAD(, trace_proc) proc_root;
+static unsigned int nr_procs;
+
+/*
+ * Initialize the list of traced processes.
+ */
+void
+proc_init(void)
+{
+	/* Start out with an empty list and a matching count of zero. */
+	TAILQ_INIT(&proc_root);
+	nr_procs = 0;
+}
+
+/*
+ * Add a new process to the list of traced processes, allocating memory for it
+ * first.  Return the new process structure with its PID assigned and the rest
+ * zeroed out, or NULL upon allocation failure (with errno set appropriately).
+ */
+struct trace_proc *
+proc_add(pid_t pid)
+{
+	struct trace_proc *new_proc;
+
+	/* On allocation failure, errno is left as set by malloc(). */
+	if ((new_proc = malloc(sizeof(*new_proc))) == NULL)
+		return NULL;
+
+	/* All fields other than the PID start out as zero. */
+	memset(new_proc, 0, sizeof(*new_proc));
+	new_proc->pid = pid;
+
+	/* New processes go to the end of the list. */
+	TAILQ_INSERT_TAIL(&proc_root, new_proc, next);
+	nr_procs++;
+
+	return new_proc;
+}
+
+/*
+ * Retrieve the data structure for a traced process based on its PID.  Return
+ * a pointer to the structure, or NULL if no structure exists for this process.
+ */
+struct trace_proc *
+proc_get(pid_t pid)
+{
+	struct trace_proc *cur;
+
+	/* A linear search for now, so we can easily add a hashtable later.. */
+	TAILQ_FOREACH(cur, &proc_root, next) {
+		if (cur->pid == pid)
+			break;
+	}
+	/* This is NULL if the search ran off the end of the list. */
+	return cur;
+}
+
+/*
+ * Remove a process from the list of traced processes.
+ */
+void
+proc_del(struct trace_proc * proc)
+{
+	/* Unlink the structure from the list before releasing it. */
+	TAILQ_REMOVE(&proc_root, proc, next);
+	nr_procs--;
+	/* The given pointer must not be used by the caller after this. */
+	free(proc);
+}
+
+/*
+ * Iterator for the list of traced processes.  If a NULL pointer is given,
+ * return the first process in the list; otherwise, return the next process in
+ * the list.  Not stable with respect to list modifications.
+ */
+struct trace_proc *
+proc_next(struct trace_proc * proc)
+{
+
+	/* A NULL pointer starts the iteration at the head of the list. */
+	if (proc != NULL)
+		return TAILQ_NEXT(proc, next);
+	return TAILQ_FIRST(&proc_root);
+}
+
+/*
+ * Return the number of processes in the list of traced processes.
+ */
+unsigned int
+proc_count(void)
+{
+	/* The counter is kept up to date by proc_add() and proc_del(). */
+	return nr_procs;
+}
diff --git a/minix/usr.bin/trace/proc.h b/minix/usr.bin/trace/proc.h
new file mode 100644
index 000000000..509f523ee
--- /dev/null
+++ b/minix/usr.bin/trace/proc.h
@@ -0,0 +1,99 @@
+
+#include <sys/queue.h>
+
+/*
+ * The maximum nesting depth of parentheses/brackets.  The current maximum
+ * depth is something like six, for UDS control messages.  This constant can be
+ * increased as necessary without any problem.
+ */
+#define MAX_DEPTH	10
+
+/*
+ * The maximum size of text that may be recorded, including null terminator.
+ * Increasing this allows longer lines to be recorded and replayed without
+ * being cut short (see call_replay), but also increases memory usage.
+ */
+#define RECORD_BUFSZ	256
+
+struct trace_proc {
+	/* identity (public) */
+	pid_t pid;
+
+	/* data structure management (proc.c) */
+	TAILQ_ENTRY(trace_proc) next;	/* link in the list of processes */
+
+	/* general process state (trace.c) */
+	char name[PROC_NAME_LEN];
+	unsigned int trace_flags;	/* presumably a set of TF_ flags */
+	reg_t last_pc;
+	reg_t last_sp;
+
+	/* call enter-to-leave state (call.c) */
+	int call_type;
+	vir_bytes m_addr;
+	message m_out;
+	const char *call_name;
+	unsigned int call_flags;	/* presumably a set of CF_ flags */
+	const struct call_handler *call_handler;
+	int call_result;
+
+	/* output state (output.c) */
+	int recording;			/* TRUE while text is being recorded */
+	char outbuf[RECORD_BUFSZ];	/* the recorded text, if any */
+	size_t outlen;			/* text length; buffer size if exhausted */
+
+	/* formatting state (format.c) */
+	const char *next_sep;
+	int depth;
+	struct {
+		const char *sep;
+		int name;
+	} depths[MAX_DEPTH];
+
+	/* ioctl state (ioctl.c) */
+	int ioctl_index;
+	unsigned int ioctl_flags;	/* presumably a set of IF_ flags */
+};
+
+/* Trace flags. */
+#define TF_INCALL	0x01	/* the process has entered a system call */
+#define TF_SKIP		0x02	/* the system call result is to be skipped */
+#define TF_CTX_SKIP	0x04	/* skip call result only if context changes */
+#define TF_STOPPING	0x08	/* the process is expecting a SIGSTOP */
+#define TF_ATTACH	0x10	/* we have not started this process */
+#define TF_DETACH	0x20	/* detach from the process as soon as we can */
+#define TF_EXEC		0x40	/* the process may be performing an execve() */
+#define TF_NOCALL	0x80	/* no system call seen yet (for info only) */
+
+/* Trace classes, determining how the tracer engine should handle a call. */
+#define TC_NORMAL	0	/* normal call, no exceptions required */
+#define TC_EXEC		1	/* exec call, success on subsequent SIGSTOP */
+#define TC_SIGRET	2	/* sigreturn call, success on context change */
+
+/* Call flags. */
+#define CF_DONE		0x01	/* printing the call parameters is done */
+#define CF_NORETURN	0x02	/* the call does not return on success */
+#define CF_HIDE		0x04	/* do not print the current call */
+#define CF_IPC_ERR	0x08	/* a failure occurred at the IPC level */
+#define CF_REG_ERR	0x10	/* unable to retrieve the result register */
+#define CF_MSG_ERR	0x20	/* unable to copy in the reply message */
+
+/* Call types, determining how much has been printed up to the call split. */
+#define CT_NOTDONE	(0)	/* not all parameters have been printed yet */
+#define CT_DONE		(CF_DONE)	/* all parameters have been printed */
+#define CT_NORETURN	(CF_DONE | CF_NORETURN)	/* the no-return call type */
+
+/* Put flags. */
+#define PF_FAILED	0x01	/* call failed, results may be invalid */
+#define PF_LOCADDR	0x02	/* pointer is into local address space */
+/* Yes, PF_LOCAL would conflict with the packet family definition.  Bah. */
+#define PF_ALT		0x04	/* alternative output (callee specific) */
+#define PF_STRING	PF_ALT	/* buffer is string (put_buf only) */
+#define PF_FULL		0x08	/* print full format (callee specific) */
+#define PF_PATH		(PF_STRING | PF_FULL)	/* flags for path names */
+#define PF_NONAME	0x10	/* default to no field names at this depth */
+
+/* I/O control flags. */
+#define IF_OUT		0x1	/* call to print outgoing (written) data */
+#define IF_IN		0x2	/* call to print incoming (read) data */
+#define IF_ALL		0x4	/* all fields printed (not really a bit) */
diff --git a/minix/usr.bin/trace/proto.h b/minix/usr.bin/trace/proto.h
new file mode 100644
index 000000000..27a889421
--- /dev/null
+++ b/minix/usr.bin/trace/proto.h
@@ -0,0 +1,130 @@
+
+/* call.c */
+void put_endpoint(struct trace_proc *proc, const char *name, endpoint_t endpt);
+void put_equals(struct trace_proc *proc);
+void put_result(struct trace_proc *proc);
+int default_out(struct trace_proc *proc, const message *m_out);
+void default_in(struct trace_proc *proc, const message *m_out,
+	const message *m_in, int failed);
+int call_enter(struct trace_proc *proc, int show_stack);
+void call_leave(struct trace_proc *proc, int skip);
+void call_replay(struct trace_proc *proc);
+const char *call_name(struct trace_proc *proc);
+
+/* error.c */
+const char *get_error_name(int err);
+
+/* escape.c */
+const char *get_escape(char c);
+
+/* format.c */
+void format_reset(struct trace_proc *proc);
+void format_set_sep(struct trace_proc *proc, const char *sep);
+void format_push_sep(struct trace_proc *proc);
+void put_field(struct trace_proc *proc, const char *name, const char *text);
+void put_open(struct trace_proc *proc, const char *name, int flags,
+	const char *string, const char *separator);
+void put_close(struct trace_proc *proc, const char *string);
+void put_fmt(struct trace_proc *proc, const char *fmt, ...)
+	__attribute__((__format__(__printf__, 2, 3)));
+void put_value(struct trace_proc *proc, const char *name, const char *fmt, ...)
+	__attribute__((__format__(__printf__, 3, 4)));
+int put_open_struct(struct trace_proc *proc, const char *name, int flags,
+	vir_bytes addr, void *ptr, size_t size);
+void put_close_struct(struct trace_proc *proc, int all);
+void put_ptr(struct trace_proc *proc, const char *name, vir_bytes addr);
+void put_buf(struct trace_proc *proc, const char *name, int flags,
+	vir_bytes addr, ssize_t size);
+void put_flags(struct trace_proc *proc, const char *name,
+	const struct flags *fp, unsigned int num, const char *fmt,
+	unsigned int value);
+void put_tail(struct trace_proc * proc, unsigned int count,
+	unsigned int printed);
+
+/* ioctl.c */
+void put_ioctl_req(struct trace_proc *proc, const char *name,
+	unsigned long req, int is_svrctl);
+int put_ioctl_arg_out(struct trace_proc *proc, const char *name,
+	unsigned long req, vir_bytes addr, int is_svrctl);
+void put_ioctl_arg_in(struct trace_proc *proc, const char *name, int failed,
+	unsigned long req, vir_bytes addr, int is_svrctl);
+
+/* kernel.c */
+int kernel_check(pid_t pid);
+int kernel_get_name(pid_t pid, char *name, size_t size);
+int kernel_is_service(pid_t pid);
+int kernel_get_syscall(pid_t pid, reg_t reg[3]);
+int kernel_get_retreg(pid_t pid, reg_t *retreg);
+vir_bytes kernel_get_stacktop(void);
+int kernel_get_context(pid_t pid, reg_t *pc, reg_t *sp, reg_t *fp);
+void kernel_put_stacktrace(struct trace_proc * proc);
+
+/* mem.c */
+int mem_get_data(pid_t pid, vir_bytes addr, void *ptr, size_t len);
+int mem_get_user(pid_t pid, vir_bytes addr, void *ptr, size_t len);
+
+/* pm.c */
+void put_struct_timeval(struct trace_proc *proc, const char *name, int flags,
+	vir_bytes addr);
+void put_time(struct trace_proc *proc, const char *name, time_t time);
+void put_groups(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr, int count);
+
+/* output.c */
+int output_init(const char *file);
+int output_error(void);
+void output_flush(void);
+void record_start(struct trace_proc *proc);
+void record_stop(struct trace_proc *proc);
+void record_clear(struct trace_proc *proc);
+int record_replay(struct trace_proc *proc);
+void put_newline(void);
+void put_text(struct trace_proc *proc, const char *text);
+void put_space(struct trace_proc *proc);
+void put_align(struct trace_proc *proc);
+
+/* proc.c */
+void proc_init(void);
+struct trace_proc *proc_add(pid_t pid);
+struct trace_proc *proc_get(pid_t pid);
+void proc_del(struct trace_proc *proc);
+struct trace_proc *proc_next(struct trace_proc *last);
+unsigned int proc_count(void);
+
+/* signal.c */
+const char *get_signal_name(int sig);
+
+/* trace.c */
+extern int allnames;
+extern unsigned int verbose;
+extern unsigned int valuesonly;
+
+/* vfs.c */
+void put_fd(struct trace_proc *proc, const char *name, int fd);
+void put_dev(struct trace_proc *proc, const char *name, dev_t dev);
+
+/* service: per-service call tables; declared here, defined only once each */
+extern const struct calls pm_calls;
+extern const struct calls vfs_calls;
+extern const struct calls rs_calls;
+extern const struct calls vm_calls;
+extern const struct calls ipc_calls;
+
+/* ioctl/block.c */
+const char *block_ioctl_name(unsigned long req);
+int block_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr,
+	int dir);
+
+/* ioctl/char.c */
+const char *char_ioctl_name(unsigned long req);
+int char_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr,
+	int dir);
+
+/* ioctl/net.c */
+const char *net_ioctl_name(unsigned long req);
+int net_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr,
+	int dir);
+
+/* ioctl/svrctl.c */
+const char *svrctl_name(unsigned long req);
+int svrctl_arg(struct trace_proc *proc, unsigned long req, void *ptr, int dir);
diff --git a/minix/usr.bin/trace/service/ipc.c b/minix/usr.bin/trace/service/ipc.c
new file mode 100644
index 000000000..21368d950
--- /dev/null
+++ b/minix/usr.bin/trace/service/ipc.c
@@ -0,0 +1,445 @@
+/* This file is concerned with the IPC server, not with kernel-level IPC. */
+
+#include "inc.h"
+
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/sem.h>
+
+static void
+put_key(struct trace_proc * proc, const char * name, key_t key)
+{
+	/* Only IPC_PRIVATE has a name, shown unless 'valuesonly' is set. */
+	if (valuesonly || key != IPC_PRIVATE)
+		put_value(proc, name, "%ld", key);
+	else
+		put_field(proc, name, "IPC_PRIVATE");
+}
+
+static const struct flags ipcget_flags[] = {	/* for shmget and semget */
+	FLAG(IPC_CREAT),
+	FLAG(IPC_EXCL),
+};
+
+static int
+ipc_shmget_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Print all three shmget arguments from the request message. */
+	put_key(proc, "key", m_out->m_lc_ipc_shmget.key);
+	put_value(proc, "size", "%zu", m_out->m_lc_ipc_shmget.size);
+	put_flags(proc, "shmflg", ipcget_flags, COUNT(ipcget_flags), "0%o",
+	    m_out->m_lc_ipc_shmget.flag);
+
+	return CT_DONE;
+}
+
+static void
+ipc_shmget_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* On success, print the identifier found in the reply message. */
+	if (failed)
+		put_result(proc);
+	else
+		put_value(proc, NULL, "%d", m_in->m_lc_ipc_shmget.retid);
+}
+
+static const struct flags shmat_flags[] = {	/* for the shmat 'shmflg' */
+	FLAG(SHM_RDONLY),
+	FLAG(SHM_RND),
+};
+
+static int
+ipc_shmat_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Print all three shmat arguments from the request message. */
+	put_value(proc, "shmid", "%d", m_out->m_lc_ipc_shmat.id);
+	put_ptr(proc, "shmaddr", (vir_bytes)m_out->m_lc_ipc_shmat.addr);
+	put_flags(proc, "shmflg", shmat_flags, COUNT(shmat_flags), "0x%x",
+	    m_out->m_lc_ipc_shmat.flag);
+
+	return CT_DONE;
+}
+
+static void
+ipc_shmat_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* On success, print the address found in the reply message. */
+	if (failed)
+		put_result(proc);
+	else
+		put_ptr(proc, NULL, (vir_bytes)m_in->m_lc_ipc_shmat.retaddr);
+}
+
+static int
+ipc_shmdt_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The address is the only argument of shmdt. */
+	put_ptr(proc, "shmaddr", (vir_bytes)m_out->m_lc_ipc_shmdt.addr);
+
+	return CT_DONE;
+}
+
+static void
+put_shmctl_cmd(struct trace_proc * proc, const char * name, int cmd)
+{
+	const char *text = NULL;	/* stays NULL if no name is found */
+
+	if (!valuesonly) {
+		switch (cmd) {	/* TEXT() presumably sets 'text' - confirm */
+		TEXT(IPC_RMID);
+		TEXT(IPC_SET);
+		TEXT(IPC_STAT);
+		TEXT(SHM_STAT);
+		TEXT(SHM_INFO);
+		TEXT(IPC_INFO);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", cmd);	/* fall back to number */
+}
+
+static const struct flags shm_mode_flags[] = {	/* for shm_perm.mode */
+	FLAG(SHM_DEST),
+	FLAG(SHM_LOCKED),
+};
+
+static void
+put_struct_shmid_ds(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct shmid_ds buf;
+	int set;	/* nonzero: print the IPC_SET subset only */
+
+	if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf)))
+		return;
+
+	/* PF_ALT is passed for IPC_SET calls: print only a subset of fields. */
+	set = (flags & PF_ALT);
+
+	put_open(proc, "shm_perm", 0, "{", ", ");
+
+	put_value(proc, "uid", "%u", buf.shm_perm.uid);
+	put_value(proc, "gid", "%u", buf.shm_perm.gid);
+	if (!set && verbose > 0) {
+		put_value(proc, "cuid", "%u", buf.shm_perm.cuid);
+		put_value(proc, "cgid", "%u", buf.shm_perm.cgid);
+	}
+	put_flags(proc, "mode", shm_mode_flags, COUNT(shm_mode_flags),
+	    "0%03o", buf.shm_perm.mode);
+
+	put_close(proc, "}");
+
+	if (!set) {
+		put_value(proc, "shm_segsz", "%zu", buf.shm_segsz);
+		if (verbose > 0) {
+			put_value(proc, "shm_lpid", "%d", buf.shm_lpid);
+			put_value(proc, "shm_cpid", "%d", buf.shm_cpid);
+			put_time(proc, "shm_atime", buf.shm_atime);
+			put_time(proc, "shm_dtime", buf.shm_dtime);
+			put_time(proc, "shm_ctime", buf.shm_ctime);
+		}
+		put_value(proc, "shm_nattch", "%u", buf.shm_nattch);
+	}
+
+	put_close_struct(proc, set || verbose > 0);
+}
+
+static int
+ipc_shmctl_out(struct trace_proc * proc, const message * m_out)
+{
+	/* For the STAT commands, the buffer is printed upon return instead. */
+	put_value(proc, "shmid", "%d", m_out->m_lc_ipc_shmctl.id);
+	put_shmctl_cmd(proc, "cmd", m_out->m_lc_ipc_shmctl.cmd);
+
+	/* TODO: add support for the IPC_INFO and SHM_INFO structures.. */
+	switch (m_out->m_lc_ipc_shmctl.cmd) {
+	case IPC_STAT:
+	case SHM_STAT:
+		return CT_NOTDONE;
+
+	case IPC_SET:
+		put_struct_shmid_ds(proc, "buf", PF_ALT,
+		    (vir_bytes)m_out->m_lc_ipc_shmctl.buf);
+
+		return CT_DONE;
+
+	default:
+		put_ptr(proc, "buf", (vir_bytes)m_out->m_lc_ipc_shmctl.buf);
+
+		return CT_DONE;
+	}
+}
+
+static void
+ipc_shmctl_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* First print the buffer for the commands left unfinished on entry. */
+	switch (m_out->m_lc_ipc_shmctl.cmd) {
+	case IPC_STAT:
+	case SHM_STAT:
+		put_struct_shmid_ds(proc, "buf", failed,
+		    (vir_bytes)m_out->m_lc_ipc_shmctl.buf);
+		put_equals(proc);
+
+		break;
+	}
+
+	if (!failed) {	/* these commands yield a value in the reply */
+		switch (m_out->m_lc_ipc_shmctl.cmd) {
+		case SHM_INFO:
+		case SHM_STAT:
+		case IPC_INFO:
+			put_value(proc, NULL, "%d", m_in->m_lc_ipc_shmctl.ret);
+
+			return;
+		}
+	}
+
+	put_result(proc);
+}
+
+static int
+ipc_semget_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Print all three semget arguments from the request message. */
+	put_key(proc, "key", m_out->m_lc_ipc_semget.key);
+	put_value(proc, "nsems", "%d", m_out->m_lc_ipc_semget.nr);
+	put_flags(proc, "semflg", ipcget_flags, COUNT(ipcget_flags), "0%o",
+	    m_out->m_lc_ipc_semget.flag);
+
+	return CT_DONE;
+}
+
+static void
+ipc_semget_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* On success, print the identifier found in the reply message. */
+	if (failed)
+		put_result(proc);
+	else
+		put_value(proc, NULL, "%d", m_in->m_lc_ipc_semget.retid);
+}
+
+static void
+put_semctl_cmd(struct trace_proc * proc, const char * name, int cmd)
+{
+	const char *text = NULL;	/* stays NULL if no name is found */
+
+	if (!valuesonly) {
+		switch (cmd) {	/* TEXT() presumably sets 'text' - confirm */
+		TEXT(IPC_RMID);
+		TEXT(IPC_SET);
+		TEXT(IPC_STAT);
+		TEXT(GETNCNT);
+		TEXT(GETPID);
+		TEXT(GETVAL);
+		TEXT(GETALL);
+		TEXT(GETZCNT);
+		TEXT(SETVAL);
+		TEXT(SETALL);
+		TEXT(SEM_STAT);
+		TEXT(SEM_INFO);
+		TEXT(IPC_INFO);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", cmd);	/* fall back to number */
+}
+
+static void
+put_struct_semid_ds(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct semid_ds buf;
+	int set;	/* nonzero: print the IPC_SET subset only */
+
+	if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf)))
+		return;
+
+	/* PF_ALT is passed for IPC_SET calls: print only a subset of fields. */
+	set = (flags & PF_ALT);
+
+	put_open(proc, "sem_perm", 0, "{", ", ");
+
+	put_value(proc, "uid", "%u", buf.sem_perm.uid);
+	put_value(proc, "gid", "%u", buf.sem_perm.gid);
+	if (!set && verbose > 0) {
+		put_value(proc, "cuid", "%u", buf.sem_perm.cuid);
+		put_value(proc, "cgid", "%u", buf.sem_perm.cgid);
+	}
+	put_value(proc, "mode", "0%03o", buf.sem_perm.mode);
+
+	put_close(proc, "}");
+
+	if (!set) {
+		if (verbose > 0) {
+			put_time(proc, "sem_otime", buf.sem_otime);
+			put_time(proc, "sem_ctime", buf.sem_ctime);
+		}
+		put_value(proc, "sem_nsems", "%u", buf.sem_nsems);
+	}
+
+	put_close_struct(proc, set || verbose > 0);
+}
+
+
+static int
+ipc_semctl_out(struct trace_proc * proc, const message * m_out)
+{
+	/* How the fourth ('opt') argument is shown depends on the command. */
+	put_value(proc, "semid", "%d", m_out->m_lc_ipc_semctl.id);
+	put_value(proc, "semnum", "%d", m_out->m_lc_ipc_semctl.num);
+	put_semctl_cmd(proc, "cmd", m_out->m_lc_ipc_semctl.cmd);
+
+	/* TODO: add support for the IPC_INFO and SEM_INFO structures.. */
+	switch (m_out->m_lc_ipc_semctl.cmd) {
+	case IPC_STAT:
+	case SEM_STAT:
+		return CT_NOTDONE;	/* buffer is printed upon return */
+
+	case IPC_SET:
+		put_struct_semid_ds(proc, "buf", PF_ALT,
+		    (vir_bytes)m_out->m_lc_ipc_semctl.opt);
+
+		return CT_DONE;
+
+	case IPC_INFO:
+	case SEM_INFO:
+		put_ptr(proc, "buf", (vir_bytes)m_out->m_lc_ipc_semctl.opt);
+
+		return CT_DONE;
+
+	case GETALL:
+	case SETALL:
+		put_ptr(proc, "array", (vir_bytes)m_out->m_lc_ipc_semctl.opt);
+
+		return CT_DONE;
+
+	case SETVAL:
+		put_value(proc, "val", "%d", m_out->m_lc_ipc_semctl.opt);
+
+		return CT_DONE;
+
+	default:
+		return CT_DONE;	/* no fourth argument is printed */
+	}
+}
+
+static void
+ipc_semctl_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* First print the buffer for the commands left unfinished on entry. */
+	switch (m_out->m_lc_ipc_semctl.cmd) {
+	case IPC_STAT:
+	case SEM_STAT:
+		put_struct_semid_ds(proc, "buf", failed,
+		    (vir_bytes)m_out->m_lc_ipc_semctl.opt);
+		put_equals(proc);
+
+		break;
+	}
+
+	if (!failed) {	/* these commands yield a value in the reply */
+		switch (m_out->m_lc_ipc_semctl.cmd) {
+		case GETNCNT:
+		case GETPID:
+		case GETVAL:
+		case GETZCNT:
+		case SEM_INFO:
+		case SEM_STAT:
+		case IPC_INFO:
+			put_value(proc, NULL, "%d", m_in->m_lc_ipc_semctl.ret);
+			return;
+		}
+	}
+	put_result(proc);
+}
+
+static const struct flags sem_flags[] = {	/* for sembuf's sem_flg */
+	FLAG(IPC_NOWAIT),
+	FLAG(SEM_UNDO),
+};
+
+static void
+put_struct_sembuf(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct sembuf sop;
+	int show_flg;
+
+	if (!put_open_struct(proc, name, flags, addr, &sop, sizeof(sop)))
+		return;
+
+	/* sem_flg is shown when verbose, or if any bit but SEM_UNDO is set. */
+	show_flg = (verbose > 0 || (sop.sem_flg & ~SEM_UNDO) != 0);
+	put_value(proc, "sem_num", "%u", sop.sem_num);
+	put_value(proc, "sem_op", "%d", sop.sem_op);
+	if (show_flg)
+		put_flags(proc, "sem_flg", sem_flags, COUNT(sem_flags), "0x%x",
+		   sop.sem_flg);
+
+	/* Only report all fields as printed if sem_flg was included. */
+	put_close_struct(proc, show_flg);
+}
+
+static void
+put_sembuf_array(struct trace_proc * proc, const char * name, vir_bytes addr,
+	size_t count)
+{
+	struct sembuf buf[SEMOPM]; /* about 600 bytes, so OK for the stack */
+	size_t i;
+
+	if (valuesonly > 1 || count > SEMOPM ||
+	    mem_get_data(proc->pid, addr, &buf, count * sizeof(buf[0])) != 0) {
+		put_ptr(proc, name, addr);	/* print just the pointer */
+
+		return;
+	}
+
+	put_open(proc, name, PF_NONAME, "[", ", ");
+	for (i = 0; i < count; i++)	/* entries are local copies now */
+		put_struct_sembuf(proc, NULL, PF_LOCADDR, (vir_bytes)&buf[i]);
+	put_close(proc, "]");
+}
+
+static int
+ipc_semop_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The 'nsops' value, printed last, is also the 'sops' array size. */
+	put_value(proc, "semid", "%d", m_out->m_lc_ipc_semop.id);
+	put_sembuf_array(proc, "sops", (vir_bytes)m_out->m_lc_ipc_semop.ops,
+	    m_out->m_lc_ipc_semop.size);
+	put_value(proc, "nsops", "%zu", m_out->m_lc_ipc_semop.size);
+
+	return CT_DONE;
+}
+
+#define IPC_CALL(c) [((IPC_ ## c) - IPC_BASE)]	/* index relative to base */
+
+static const struct call_handler ipc_map[] = {	/* indexed by IPC_CALL() */
+	IPC_CALL(SHMGET) = HANDLER("shmget", ipc_shmget_out, ipc_shmget_in),
+	IPC_CALL(SHMAT) = HANDLER("shmat", ipc_shmat_out, ipc_shmat_in),
+	IPC_CALL(SHMDT) = HANDLER("shmdt", ipc_shmdt_out, default_in),
+	IPC_CALL(SHMCTL) = HANDLER("shmctl", ipc_shmctl_out, ipc_shmctl_in),
+	IPC_CALL(SEMGET) = HANDLER("semget", ipc_semget_out, ipc_semget_in),
+	IPC_CALL(SEMCTL) = HANDLER("semctl", ipc_semctl_out, ipc_semctl_in),
+	IPC_CALL(SEMOP) = HANDLER("semop", ipc_semop_out, default_in),
+};
+
+const struct calls ipc_calls = {	/* declared in proto.h */
+	.endpt = ANY,
+	.base = IPC_BASE,	/* the base subtracted by IPC_CALL() */
+	.map = ipc_map,
+	.count = COUNT(ipc_map)
+};
diff --git a/minix/usr.bin/trace/service/pm.c b/minix/usr.bin/trace/service/pm.c
new file mode 100644
index 000000000..15611235e
--- /dev/null
+++ b/minix/usr.bin/trace/service/pm.c
@@ -0,0 +1,1396 @@
+
+#include "inc.h"
+
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/resource.h>
+#include <sys/utsname.h>
+#include <sys/reboot.h>
+#include <minix/profile.h>
+
+static int
+pm_exit_out(struct trace_proc * proc, const message * m_out)
+{
+	/* CT_NORETURN: this call does not return on success. */
+	put_value(proc, "status", "%d", m_out->m_lc_pm_exit.status);
+
+	return CT_NORETURN;
+}
+
+static const struct flags waitpid_options[] = {	/* waitpid 'options' */
+	FLAG(WNOHANG),
+	FLAG(WUNTRACED),
+	FLAG(WALTSIG),
+	FLAG(WALLSIG),
+	FLAG(WNOWAIT),
+	FLAG(WNOZOMBIE),
+	FLAG(WOPTSCHECKED),
+};
+
+static void
+put_waitpid_status(struct trace_proc * proc, const char * name, int status)
+{
+	const char *signame;
+	int sig;	/* the terminating or stopping signal number */
+
+	/*
+	 * There is no suitable set of macros to be used here, so we're going
+	 * to invent our own: W_EXITED, W_SIGNALED, and W_STOPPED.  Hopefully
+	 * they are sufficiently clear even though they don't actually exist.
+	 * The code below is downright messy, but it also ensures that no bits
+	 * are set unexpectedly in the status.
+	 */
+	if (!valuesonly && WIFEXITED(status) &&
+	    status == W_EXITCODE(WEXITSTATUS(status), 0)) {
+		put_value(proc, name, "W_EXITED(%d)",
+		    WEXITSTATUS(status));
+
+		return;
+	}
+
+	/* WCOREDUMP() actually returns WCOREFLAG or 0, but better safe.. */
+	if (!valuesonly && WIFSIGNALED(status) && status == (W_EXITCODE(0,
+	    WTERMSIG(status)) | (WCOREDUMP(status) ? WCOREFLAG : 0))) {
+		sig = WTERMSIG(status);
+
+		if ((signame = get_signal_name(sig)) != NULL)
+			put_value(proc, name, "W_SIGNALED(%s)", signame);
+		else
+			put_value(proc, name, "W_SIGNALED(%u)", sig);
+
+		if (WCOREDUMP(status))
+			put_text(proc, "|WCOREDUMP");
+
+		return;
+	}
+
+	if (!valuesonly && WIFSTOPPED(status) &&
+	    status == W_STOPCODE(WSTOPSIG(status))) {
+		sig = WSTOPSIG(status);
+
+		if ((signame = get_signal_name(sig)) != NULL)
+			put_value(proc, name, "W_STOPPED(%s)", signame);
+		else
+			put_value(proc, name, "W_STOPPED(%u)", sig);
+
+		return;
+	}
+
+	/*
+	 * If we get here, either valuesonly is enabled or the resulting status
+	 * is not one we recognize, for example because extra bits are set.
+	 */
+	put_value(proc, name, "0x%04x", status);
+}
+
+static int
+pm_waitpid_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The status and options arguments are printed upon return. */
+	put_value(proc, "pid", "%d", m_out->m_lc_pm_waitpid.pid);
+
+	return CT_NOTDONE;
+}
+
+static void
+pm_waitpid_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/*
+	 * Print the arguments not yet printed on entry, and then the result.
+	 * If the result is zero, there is no status to show.  Also, since the
+	 * status is returned in the result message, we cannot print the user-
+	 * given pointer.  Instead, upon failure we show "&.." to indicate an
+	 * unknown pointer.
+	 */
+	if (!failed && m_in->m_type > 0)
+		put_waitpid_status(proc, "status",
+		    m_in->m_pm_lc_waitpid.status);
+	else
+		put_field(proc, "status", "&..");
+	put_flags(proc, "options", waitpid_options, COUNT(waitpid_options),
+	    "0x%x", m_out->m_lc_pm_waitpid.options);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static void
+pm_getpid_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* Print the result, and on success the parent PID as well. */
+	put_result(proc);
+	if (failed)
+		return;
+	put_open(proc, NULL, 0, "(", ", ");
+	put_value(proc, "ppid", "%d", m_in->m_pm_lc_getpid.parent_pid);
+	put_close(proc, ")");
+}
+
+/* Shared between setuid and seteuid, which both take a single user ID. */
+static int
+pm_setuid_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_value(proc, "uid", "%u", m_out->m_lc_pm_setuid.uid);
+
+	return CT_DONE;
+}
+
+static void
+pm_getuid_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* Print the result, and on success the effective UID as well. */
+	put_result(proc);
+	if (failed)
+		return;
+	put_open(proc, NULL, 0, "(", ", ");
+	put_value(proc, "euid", "%u", m_in->m_pm_lc_getuid.euid);
+	put_close(proc, ")");
+}
+
+static int
+pm_stime_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The time to set is taken from the 'sec' field of the request. */
+	put_time(proc, "time", m_out->m_lc_pm_time.sec);
+
+	return CT_DONE;
+}
+
+static void
+put_signal(struct trace_proc * proc, const char * name, int sig)
+{
+	const char *text = valuesonly ? NULL : get_signal_name(sig);
+
+	if (text == NULL)	/* names disabled, or no name known */
+		put_value(proc, name, "%d", sig);
+	else
+		put_field(proc, name, text);
+}
+
+static void
+put_ptrace_req(struct trace_proc * proc, const char * name, int req)
+{
+	const char *text = NULL;	/* stays NULL if no name is found */
+
+	if (!valuesonly) {
+		switch (req) {	/* TEXT() presumably sets 'text' - confirm */
+		TEXT(T_STOP);
+		TEXT(T_OK);
+		TEXT(T_ATTACH);
+		TEXT(T_DETACH);
+		TEXT(T_RESUME);
+		TEXT(T_STEP);
+		TEXT(T_SYSCALL);
+		TEXT(T_EXIT);
+		TEXT(T_GETINS);
+		TEXT(T_GETDATA);
+		TEXT(T_GETUSER);
+		TEXT(T_SETINS);
+		TEXT(T_SETDATA);
+		TEXT(T_SETUSER);
+		TEXT(T_SETOPT);
+		TEXT(T_GETRANGE);
+		TEXT(T_SETRANGE);
+		TEXT(T_READB_INS);
+		TEXT(T_WRITEB_INS);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", req);	/* fall back to number */
+}
+
+static void
+put_struct_ptrace_range(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct ptrace_range pr;
+
+	if (!put_open_struct(proc, name, flags, addr, &pr, sizeof(pr)))
+		return;
+
+	/* Print the address space by name if it is one of the two we know. */
+	if (!valuesonly && pr.pr_space == TS_INS)
+		put_field(proc, "pr_space", "TS_INS");
+	else if (!valuesonly && pr.pr_space == TS_DATA)
+		put_field(proc, "pr_space", "TS_DATA");
+	else
+		put_value(proc, "pr_space", "%d", pr.pr_space);
+	put_value(proc, "pr_addr", "0x%lx", pr.pr_addr);
+	put_ptr(proc, "pr_ptr", (vir_bytes)pr.pr_ptr);
+	put_value(proc, "pr_size", "%zu", pr.pr_size);
+
+	put_close_struct(proc, TRUE /*all*/);
+}
+
+static int
+pm_ptrace_out(struct trace_proc * proc, const message * m_out)
+{
+	/* How 'addr' and 'data' are shown depends on the request type. */
+	put_ptrace_req(proc, "req", m_out->m_lc_pm_ptrace.req);
+	put_value(proc, "pid", "%d", m_out->m_lc_pm_ptrace.pid);
+
+	switch (m_out->m_lc_pm_ptrace.req) {
+	case T_GETINS:
+	case T_GETDATA:
+	case T_GETUSER:
+	case T_READB_INS:
+		put_value(proc, "addr", "0x%lx", m_out->m_lc_pm_ptrace.addr);
+		put_value(proc, "data", "%ld", m_out->m_lc_pm_ptrace.data);
+		break;
+	case T_SETINS:
+	case T_SETDATA:
+	case T_SETUSER:
+	case T_WRITEB_INS:
+		put_value(proc, "addr", "0x%lx", m_out->m_lc_pm_ptrace.addr);
+		put_value(proc, "data", "0x%lx", m_out->m_lc_pm_ptrace.data);
+		break;
+	case T_RESUME:
+	case T_STEP:
+	case T_SYSCALL:	/* for these, 'data' is printed as a signal */
+		put_value(proc, "addr", "%ld", m_out->m_lc_pm_ptrace.addr);
+		put_signal(proc, "data", m_out->m_lc_pm_ptrace.data);
+		break;
+	case T_GETRANGE:
+	case T_SETRANGE:	/* 'addr' points to a ptrace_range structure */
+		put_struct_ptrace_range(proc, "addr", 0,
+		    m_out->m_lc_pm_ptrace.addr);
+		put_value(proc, "data", "%ld", m_out->m_lc_pm_ptrace.data);
+		break;
+	default:
+		put_value(proc, "addr", "%ld", m_out->m_lc_pm_ptrace.addr);
+		put_value(proc, "data", "%ld", m_out->m_lc_pm_ptrace.data);
+		break;
+	}
+
+	return CT_DONE;
+}
+
+static void
+pm_ptrace_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* For the read requests, show the data from the reply in hex. */
+	if (!failed) {
+		switch (m_out->m_lc_pm_ptrace.req) {
+		case T_GETINS:
+		case T_GETDATA:
+		case T_GETUSER:
+		case T_READB_INS:
+			put_value(proc, NULL, "0x%lx",
+			    m_in->m_pm_lc_ptrace.data);
+			return;
+		}
+	}
+
+	put_result(proc);
+}
+
+void
+put_groups(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr, int count)
+{
+	gid_t groups[NGROUPS_MAX];	/* local copy of the group list */
+	int i;
+
+	if ((flags & PF_FAILED) || valuesonly || count < 0 ||
+	    count > NGROUPS_MAX || (count > 0 && mem_get_data(proc->pid, addr,
+	    groups, count * sizeof(groups[0])) < 0)) {
+		if (flags & PF_LOCADDR)
+			put_field(proc, name, "&..");	/* no local addresses */
+		else
+			put_ptr(proc, name, addr);
+
+		return;
+	}
+
+	put_open(proc, name, PF_NONAME, "[", ", ");
+	for (i = 0; i < count; i++)
+		put_value(proc, NULL, "%u", groups[i]);
+	put_close(proc, "]");
+}
+
+static int
+pm_setgroups_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The list is an input argument, so print it right away. */
+	put_value(proc, "ngroups", "%d", m_out->m_lc_pm_groups.num);
+	put_groups(proc, "grouplist", 0, m_out->m_lc_pm_groups.ptr,
+	    m_out->m_lc_pm_groups.num);
+
+	return CT_DONE;
+}
+
+static int
+pm_getgroups_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The group list itself is printed upon return. */
+	put_value(proc, "ngroups", "%d", m_out->m_lc_pm_groups.num);
+
+	return CT_NOTDONE;
+}
+
+static void
+pm_getgroups_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* The reply type is used as the number of groups to print. */
+	put_groups(proc, "grouplist", failed, m_out->m_lc_pm_groups.ptr,
+	    m_in->m_type);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static int
+pm_kill_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Print the target PID and the signal, the latter by name if known. */
+	put_value(proc, "pid", "%d", m_out->m_lc_pm_sig.pid);
+	put_signal(proc, "sig", m_out->m_lc_pm_sig.nr);
+
+	return CT_DONE;
+}
+
+/* Shared between setgid and setegid, which both take a single group ID. */
+static int
+pm_setgid_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_value(proc, "gid", "%u", m_out->m_lc_pm_setgid.gid);
+
+	return CT_DONE;
+}
+
+static void
+pm_getgid_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* Print the result, and on success the effective GID as well. */
+	put_result(proc);
+	if (failed)
+		return;
+	put_open(proc, NULL, 0, "(", ", ");
+	put_value(proc, "egid", "%u", m_in->m_pm_lc_getgid.egid);
+	put_close(proc, ")");
+}
+
+static int
+put_frame_string(struct trace_proc * proc, vir_bytes frame, size_t len,
+	vir_bytes addr)
+{
+	vir_bytes stacktop, offset;
+
+	/*
+	 * Print one string of an exec frame.  The addresses in the frame
+	 * assume that the end of the frame is located at the new process stack
+	 * top, a hardcoded system-global value, so convert the given address
+	 * back to its location within the frame that was actually passed in.
+	 * Return FALSE if the address is rejected as out of range, else TRUE.
+	 */
+	stacktop = kernel_get_stacktop();
+
+	if (addr >= stacktop)
+		return FALSE;
+	offset = stacktop - addr;	/* bytes from string to frame end */
+	if (offset >= len)
+		return FALSE;
+	addr = frame + len - offset;
+
+	/*
+	 * TODO: while using put_buf() is highly convenient, it does require at
+	 * least one copy operation per printed string.  The strings are very
+	 * likely to be consecutive in memory, so copying in larger chunks at
+	 * once would be preferable.  Also, if copying from the frame fails,
+	 * put_buf() will print the string address as we corrected it above,
+	 * rather than the address as found in the frame.  A copy failure would
+	 * always be a case of malice on the traced process's behalf, though.
+	 */
+	put_buf(proc, NULL, PF_STRING, addr, offset);
+
+	return TRUE;
+}
+
+/*
+ * Print the contents of the exec frame, which includes both pointers and
+ * actual string data for the arguments and environment variables to be used.
+ * Even though we know that the entire frame is not going to exceed ARG_MAX
+ * bytes, this is too large a size for a static buffer, and we'd like to avoid
+ * allocating large dynamic buffers as well.  The situation is complicated by
+ * the fact that any string in the frame may run up to the end of the frame.
+ */
+static void
+put_exec_frame(struct trace_proc * proc, vir_bytes addr, size_t len)
+{
+	void *argv[64];
+	size_t off, chunk;
+	unsigned int i, count, max, argv_max, envp_max;
+	int first, ok, nulls;
+
+	if (valuesonly) {
+		put_ptr(proc, "frame", addr);
+		put_value(proc, "framelen", "%zu", len);
+
+		return;
+	}
+
+	if (verbose == 0) {
+		argv_max = 16;
+		envp_max = 0;
+	} else if (verbose == 1)
+		argv_max = envp_max = 64;
+	else
+		argv_max = envp_max = INT_MAX;
+
+	off = sizeof(int); /* skip 'argc' at the start of the frame */
+	first = TRUE;
+	ok = TRUE;
+	nulls = 0;
+	count = 0;
+	max = argv_max;
+
+	do {
+		/* Stop once less than one whole pointer is left to read. */
+		if (off >= len || len - off < sizeof(argv[0]))
+			break;
+		chunk = (len - off < sizeof(argv)) ? len - off : sizeof(argv);
+		if (mem_get_data(proc->pid, addr + off, argv, chunk) != 0)
+			break;
+
+		if (first) {
+			put_open(proc, "argv", PF_NONAME, "[", ", ");
+
+			first = FALSE;
+		}
+
+		for (i = 0; i < chunk / sizeof(void *) && ok; i++) {
+			if (argv[i] == NULL) {
+				if (count > max)
+					put_tail(proc, count, max);
+				put_close(proc, "]");
+				if (nulls++ == 0) {
+					put_open(proc, "envp", PF_NONAME, "[",
+					    ", ");
+					count = 0;
+					max = envp_max;
+				} else
+					break; /* two NULL pointers: done! */
+			} else if (count++ < max)
+				ok = put_frame_string(proc, addr, len,
+				    (vir_bytes)argv[i]);
+		}
+
+		off += chunk;
+	} while (nulls < 2 && ok);
+
+	/*
+	 * Handle failure cases, implied by not reaching the second NULL
+	 * in the array.  Successful completion is handled in the loop above.
+	 * Note that 'ok' is not always cleared on failure, as it is used only
+	 * to break out of the outer loop.
+	 */
+	if (first) {
+		put_ptr(proc, "argv", addr + off);
+		put_field(proc, "envp", "&..");
+	} else if (nulls < 2) {
+		put_tail(proc, 0, 0);
+		put_close(proc, "]");
+		if (nulls < 1) {
+			put_open(proc, "envp", PF_NONAME, "[", ", ");
+			put_tail(proc, 0, 0);
+			put_close(proc, "]");
+		}
+	}
+}
+
+static int
+pm_exec_out(struct trace_proc * proc, const message * m_out)
+{
+	/* CT_NORETURN: this call does not return on success. */
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_pm_exec.name,
+	    m_out->m_lc_pm_exec.namelen);
+	put_exec_frame(proc, m_out->m_lc_pm_exec.frame,
+	    m_out->m_lc_pm_exec.framelen);
+
+	return CT_NORETURN;
+}
+
+/* The idea is that this function may one day print a human-readable time. */
+void
+put_time(struct trace_proc * proc, const char * name, time_t time)
+{
+	/* For now, the value is printed as a signed 64-bit decimal number. */
+	put_value(proc, name, "%"PRId64, time);
+}
+
+void
+put_struct_timeval(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct timeval tv;
+
+	/* No field names; they just make things harder to read. */
+	if (!put_open_struct(proc, name, flags | PF_NONAME, addr, &tv,
+	    sizeof(tv)))
+		return;
+
+	/* Both branches print the same output for now; see put_time(). */
+	if (flags & PF_ALT)
+		put_time(proc, "tv_sec", tv.tv_sec);
+	else
+		put_value(proc, "tv_sec", "%"PRId64, tv.tv_sec);
+	put_value(proc, "tv_usec", "%d", tv.tv_usec);
+
+	put_close_struct(proc, TRUE /*all*/);
+}
+
+static void
+put_struct_itimerval(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct itimerval it;
+
+	/*
+	 * This used to pass PF_NONAME, but the layout may not be clear enough
+	 * without names.  It does turn simple alarm(1) calls into rather
+	 * lengthy output, though.
+	 */
+	if (!put_open_struct(proc, name, flags, addr, &it, sizeof(it)))
+		return;
+
+	/* The two members are in our local copy now, hence PF_LOCADDR. */
+	put_struct_timeval(proc, "it_interval", PF_LOCADDR,
+	    (vir_bytes)&it.it_interval);
+	put_struct_timeval(proc, "it_value", PF_LOCADDR,
+	    (vir_bytes)&it.it_value);
+
+	put_close_struct(proc, TRUE /*all*/);
+}
+
+static void
+put_itimer_which(struct trace_proc * proc, const char * name, int which)
+{
+	const char *text = NULL;	/* stays NULL if no name is found */
+
+	if (!valuesonly) {
+		switch (which) { /* TEXT() presumably sets 'text' - confirm */
+		TEXT(ITIMER_REAL);
+		TEXT(ITIMER_VIRTUAL);
+		TEXT(ITIMER_PROF);
+		TEXT(ITIMER_MONOTONIC);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", which);	/* fall back to number */
+}
+
+static const char *
+pm_itimer_name(const message * m_out)
+{
+	/* One request serves both calls: a nonzero 'value' means setitimer. */
+	return (m_out->m_lc_pm_itimer.value != 0) ? "setitimer" : "getitimer";
+}
+
+static int
+pm_itimer_out(struct trace_proc * proc, const message * m_out)
+{
+	/* A nonzero 'value' pointer makes this a setitimer call. */
+	put_itimer_which(proc, "which", m_out->m_lc_pm_itimer.which);
+	if (m_out->m_lc_pm_itimer.value != 0) {
+		put_struct_itimerval(proc, "value", 0,
+		    m_out->m_lc_pm_itimer.value);
+
+		/*
+		 * If there will be no old values to print, finish the call
+		 * now.  For setitimer only; getitimer may not pass NULL.
+		 */
+		if (m_out->m_lc_pm_itimer.ovalue == 0) {
+			put_ptr(proc, "ovalue", 0);
+
+			return CT_DONE;
+		}
+	}
+
+	return CT_NOTDONE;
+}
+
+static void
+pm_itimer_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* Print the returned timer values, unless all was done on entry. */
+	if (m_out->m_lc_pm_itimer.value == 0 ||
+	    m_out->m_lc_pm_itimer.ovalue != 0) {
+		put_struct_itimerval(proc,
+		    (m_out->m_lc_pm_itimer.value != 0) ? "ovalue" : "value",
+		    failed, m_out->m_lc_pm_itimer.ovalue);
+		put_equals(proc);
+	}
+	put_result(proc);
+}
+
+static void
+put_struct_mcontext(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	mcontext_t ctx;	/* none of its fields are printed yet; see below */
+
+	if (!put_open_struct(proc, name, flags, addr, &ctx, sizeof(ctx)))
+		return;
+
+	/*
+	 * TODO: print actual fields.  Then again, the ones that are saved and
+	 * restored (FPU state) are hardly interesting enough to print..
+	 */
+
+	put_close_struct(proc, FALSE /*all*/);
+}
+
+static int
+pm_getmcontext_out(struct trace_proc * __unused proc,
+	const message * __unused m_out)
+{
+	return CT_NOTDONE; /* the "mcp" argument is printed upon return */
+}
+
+/* Print the machine context filled in by getmcontext, and the result. */
+static void
+pm_getmcontext_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	put_struct_mcontext(proc, "mcp", failed, m_out->m_lc_pm_mcontext.ctx);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Print the machine context argument of a setmcontext call. */
+static int
+pm_setmcontext_out(struct trace_proc * proc, const message * m_out)
+{
+	put_struct_mcontext(proc, "mcp", 0, m_out->m_lc_pm_mcontext.ctx);
+
+	return CT_DONE;
+}
+
+/* Print a signal set as a list of signal names, or as an inverted list. */
+static void
+put_sigset(struct trace_proc * proc, const char * name, sigset_t set)
+{
+	const char *signame;
+	unsigned int count, unknown;
+	int sig, invert, member;
+
+	/*
+	 * First decide whether we should print a normal or an inverted mask.
+	 * Unfortunately, depending on the place, a filled set may or may not
+	 * have bits outside the 1..NSIG range set.  Therefore, we ignore the
+	 * bits outside this range entirely, and use simple heuristics to
+	 * decide whether to show an inverted set.  If we know all the signal
+	 * names for either set and not the other, show that one; otherwise,
+	 * show an inverted mask if at least 3/4th of the bits are set.
+	 */
+	count = 0;
+	unknown = 0;
+	for (sig = 1; sig < NSIG; sig++) {
+		member = (sigismember(&set, sig) != 0);
+		count += member;
+		/* Bit 0: a nameless signal is unset; bit 1: one is set. */
+		if (get_signal_name(sig) == NULL)
+			unknown |= 1 << member;
+	}
+	if (unknown == 1 /*for unset bit*/ || unknown == 2 /*for set bit*/)
+		invert = unknown - 1;
+	else
+		invert = (count >= (NSIG - 1) * 3 / 4);
+
+	put_open(proc, name, PF_NONAME, invert ? "~[" : "[", " ");
+
+	for (sig = 1; sig < NSIG; sig++) {
+		/* Note that sigismember() may not strictly return 0 or 1.. */
+		if ((sigismember(&set, sig) != 0) == invert)
+			continue;
+		signame = get_signal_name(sig);
+		if (signame == NULL)
+			put_value(proc, NULL, "%d", sig);
+		else if (strncmp(signame, "SIG", 3) != 0)
+			put_field(proc, NULL, signame);
+		else	/* Skip the "SIG" prefix for brevity. */
+			put_field(proc, NULL, &signame[3]);
+	}
+
+	put_close(proc, "]");
+}
+
+static const struct flags sa_flags[] = {	/* names for sa_flags bits */
+	FLAG(SA_ONSTACK),
+	FLAG(SA_RESTART),
+	FLAG(SA_RESETHAND),
+	FLAG(SA_NODEFER),
+	FLAG(SA_NOCLDSTOP),
+	FLAG(SA_NOCLDWAIT),
+#ifdef SA_SIGINFO
+	FLAG(SA_SIGINFO),
+#endif
+	FLAG(SA_NOKERNINFO)
+};
+
+/* Print a signal handler: a special SIG_ value by name, else a pointer. */
+static void
+put_sa_handler(struct trace_proc * proc, const char * name, vir_bytes handler)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch ((int)handler) {
+		case (int)SIG_DFL: text = "SIG_DFL"; break;
+		case (int)SIG_IGN: text = "SIG_IGN"; break;
+		case (int)SIG_HOLD: text = "SIG_HOLD"; break;
+		}
+	}
+	if (text == NULL)
+		put_ptr(proc, name, handler);
+	else
+		put_field(proc, name, text);
+}
+
+/* Print a sigaction structure; how much is shown depends on 'verbose'. */
+static void
+put_struct_sigaction(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct sigaction sa;
+
+	if (!put_open_struct(proc, name, flags, addr, &sa, sizeof(sa)))
+		return;
+
+	put_sa_handler(proc, "sa_handler", (vir_bytes)sa.sa_handler);
+	if (verbose > 1)
+		put_sigset(proc, "sa_mask", sa.sa_mask);
+
+	/* A somewhat lame attempt to reduce noise a bit. */
+	if (verbose > 0 || sa.sa_handler != SIG_DFL ||
+	    (sa.sa_flags & ~(SA_ONSTACK | SA_RESTART | SA_RESETHAND |
+	    SA_NODEFER)) != 0)
+		put_flags(proc, "sa_flags", sa_flags, COUNT(sa_flags), "0x%x",
+		    sa.sa_flags);
+	put_close_struct(proc, verbose > 1);
+}
+
+/* Print the signal number and new action of a sigaction call. */
+static int
+pm_sigaction_out(struct trace_proc * proc, const message * m_out)
+{
+	put_signal(proc, "signal", m_out->m_lc_pm_sig.nr);
+	put_struct_sigaction(proc, "act", 0, m_out->m_lc_pm_sig.act);
+
+	/* Old values, if requested, can be printed only upon return. */
+	if (m_out->m_lc_pm_sig.oact != 0)
+		return CT_NOTDONE;
+
+	put_ptr(proc, "oact", 0);
+	return CT_DONE;
+}
+
+/* Print the old action of a sigaction call, if requested, and the result. */
+static void
+pm_sigaction_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	if (m_out->m_lc_pm_sig.oact != 0) {
+		put_struct_sigaction(proc, "oact", failed,
+		    m_out->m_lc_pm_sig.oact);
+		put_equals(proc);
+	}
+	put_result(proc);
+}
+
+/* Print the signal mask argument of a sigsuspend call. */
+static int
+pm_sigsuspend_out(struct trace_proc * proc, const message * m_out)
+{
+	put_sigset(proc, "set", m_out->m_lc_pm_sigset.set);
+
+	return CT_DONE;
+}
+
+/* Print nothing at call time; the "set" argument is printed upon return. */
+static int
+pm_sigpending_out(struct trace_proc * __unused proc,
+	const message * __unused m_out)
+{
+	return CT_NOTDONE;
+}
+
+/* Print the signal set from the reply, or "&.." if the call failed. */
+static void
+pm_sigpending_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	if (failed)
+		put_field(proc, "set", "&..");
+	else
+		put_sigset(proc, "set", m_in->m_pm_lc_sigset.set);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Print the 'how' argument of sigprocmask, by name where possible. */
+static void
+put_sigprocmask_how(struct trace_proc * proc, const char * name, int how)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (how) {
+		case SIG_INQUIRE: /* pseudocode, print something else */
+		TEXT(SIG_BLOCK);
+		TEXT(SIG_UNBLOCK);
+		TEXT(SIG_SETMASK);
+		}
+	}
+	if (text == NULL)
+		put_value(proc, name, "%d", how);
+	else
+		put_field(proc, name, text);
+}
+
+/* Print the 'how' and 'set' arguments of a sigprocmask call. */
+static int
+pm_sigprocmask_out(struct trace_proc * proc, const message * m_out)
+{
+	put_sigprocmask_how(proc, "how", m_out->m_lc_pm_sigset.how);
+	if (m_out->m_lc_pm_sigset.how != SIG_INQUIRE)
+		put_sigset(proc, "set", m_out->m_lc_pm_sigset.set);
+	else	/* for SIG_INQUIRE, show a NULL set pointer instead */
+		put_ptr(proc, "set", 0);
+
+	return CT_NOTDONE;
+}
+
+/* Print the old signal mask from the reply, or "&.." on failure. */
+static void
+pm_sigprocmask_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	if (failed)
+		put_field(proc, "oset", "&..");
+	else
+		put_sigset(proc, "oset", m_in->m_pm_lc_sigset.set);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Print the signal context that the process passes to sigreturn. */
+static int
+pm_sigreturn_out(struct trace_proc * proc, const message * m_out)
+{
+	struct sigcontext scp;
+
+	if (!put_open_struct(proc, "scp", 0, m_out->m_lc_pm_sigset.ctx, &scp,
+	    sizeof(scp)))
+		return CT_NORETURN;
+	if (verbose == 1) {
+#if defined(__i386__)
+		put_ptr(proc, "sc_eip", scp.sc_eip);
+		put_ptr(proc, "sc_esp", scp.sc_esp);
+#elif defined(__arm__)
+		put_ptr(proc, "sc_pc", scp.sc_pc);
+		put_ptr(proc, "sc_usr_sp", scp.sc_usr_sp);
+#endif
+	}
+
+	/*
+	 * We deliberately print the signal set from the message rather than
+	 * from the structure, since in theory they may be different and PM
+	 * uses the one from the message only.
+	 */
+	put_sigset(proc, "sc_mask", m_out->m_lc_pm_sigset.set);
+
+	/*
+	 * TODO: print some other fields, although it is probably not useful
+	 * to print all registers even with verbose > 1?
+	 */
+	put_close_struct(proc, FALSE /*all*/);
+	return CT_NORETURN;
+}
+
+/* Print a result for sigreturn only if the call failed. */
+static void
+pm_sigreturn_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * __unused m_in, int failed)
+{
+	if (!failed)
+		return;
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Print a sysuname field identifier, by name unless 'valuesonly' is set. */
+static void
+put_sysuname_field(struct trace_proc * proc, const char * name, int field)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (field) {
+		TEXT(_UTS_ARCH);
+		TEXT(_UTS_KERNEL);
+		TEXT(_UTS_MACHINE);
+		TEXT(_UTS_HOSTNAME);
+		TEXT(_UTS_NODENAME);
+		TEXT(_UTS_RELEASE);
+		TEXT(_UTS_VERSION);
+		TEXT(_UTS_SYSNAME);
+		TEXT(_UTS_BUS);
+		}
+	}
+	if (text == NULL)
+		put_value(proc, name, "%d", field);
+	else
+		put_field(proc, name, text);
+}
+
+/* Print the sysuname request and field; for non-GET, also the value. */
+static int
+pm_sysuname_out(struct trace_proc * proc, const message * m_out)
+{
+	if (!valuesonly && m_out->m_lc_pm_sysuname.req == _UTS_GET)
+		put_field(proc, "req", "_UTS_GET");
+	else if (!valuesonly && m_out->m_lc_pm_sysuname.req == _UTS_SET)
+		put_field(proc, "req", "_UTS_SET");
+	else
+		put_value(proc, "req", "%d", m_out->m_lc_pm_sysuname.req);
+	put_sysuname_field(proc, "field", m_out->m_lc_pm_sysuname.field);
+
+	if (m_out->m_lc_pm_sysuname.req == _UTS_GET)
+		return CT_NOTDONE;
+
+	put_buf(proc, "value", PF_STRING, m_out->m_lc_pm_sysuname.value,
+	    m_out->m_lc_pm_sysuname.len);
+	put_value(proc, "len", "%d", m_out->m_lc_pm_sysuname.len);
+	return CT_DONE;
+}
+
+/* For _UTS_GET, print the value buffer (sized by the reply type) and len. */
+static void
+pm_sysuname_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	if (m_out->m_lc_pm_sysuname.req == _UTS_GET) {
+		put_buf(proc, "value", failed | PF_STRING,
+		    m_out->m_lc_pm_sysuname.value, m_in->m_type);
+		put_value(proc, "len", "%d", m_out->m_lc_pm_sysuname.len);
+		put_equals(proc);
+	}
+	put_result(proc);
+}
+
+/* Print a priority 'which' selector, by name unless 'valuesonly' is set. */
+static void
+put_priority_which(struct trace_proc * proc, const char * name, int which)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (which) {
+		TEXT(PRIO_PROCESS);
+		TEXT(PRIO_PGRP);
+		TEXT(PRIO_USER);
+		}
+	}
+	if (text == NULL)
+		put_value(proc, name, "%d", which);
+	else
+		put_field(proc, name, text);
+}
+
+/* Print the 'which' and 'who' arguments of a getpriority call. */
+static int
+pm_getpriority_out(struct trace_proc * proc, const message * m_out)
+{
+	put_priority_which(proc, "which", m_out->m_lc_pm_priority.which);
+	put_value(proc, "who", "%d", m_out->m_lc_pm_priority.who);
+
+	return CT_DONE;
+}
+
+/* Print the reply type offset by PRIO_MIN, or the result on failure. */
+static void
+pm_getpriority_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	if (failed)
+		put_result(proc);
+	else
+		put_value(proc, NULL, "%d", m_in->m_type + PRIO_MIN);
+}
+
+/* Print the 'which', 'who', and 'prio' arguments of a setpriority call. */
+static int
+pm_setpriority_out(struct trace_proc * proc, const message * m_out)
+{
+	put_priority_which(proc, "which", m_out->m_lc_pm_priority.which);
+	put_value(proc, "who", "%d", m_out->m_lc_pm_priority.who);
+	put_value(proc, "prio", "%d", m_out->m_lc_pm_priority.prio);
+
+	return CT_DONE;
+}
+
+/* Print nothing at call time; both arguments are printed upon return. */
+static int
+pm_gettimeofday_out(struct trace_proc * __unused proc,
+	const message * __unused m_out)
+{
+	return CT_NOTDONE;
+}
+
+/* Print seconds and nanoseconds as a timeval (nanoseconds / 1000). */
+static void
+put_timespec_as_timeval(struct trace_proc * proc, const char * name,
+	time_t sec, long nsec)
+{
+	/* No field names within the structure. */
+	put_open(proc, name, PF_NONAME, "{", ", ");
+
+	put_time(proc, "tv_sec", sec);
+	put_value(proc, "tv_usec", "%ld", nsec / 1000);
+
+	put_close(proc, "}");
+}
+
+/* Print the time returned by gettimeofday, and the call result. */
+static void
+pm_gettimeofday_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	if (failed)
+		put_field(proc, "tp", "&..");
+	else {
+		/*
+		 * The system call returns values which do not match the call
+		 * being made, so just like libc, we have to correct..
+		 */
+		put_timespec_as_timeval(proc, "tp", m_in->m_pm_lc_time.sec,
+		    m_in->m_pm_lc_time.nsec);
+	}
+	put_ptr(proc, "tzp", 0); /* not part of the system call (yet) */
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Print the process ID argument of a getsid call. */
+static int
+pm_getsid_out(struct trace_proc * proc, const message * m_out)
+{
+	put_value(proc, "pid", "%d", m_out->m_lc_pm_getsid.pid);
+
+	return CT_DONE;
+}
+
+/* Print a clock identifier, by name unless 'valuesonly' is set. */
+static void
+put_clockid(struct trace_proc * proc, const char * name, clockid_t clock_id)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (clock_id) {
+		TEXT(CLOCK_REALTIME);
+#ifdef CLOCK_VIRTUAL
+		TEXT(CLOCK_VIRTUAL);
+#endif
+#ifdef CLOCK_PROF
+		TEXT(CLOCK_PROF);
+#endif
+		TEXT(CLOCK_MONOTONIC);
+		}
+	}
+	if (text == NULL)
+		put_value(proc, name, "%d", clock_id);
+	else
+		put_field(proc, name, text);
+}
+
+/*
+ * Print a timespec given by value.  With PF_FAILED, only a placeholder is
+ * printed; with PF_ALT, the seconds are printed through put_time().
+ */
+static void
+put_clock_timespec(struct trace_proc * proc, const char * name, int flags,
+	time_t sec, long nsec)
+{
+	if ((flags & PF_FAILED) != 0) {
+		put_field(proc, name, "&..");
+		return;
+	}
+
+	/* No field names within the structure. */
+	put_open(proc, name, PF_NONAME, "{", ", ");
+	if ((flags & PF_ALT) == 0)
+		put_value(proc, "tv_sec", "%"PRId64, sec);
+	else
+		put_time(proc, "tv_sec", sec);
+	put_value(proc, "tv_nsec", "%ld", nsec);
+	put_close(proc, "}");
+}
+
+/* Print the clock ID; shared between clock_getres and clock_gettime. */
+static int
+pm_clock_get_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The timespec argument of either call is printed upon return. */
+	put_clockid(proc, "clock_id", m_out->m_lc_pm_time.clk_id);
+
+	return CT_NOTDONE;
+}
+
+/* Print the clock resolution from the reply, and the call result. */
+static void
+pm_clock_getres_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	put_clock_timespec(proc, "res", failed, m_in->m_pm_lc_time.sec,
+	    m_in->m_pm_lc_time.nsec);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Same as pm_clock_getres_in, but different field name and the option to print
+ * at least some results as time strings (in the future).  Only results for
+ * the realtime clock are flagged (PF_ALT) for such printing.
+ */
+static void
+pm_clock_gettime_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	const int alt =
+	    (m_out->m_lc_pm_time.clk_id == CLOCK_REALTIME) ? PF_ALT : 0;
+
+	/* TODO: make this print a time string. */
+	put_clock_timespec(proc, "tp", failed | alt, m_in->m_pm_lc_time.sec,
+	    m_in->m_pm_lc_time.nsec);
+
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Name the call, as adjtime and clock_settime share one call number. */
+static const char *
+pm_clock_settime_name(const message * m_out)
+{
+	/* A zero 'now' field indicates adjtime. */
+	const int is_adjtime = (m_out->m_lc_pm_time.now == 0);
+
+	return is_adjtime ? "adjtime" : "clock_settime";
+}
+
+/* Print the arguments of an adjtime or a clock_settime call. */
+static int
+pm_clock_settime_out(struct trace_proc * proc, const message * m_out)
+{
+	int flags = 0;
+
+	/* These two calls just look completely different.. */
+	if (m_out->m_lc_pm_time.now == 0) {
+		put_timespec_as_timeval(proc, "delta", m_out->m_lc_pm_time.sec,
+		    m_out->m_lc_pm_time.nsec);
+		put_ptr(proc, "odelta", 0); /* not supported on MINIX3 */
+		return CT_DONE;
+	}
+
+	if (m_out->m_lc_pm_time.clk_id == CLOCK_REALTIME)
+		flags = PF_ALT;
+	put_clockid(proc, "clock_id", m_out->m_lc_pm_time.clk_id);
+	put_clock_timespec(proc, "tp", flags, m_out->m_lc_pm_time.sec,
+	    m_out->m_lc_pm_time.nsec);
+	return CT_DONE;
+}
+
+/* Print the 'who' argument of getrusage, by name where possible. */
+static int
+pm_getrusage_out(struct trace_proc * proc, const message * m_out)
+{
+	if (!valuesonly && m_out->m_lc_pm_rusage.who == RUSAGE_SELF)
+		put_field(proc, "who", "RUSAGE_SELF");
+	else if (!valuesonly && m_out->m_lc_pm_rusage.who == RUSAGE_CHILDREN)
+		put_field(proc, "who", "RUSAGE_CHILDREN");
+	else
+		put_value(proc, "who", "%d", m_out->m_lc_pm_rusage.who);
+
+	return CT_NOTDONE;
+}
+
+/* Print the rusage structure at the call's address, and the result. */
+static void
+pm_getrusage_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	struct rusage buf;
+
+	/* Inline; we will certainly not be reusing this anywhere else. */
+	if (put_open_struct(proc, "rusage", failed, m_out->m_lc_pm_rusage.addr,
+	    &buf, sizeof(buf))) {
+		put_struct_timeval(proc, "ru_utime", PF_LOCADDR,
+		    (vir_bytes)&buf.ru_utime);
+		put_struct_timeval(proc, "ru_stime", PF_LOCADDR,
+		    (vir_bytes)&buf.ru_stime);
+		if (verbose > 0)
+			put_value(proc, "ru_nsignals", "%ld", buf.ru_nsignals);
+		put_close_struct(proc, verbose > 0);
+	}
+	put_equals(proc);
+	put_result(proc);
+}
+
+static const struct flags reboot_flags[] = {
+	FLAG_ZERO(RB_AUTOBOOT),
+	FLAG(RB_ASKNAME),
+	FLAG(RB_DUMP),
+	FLAG_MASK(RB_POWERDOWN, RB_HALT),
+	FLAG(RB_POWERDOWN),
+	FLAG(RB_INITNAME),
+	FLAG(RB_KDB),
+	FLAG(RB_NOSYNC),
+	FLAG(RB_RDONLY),
+	FLAG(RB_SINGLE),
+	FLAG(RB_STRING),
+	FLAG(RB_USERCONF),
+};
+
+/* Print the 'how' flags of a reboot call, and a NULL boot string. */
+static int
+pm_reboot_out(struct trace_proc * proc, const message * m_out)
+{
+	put_flags(proc, "how", reboot_flags, COUNT(reboot_flags), "0x%x",
+	    m_out->m_lc_pm_reboot.how);
+	put_ptr(proc, "bootstr", 0); /* not supported on MINIX3 */
+
+	return CT_DONE;
+}
+
+/* Print a PM svrctl request and argument using the ioctl print helpers. */
+static int
+pm_svrctl_out(struct trace_proc * proc, const message * m_out)
+{
+	put_ioctl_req(proc, "request", m_out->m_lc_svrctl.request,
+	    TRUE /*is_svrctl*/);
+	return put_ioctl_arg_out(proc, "arg", m_out->m_lc_svrctl.request,
+	    m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/);
+}
+
+/* Hand the svrctl argument to the ioctl helper for printing upon return. */
+static void
+pm_svrctl_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	put_ioctl_arg_in(proc, "arg", failed, m_out->m_lc_svrctl.request,
+	    m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/);
+}
+
+/* Print all arguments of a statistical profiling (sprofile) call. */
+static int
+pm_sprof_out(struct trace_proc * proc, const message * m_out)
+{
+	int freq;
+
+	if (!valuesonly && m_out->m_lc_pm_sprof.action == PROF_START)
+		put_field(proc, "action", "PROF_START");
+	else if (!valuesonly && m_out->m_lc_pm_sprof.action == PROF_STOP)
+		put_field(proc, "action", "PROF_STOP");
+	else
+		put_value(proc, "action", "%d", m_out->m_lc_pm_sprof.action);
+	put_value(proc, "size", "%zu", m_out->m_lc_pm_sprof.mem_size);
+	/* Values 3..15 are annotated with a rate of 2^(16-freq) Hz. */
+	freq = m_out->m_lc_pm_sprof.freq;
+	if (!valuesonly && freq >= 3 && freq <= 15) /* no constants.. */
+		put_value(proc, "freq", "%u /*%uHz*/", freq, 1 << (16 - freq));
+	else
+		put_value(proc, "freq", "%u", freq);
+
+	if (!valuesonly && m_out->m_lc_pm_sprof.intr_type == PROF_RTC)
+		put_field(proc, "type", "PROF_RTC");
+	else if (!valuesonly && m_out->m_lc_pm_sprof.intr_type == PROF_NMI)
+		put_field(proc, "type", "PROF_NMI");
+	else
+		put_value(proc, "type", "%d", m_out->m_lc_pm_sprof.intr_type);
+
+	put_ptr(proc, "ctl_ptr", m_out->m_lc_pm_sprof.ctl_ptr);
+	put_ptr(proc, "mem_ptr", m_out->m_lc_pm_sprof.mem_ptr);
+
+	return CT_DONE;
+}
+
+#define PM_CALL(c) [((PM_ ## c) - PM_BASE)]
+/* Call handlers for PM, indexed by call number relative to PM_BASE. */
+static const struct call_handler pm_map[] = {
+	PM_CALL(EXIT) = HANDLER("exit", pm_exit_out, default_in),
+	PM_CALL(FORK) = HANDLER("fork", default_out, default_in),
+	PM_CALL(WAITPID) = HANDLER("waitpid", pm_waitpid_out, pm_waitpid_in),
+	PM_CALL(GETPID) = HANDLER("getpid", default_out, pm_getpid_in),
+	PM_CALL(SETUID) = HANDLER("setuid", pm_setuid_out, default_in),
+	PM_CALL(GETUID) = HANDLER("getuid", default_out, pm_getuid_in),
+	PM_CALL(STIME) = HANDLER("stime", pm_stime_out, default_in),
+	PM_CALL(PTRACE) = HANDLER("ptrace", pm_ptrace_out, pm_ptrace_in),
+	PM_CALL(SETGROUPS) = HANDLER("setgroups", pm_setgroups_out,
+	    default_in),
+	PM_CALL(GETGROUPS) = HANDLER("getgroups", pm_getgroups_out,
+	    pm_getgroups_in),
+	PM_CALL(KILL) = HANDLER("kill", pm_kill_out, default_in),
+	PM_CALL(SETGID) = HANDLER("setgid", pm_setgid_out, default_in),
+	PM_CALL(GETGID) = HANDLER("getgid", default_out, pm_getgid_in),
+	PM_CALL(EXEC) = HANDLER("execve", pm_exec_out, default_in),
+	PM_CALL(SETSID) = HANDLER("setsid", default_out, default_in),
+	PM_CALL(GETPGRP) = HANDLER("getpgrp", default_out, default_in),
+	PM_CALL(ITIMER) = HANDLER_NAME(pm_itimer_name, pm_itimer_out,
+	    pm_itimer_in),
+	PM_CALL(GETMCONTEXT) = HANDLER("getmcontext", pm_getmcontext_out,
+	    pm_getmcontext_in),
+	PM_CALL(SETMCONTEXT) = HANDLER("setmcontext", pm_setmcontext_out,
+	    default_in),
+	PM_CALL(SIGACTION) = HANDLER("sigaction", pm_sigaction_out,
+	    pm_sigaction_in),
+	PM_CALL(SIGSUSPEND) = HANDLER("sigsuspend", pm_sigsuspend_out,
+	    default_in),
+	PM_CALL(SIGPENDING) = HANDLER("sigpending", pm_sigpending_out,
+	    pm_sigpending_in),
+	PM_CALL(SIGPROCMASK) = HANDLER("sigprocmask", pm_sigprocmask_out,
+	    pm_sigprocmask_in),
+	PM_CALL(SIGRETURN) = HANDLER("sigreturn", pm_sigreturn_out,
+	    pm_sigreturn_in),
+	PM_CALL(SYSUNAME) = HANDLER("sysuname", pm_sysuname_out,
+	    pm_sysuname_in),
+	PM_CALL(GETPRIORITY) = HANDLER("getpriority", pm_getpriority_out,
+	    pm_getpriority_in),
+	PM_CALL(SETPRIORITY) = HANDLER("setpriority", pm_setpriority_out,
+	    default_in),
+	PM_CALL(GETTIMEOFDAY) = HANDLER("gettimeofday", pm_gettimeofday_out,
+	    pm_gettimeofday_in),
+	PM_CALL(SETEUID) = HANDLER("seteuid", pm_setuid_out, default_in),
+	PM_CALL(SETEGID) = HANDLER("setegid", pm_setgid_out, default_in),
+	PM_CALL(ISSETUGID) = HANDLER("issetugid", default_out, default_in),
+	PM_CALL(GETSID) = HANDLER("getsid", pm_getsid_out, default_in),
+	PM_CALL(CLOCK_GETRES) = HANDLER("clock_getres", pm_clock_get_out,
+	    pm_clock_getres_in),
+	PM_CALL(CLOCK_GETTIME) = HANDLER("clock_gettime", pm_clock_get_out,
+	    pm_clock_gettime_in),
+	PM_CALL(CLOCK_SETTIME) = HANDLER_NAME(pm_clock_settime_name,
+	    pm_clock_settime_out, default_in),
+	PM_CALL(GETRUSAGE) = HANDLER("pm_getrusage", pm_getrusage_out,
+	    pm_getrusage_in),
+	PM_CALL(REBOOT) = HANDLER("reboot", pm_reboot_out, default_in),
+	PM_CALL(SVRCTL) = HANDLER("pm_svrctl", pm_svrctl_out, pm_svrctl_in),
+	PM_CALL(SPROF) = HANDLER("sprofile", pm_sprof_out, default_in),
+};
+
+const struct calls pm_calls = {	/* PM endpoint, call base, and handlers */
+	.endpt = PM_PROC_NR,
+	.base = PM_BASE,
+	.map = pm_map,
+	.count = COUNT(pm_map)
+};
diff --git a/minix/usr.bin/trace/service/rs.c b/minix/usr.bin/trace/service/rs.c
new file mode 100644
index 000000000..514a4836c
--- /dev/null
+++ b/minix/usr.bin/trace/service/rs.c
@@ -0,0 +1,140 @@
+
+#include "inc.h"
+
+#include <minix/rs.h>
+
+static const struct flags rss_flags[] = {	/* names for rss_flags bits */
+	FLAG(RSS_COPY),
+	FLAG(RSS_REUSE),
+	FLAG(RSS_NOBLOCK),
+	FLAG(RSS_REPLICA),
+	FLAG(RSS_SELF_LU),
+	FLAG(RSS_SYS_BASIC_CALLS),
+	FLAG(RSS_VM_BASIC_CALLS),
+	FLAG(RSS_NO_BIN_EXP),
+};
+
+/* Print part of the rs_start structure at 'addr', based on 'verbose'. */
+static void
+put_struct_rs_start(struct trace_proc * proc, const char * name,
+	vir_bytes addr)
+{
+	struct rs_start buf;
+
+	if (!put_open_struct(proc, name, 0, addr, &buf, sizeof(buf)))
+		return;
+
+	if (verbose > 0)
+		put_flags(proc, "rss_flags", rss_flags, COUNT(rss_flags),
+		    "0x%x", buf.rss_flags);
+	put_buf(proc, "rss_cmd", 0, (vir_bytes)buf.rss_cmd, buf.rss_cmdlen);
+	put_buf(proc, "rss_label", 0, (vir_bytes)buf.rss_label.l_addr,
+	    buf.rss_label.l_len);
+	if (verbose > 0 || buf.rss_major != 0)
+		put_value(proc, "rss_major", "%d", buf.rss_major);
+	if (verbose > 0 || buf.devman_id != 0)
+		put_value(proc, "devman_id", "%d", buf.devman_id);
+	put_value(proc, "rss_uid", "%u", buf.rss_uid);
+	if (verbose > 0) {
+		put_endpoint(proc, "rss_sigmgr", buf.rss_sigmgr);
+		put_endpoint(proc, "rss_scheduler", buf.rss_scheduler);
+	}
+	if (verbose > 1) {
+		put_value(proc, "rss_priority", "%d", buf.rss_priority);
+		put_value(proc, "rss_quantum", "%d", buf.rss_quantum);
+	}
+	if (verbose > 0) {
+		put_value(proc, "rss_period", "%ld", buf.rss_period);
+		put_buf(proc, "rss_script", 0, (vir_bytes)buf.rss_script,
+		    buf.rss_scriptlen);
+	}
+	put_close_struct(proc, FALSE /*all*/); /* TODO: the remaining fields */
+}
+
+/* This function is shared between rs_up and rs_edit. */
+static int
+rs_up_out(struct trace_proc * proc, const message * m_out)
+{
+	/* The request address is printed as a struct rs_start. */
+	put_struct_rs_start(proc, "addr", (vir_bytes)m_out->m_rs_req.addr);
+
+	return CT_DONE;
+}
+
+/*
+ * This function is shared between rs_down, rs_refresh, rs_restart, and
+ * rs_clone.
+ */
+static int
+rs_label_out(struct trace_proc * proc, const message * m_out)
+{
+
+	/*
+	 * We are not using PF_STRING here, because unlike in most places
+	 * (including rs_lookup), the string length does not include the
+	 * terminating NUL character.
+	 */
+	put_buf(proc, "label", 0, (vir_bytes)m_out->m_rs_req.addr,
+	    m_out->m_rs_req.len);
+
+	return CT_DONE;
+}
+
+/* Print the rs_start structure, state, and max time of an rs_update. */
+static int
+rs_update_out(struct trace_proc * proc, const message * m_out)
+{
+	/*
+	 * FIXME: this is a value from the wrong message union, and that is
+	 * actually a minix bug.
+	 */
+	put_struct_rs_start(proc, "addr", (vir_bytes)m_out->m_rs_req.addr);
+
+	/* TODO: interpret these fields */
+	put_value(proc, "state", "%d", m_out->m_rs_update.state);
+	put_value(proc, "maxtime", "%d", m_out->m_rs_update.prepare_maxtime);
+
+	return CT_DONE;
+}
+
+/* Print the label to look up, as a string of the given length. */
+static int
+rs_lookup_out(struct trace_proc * proc, const message * m_out)
+{
+	put_buf(proc, "label", PF_STRING, (vir_bytes)m_out->m_rs_req.name,
+	    m_out->m_rs_req.name_len);
+
+	return CT_DONE;
+}
+
+/* Print the endpoint from the reply, or the result if the call failed. */
+static void
+rs_lookup_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	if (failed)
+		put_result(proc);
+	else
+		put_endpoint(proc, NULL, m_in->m_rs_req.endpoint);
+}
+
+#define RS_CALL(c) [((RS_ ## c) - RS_RQ_BASE)]
+/* Call handlers for RS, indexed by call number relative to RS_RQ_BASE. */
+static const struct call_handler rs_map[] = {
+	RS_CALL(UP) = HANDLER("rs_up", rs_up_out, default_in),
+	RS_CALL(DOWN) = HANDLER("rs_down", rs_label_out, default_in),
+	RS_CALL(REFRESH) = HANDLER("rs_refresh", rs_label_out, default_in),
+	RS_CALL(RESTART) = HANDLER("rs_restart", rs_label_out, default_in),
+	RS_CALL(SHUTDOWN) = HANDLER("rs_shutdown", default_out, default_in),
+	RS_CALL(CLONE) = HANDLER("rs_clone", rs_label_out, default_in),
+	RS_CALL(UPDATE) = HANDLER("rs_update", rs_update_out, default_in),
+	RS_CALL(EDIT) = HANDLER("rs_edit", rs_up_out, default_in),
+	RS_CALL(LOOKUP) = HANDLER("rs_lookup", rs_lookup_out, rs_lookup_in),
+};
+
+const struct calls rs_calls = {	/* RS endpoint, call base, and handlers */
+	.endpt = RS_PROC_NR,
+	.base = RS_RQ_BASE,
+	.map = rs_map,
+	.count = COUNT(rs_map)
+};
diff --git a/minix/usr.bin/trace/service/vfs.c b/minix/usr.bin/trace/service/vfs.c
new file mode 100644
index 000000000..71006c1cb
--- /dev/null
+++ b/minix/usr.bin/trace/service/vfs.c
@@ -0,0 +1,1457 @@
+
+#include "inc.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/mount.h>
+#include <sys/resource.h>
+
+/*
+ * Print a file descriptor.  This function should always be used when printing
+ * a file descriptor.  It currently offers no benefit, but will in the future
+ * allow for features such as color highlighting and tracking of specific open
+ * files (TODO).
+ */
+void
+put_fd(struct trace_proc * proc, const char * name, int fd)
+{
+	put_value(proc, name, "%d", fd);
+}
+
+/* Print the file descriptor of a read; the rest follows upon return. */
+static int
+vfs_read_out(struct trace_proc * proc, const message *m_out)
+{
+	put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd);
+
+	return CT_NOTDONE;
+}
+
+/* Print the buffer (sized by the reply type), the length, and the result. */
+static void
+vfs_read_in(struct trace_proc * proc, const message *m_out,
+	const message *m_in, int failed)
+{
+	put_buf(proc, "buf", failed, m_out->m_lc_vfs_readwrite.buf,
+	    m_in->m_type);
+	put_value(proc, "len", "%zu", m_out->m_lc_vfs_readwrite.len);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Print the file descriptor, buffer contents, and length of a write. */
+static int
+vfs_write_out(struct trace_proc * proc, const message *m_out)
+{
+	put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd);
+	put_buf(proc, "buf", 0, m_out->m_lc_vfs_readwrite.buf,
+	    m_out->m_lc_vfs_readwrite.len);
+	put_value(proc, "len", "%zu", m_out->m_lc_vfs_readwrite.len);
+
+	return CT_DONE;
+}
+
+/* Print an lseek 'whence' value, by name unless 'valuesonly' is set. */
+static void
+put_lseek_whence(struct trace_proc * proc, const char * name, int whence)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (whence) {
+		TEXT(SEEK_SET);
+		TEXT(SEEK_CUR);
+		TEXT(SEEK_END);
+		}
+	}
+	if (text == NULL)
+		put_value(proc, name, "%d", whence);
+	else
+		put_field(proc, name, text);
+}
+
+/* Print the file descriptor, offset, and whence arguments of an lseek. */
+static int
+vfs_lseek_out(struct trace_proc * proc, const message * m_out)
+{
+	put_fd(proc, "fd", m_out->m_lc_vfs_lseek.fd);
+	put_value(proc, "offset", "%"PRId64, m_out->m_lc_vfs_lseek.offset);
+	put_lseek_whence(proc, "whence", m_out->m_lc_vfs_lseek.whence);
+
+	return CT_DONE;
+}
+
+/* Print the 64-bit offset from the reply, or the result on failure. */
+static void
+vfs_lseek_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	if (failed)
+		put_result(proc);
+	else
+		put_value(proc, NULL, "%"PRId64, m_in->m_vfs_lc_lseek.offset);
+}
+
+static const struct flags open_flags[] = {	/* access modes come first */
+	FLAG_MASK(O_ACCMODE, O_RDONLY),
+	FLAG_MASK(O_ACCMODE, O_WRONLY),
+	FLAG_MASK(O_ACCMODE, O_RDWR),
+#define ACCMODE_ENTRIES 3	/* the first N entries are for O_ACCMODE */
+	FLAG(O_NONBLOCK),
+	FLAG(O_APPEND),
+	FLAG(O_SHLOCK),
+	FLAG(O_EXLOCK),
+	FLAG(O_ASYNC),
+	FLAG(O_SYNC),
+	FLAG(O_NOFOLLOW),
+	FLAG(O_CREAT),
+	FLAG(O_TRUNC),
+	FLAG(O_EXCL),
+	FLAG(O_NOCTTY),
+	FLAG(O_DSYNC),
+	FLAG(O_RSYNC),
+	FLAG(O_ALT_IO),
+	FLAG(O_DIRECT),
+	FLAG(O_DIRECTORY),
+	FLAG(O_CLOEXEC),
+	FLAG(O_SEARCH),
+	FLAG(O_NOSIGPIPE),
+};
+
+/*
+ * Print a set of open(2) file flags.  If 'full' is zero, the access mode part
+ * of the flags is not printed.
+ */
+static void
+put_open_flags(struct trace_proc * proc, const char * name, int value,
+	int full)
+{
+	const struct flags *first;
+	unsigned int skip, num;
+
+	/*
+	 * If we're not printing a full open()-style set of flags, but instead
+	 * just a loose set of flags, then skip the access mode altogether,
+	 * otherwise we'd be printing O_RDONLY when no access mode is given.
+	 */
+	skip = full ? 0 : ACCMODE_ENTRIES;
+	first = &open_flags[skip];
+	num = COUNT(open_flags) - skip;
+
+	put_flags(proc, name, first, num, "0x%x", value);
+}
+
+static const struct flags mode_flags[] = {	/* file types, special bits */
+	FLAG_MASK(S_IFMT, S_IFIFO),
+	FLAG_MASK(S_IFMT, S_IFCHR),
+	FLAG_MASK(S_IFMT, S_IFDIR),
+	FLAG_MASK(S_IFMT, S_IFBLK),
+	FLAG_MASK(S_IFMT, S_IFREG),
+	FLAG_MASK(S_IFMT, S_IFLNK),
+	FLAG_MASK(S_IFMT, S_IFSOCK),
+	FLAG_MASK(S_IFMT, S_IFWHT),
+	FLAG(S_ARCH1),
+	FLAG(S_ARCH2),
+	FLAG(S_ISUID),
+	FLAG(S_ISGID),
+	FLAG(S_ISTXT),
+};
+
+/* Do not use %04o instead of 0%03o; it is octal even if greater than 0777. */
+#define put_mode(p, n, v) \
+	put_flags(p, n, mode_flags, COUNT(mode_flags), "0%03o", v)
+
+/* Print the path of a call, from the message itself or from an address. */
+static void
+put_path(struct trace_proc * proc, const message * m_out)
+{
+	const size_t len = m_out->m_lc_vfs_path.len;
+	if (len > M_PATH_STRING_MAX)
+		put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_path.name, len);
+	else	/* short enough to be stored in the message buffer */
+		put_buf(proc, "path", PF_LOCADDR | PF_PATH,
+		    (vir_bytes)m_out->m_lc_vfs_path.buf, len);
+}
+
+/* Print the path and the full set of flags of an open call. */
+static int
+vfs_open_out(struct trace_proc * proc, const message * m_out)
+{
+	put_path(proc, m_out);
+	put_open_flags(proc, "flags", m_out->m_lc_vfs_path.flags,
+	    TRUE /*full*/);
+
+	return CT_DONE;
+}
+
+/* This function is shared between creat and open. */
+static void
+vfs_open_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* On success, the reply type is printed as a file descriptor. */
+	if (failed)
+		put_result(proc);
+	else
+		put_fd(proc, NULL, m_in->m_type);
+}
+
+/* Print the path, full flags, and mode of a creat-type open call. */
+static int
+vfs_creat_out(struct trace_proc * proc, const message * m_out)
+{
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_creat.name,
+	    m_out->m_lc_vfs_creat.len);
+	put_open_flags(proc, "flags", m_out->m_lc_vfs_creat.flags,
+	    TRUE /*full*/);
+	put_mode(proc, "mode", m_out->m_lc_vfs_creat.mode);
+
+	return CT_DONE;
+}
+
+/* Print the file descriptor argument of a close call. */
+static int
+vfs_close_out(struct trace_proc * proc, const message * m_out)
+{
+	put_fd(proc, "fd", m_out->m_lc_vfs_close.fd);
+
+	return CT_DONE;
+}
+
+/* This function is used for link, rename, and symlink. */
+static int
+vfs_link_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Both arguments are printed as paths, in message order. */
+	put_buf(proc, "path1", PF_PATH, m_out->m_lc_vfs_link.name1,
+	    m_out->m_lc_vfs_link.len1);
+	put_buf(proc, "path2", PF_PATH, m_out->m_lc_vfs_link.name2,
+	    m_out->m_lc_vfs_link.len2);
+
+	return CT_DONE;
+}
+
+/* Print the path of a call that takes a path as its only argument. */
+static int
+vfs_path_out(struct trace_proc * proc, const message * m_out)
+{
+	put_path(proc, m_out);
+
+	return CT_DONE;
+}
+
+/* Print the path and file mode of a call that takes these arguments. */
+static int
+vfs_path_mode_out(struct trace_proc * proc, const message * m_out)
+{
+	put_path(proc, m_out);
+	put_mode(proc, "mode", m_out->m_lc_vfs_path.mode);
+
+	return CT_DONE;
+}
+
+/*
+ * Print a device number, as <major,minor> if it can be represented that way.
+ */
+void
+put_dev(struct trace_proc * proc, const char * name, dev_t dev)
+{
+	const devmajor_t dmajor = major(dev);
+	const devminor_t dminor = minor(dev);
+
+	/* The value 0 ("no device") should print as "0". */
+	if (valuesonly || dev == 0 || makedev(dmajor, dminor) != dev)
+		put_value(proc, name, "%"PRIu64, dev);
+	else
+		put_value(proc, name, "<%d,%d>", dmajor, dminor);
+}
+
+/* Print the path, mode, and device number arguments of a mknod call. */
+static int
+vfs_mknod_out(struct trace_proc * proc, const message * m_out)
+{
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_mknod.name,
+	    m_out->m_lc_vfs_mknod.len);
+	put_mode(proc, "mode", m_out->m_lc_vfs_mknod.mode);
+	put_dev(proc, "dev", m_out->m_lc_vfs_mknod.device);
+
+	return CT_DONE;
+}
+
+/* Print the path, owner, and group arguments of a chown call. */
+static int
+vfs_chown_out(struct trace_proc * proc, const message * m_out)
+{
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_chown.name,
+	    m_out->m_lc_vfs_chown.len);
+	/* -1 means "keep the current value" so print as signed */
+	put_value(proc, "owner", "%d", m_out->m_lc_vfs_chown.owner);
+	put_value(proc, "group", "%d", m_out->m_lc_vfs_chown.group);
+
+	return CT_DONE;
+}
+
+/* TODO: expand this to the full ST_ set. */
+static const struct flags mount_flags[] = {
+	FLAG(MNT_RDONLY),
+};
+
+/* Print the device, path, flags, type, and label of a mount call. */
+static int
+vfs_mount_out(struct trace_proc * proc, const message * m_out)
+{
+	put_buf(proc, "special", PF_PATH, m_out->m_lc_vfs_mount.dev,
+	    m_out->m_lc_vfs_mount.devlen);
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_mount.path,
+	    m_out->m_lc_vfs_mount.pathlen);
+	put_flags(proc, "flags", mount_flags, COUNT(mount_flags), "0x%x",
+	    m_out->m_lc_vfs_mount.flags);
+	put_buf(proc, "type", PF_STRING, m_out->m_lc_vfs_mount.type,
+	    m_out->m_lc_vfs_mount.typelen);
+	put_buf(proc, "label", PF_STRING, m_out->m_lc_vfs_mount.label,
+	    m_out->m_lc_vfs_mount.labellen);
+
+	return CT_DONE;
+}
+
+/* Print the path argument of a umount call. */
+static int
+vfs_umount_out(struct trace_proc * proc, const message * m_out)
+{
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_umount.name,
+	    m_out->m_lc_vfs_umount.namelen);
+
+	return CT_DONE;
+}
+
+static void
+vfs_umount_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* In-handler for umount: the result, plus the label on success. */
+	put_result(proc);
+
+	if (!failed) {
+		put_open(proc, NULL, 0, "(", ", ");
+		put_buf(proc, "label", PF_STRING, m_out->m_lc_vfs_umount.label,
+		    m_out->m_lc_vfs_umount.labellen);
+
+		put_close(proc, ")");
+	}
+}
+
+
+static const struct flags access_flags[] = {	/* access "mode" names */
+	FLAG_ZERO(F_OK),
+	FLAG(R_OK),
+	FLAG(W_OK),
+	FLAG(X_OK),
+};
+
+static int
+vfs_access_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for access: print path and the symbolic mode bits. */
+	put_path(proc, m_out);
+	put_flags(proc, "mode", access_flags, COUNT(access_flags), "0x%x",
+	    m_out->m_lc_vfs_path.mode);
+
+	return CT_DONE;
+}
+
+static int
+vfs_readlink_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for readlink: path only; vfs_readlink_in does the rest. */
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_readlink.name,
+	    m_out->m_lc_vfs_readlink.namelen);
+
+	return CT_NOTDONE;
+}
+
+static void
+vfs_readlink_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* In-handler for readlink: print buf, bufsize (a size_t), result. */
+	/* The call does not return a string, so do not use PF_STRING here. */
+	put_buf(proc, "buf", failed, m_out->m_lc_vfs_readlink.buf,
+	    m_in->m_type);	/* the reply's m_type is passed as the length */
+	put_value(proc, "bufsize", "%zu", m_out->m_lc_vfs_readlink.bufsize);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static void
+put_struct_stat(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct stat buf;
+	int is_special;
+	/* Print a struct stat; stop here if put_open_struct() returns zero. */
+	if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf)))
+		return;
+
+	/*
+	 * The combination of struct stat's frequent usage and large number of
+	 * fields makes this structure a pain to print.  For now, the idea is
+	 * that for verbosity level 0, we print the mode, and the target device
+	 * for block/char special files or the file size for all other files.
+	 * For higher verbosity levels, largely maintain the structure's own
+	 * order of fields.  Violate this general structure printing rule for
+	 * some fields though, because the actual field order in struct stat is
+	 * downright ridiculous.  Like elsewhere, for verbosity level 1 print
+	 * all fields with meaningful values, and for verbosity level 2 just
+	 * print everything, including fields that are known to be not yet
+	 * supported and fields that contain known values.
+	 */
+	is_special = (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode));
+
+	if (verbose > 0) {
+		put_dev(proc, "st_dev", buf.st_dev);
+		put_value(proc, "st_ino", "%"PRIu64, buf.st_ino);
+	}
+	put_mode(proc, "st_mode", buf.st_mode);
+	if (verbose > 0) {
+		put_value(proc, "st_nlink", "%u", buf.st_nlink);
+		put_value(proc, "st_uid", "%u", buf.st_uid);
+		put_value(proc, "st_gid", "%u", buf.st_gid);
+	}
+	if (is_special || verbose > 1)
+		put_dev(proc, "st_rdev", buf.st_rdev);
+	if (verbose > 0) {
+		/*
+		 * TODO: print the nanosecond part, but possibly only if we are
+		 * not actually interpreting the time as a date (another TODO),
+		 * and/or possibly only with verbose > 1 (largely unsupported).
+		 */
+		put_time(proc, "st_atime", buf.st_atime);
+		put_time(proc, "st_mtime", buf.st_mtime);
+		put_time(proc, "st_ctime", buf.st_ctime);
+	}
+	if (verbose > 1) /* not yet supported on MINIX3 */
+		put_time(proc, "st_birthtime", buf.st_birthtime);
+	if (!is_special || verbose > 1)
+		put_value(proc, "st_size", "%"PRId64, buf.st_size);
+	if (verbose > 0) {
+		put_value(proc, "st_blocks", "%"PRId64, buf.st_blocks);
+		put_value(proc, "st_blksize", "%"PRId32, buf.st_blksize);
+	}
+	if (verbose > 1) {
+		put_value(proc, "st_flags", "%"PRIu32, buf.st_flags);
+		put_value(proc, "st_gen", "%"PRIu32, buf.st_gen);
+	}
+
+	put_close_struct(proc, verbose > 1);
+}
+
+static int
+vfs_stat_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for stat and lstat (see vfs_map): print the path only. */
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_stat.name,
+	    m_out->m_lc_vfs_stat.len);
+
+	return CT_NOTDONE;
+}
+
+static void
+vfs_stat_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* In-handler for stat and lstat: print the stat buffer and result. */
+	put_struct_stat(proc, "buf", failed, m_out->m_lc_vfs_stat.buf);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static int
+vfs_fstat_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for fstat: print the file descriptor only. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_fstat.fd);
+
+	return CT_NOTDONE;
+}
+
+static void
+vfs_fstat_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* In-handler for fstat: print the stat buffer and the result. */
+	put_struct_stat(proc, "buf", failed, m_out->m_lc_vfs_fstat.buf);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static int
+vfs_ioctl_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for ioctl: fd, request, then the request's argument. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_ioctl.fd);
+	put_ioctl_req(proc, "req", m_out->m_lc_vfs_ioctl.req,
+	    FALSE /*is_svrctl*/);
+	return put_ioctl_arg_out(proc, "arg", m_out->m_lc_vfs_ioctl.req,
+	    (vir_bytes)m_out->m_lc_vfs_ioctl.arg, FALSE /*is_svrctl*/);
+}
+
+static void
+vfs_ioctl_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* In-handler for ioctl: hand the argument to put_ioctl_arg_in(). */
+	put_ioctl_arg_in(proc, "arg", failed, m_out->m_lc_vfs_ioctl.req,
+	    (vir_bytes)m_out->m_lc_vfs_ioctl.arg, FALSE /*is_svrctl*/);
+}
+
+static void
+put_fcntl_cmd(struct trace_proc * proc, const char * name, int cmd)
+{
+	const char *text = NULL;
+	/* Print an fcntl command by name; as a number if none was found. */
+	if (!valuesonly) {
+		switch (cmd) {
+		TEXT(F_DUPFD);
+		TEXT(F_GETFD);
+		TEXT(F_SETFD);
+		TEXT(F_GETFL);
+		TEXT(F_SETFL);
+		TEXT(F_GETOWN);
+		TEXT(F_SETOWN);
+		TEXT(F_GETLK);
+		TEXT(F_SETLK);
+		TEXT(F_SETLKW);
+		TEXT(F_CLOSEM);
+		TEXT(F_MAXFD);
+		TEXT(F_DUPFD_CLOEXEC);
+		TEXT(F_GETNOSIGPIPE);
+		TEXT(F_SETNOSIGPIPE);
+		TEXT(F_FREESP);
+		TEXT(F_FLUSH_FS_CACHE);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", cmd);
+}
+
+static const struct flags fd_flags[] = {	/* F_GETFD/F_SETFD flags */
+	FLAG(FD_CLOEXEC),
+};
+
+#define put_fd_flags(p, n, v) /* print v using the fd_flags names */ \
+	put_flags(p, n, fd_flags, COUNT(fd_flags), "0x%x", v)
+
+static void
+put_flock_type(struct trace_proc * proc, const char * name, int type)
+{
+	const char *text = NULL;
+	/* Print a flock l_type by name; as a number if none was found. */
+	if (!valuesonly) {
+		switch (type) {
+		TEXT(F_RDLCK);
+		TEXT(F_UNLCK);
+		TEXT(F_WRLCK);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", type);
+}
+
+/*
+ * Print a struct flock.  PF_FULL: also print l_pid, except that for F_UNLCK
+ * only l_type is printed.  PF_ALT: print only l_whence/l_start/l_len.
+ */
+static void
+put_struct_flock(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct flock flock;
+	int limited;
+
+	if (!put_open_struct(proc, name, flags, addr, &flock, sizeof(flock)))
+		return;
+	/* limited: print nothing but l_type (PF_FULL with F_UNLCK). */
+	limited = ((flags & PF_FULL) && flock.l_type == F_UNLCK);
+
+	if (!(flags & PF_ALT))
+		put_flock_type(proc, "l_type", flock.l_type);
+	if (!limited) {
+		put_lseek_whence(proc, "l_whence", flock.l_whence);
+		put_value(proc, "l_start", "%"PRId64, flock.l_start);
+		put_value(proc, "l_len", "%"PRId64, flock.l_len);
+		if (flags & PF_FULL)
+			put_value(proc, "l_pid", "%d", flock.l_pid);
+	}
+
+	put_close_struct(proc, TRUE /*all*/);
+}
+
+static int
+vfs_fcntl_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for fcntl: print fd, cmd and any cmd-specific input. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_fcntl.fd);
+	put_fcntl_cmd(proc, "cmd", m_out->m_lc_vfs_fcntl.cmd);
+
+	switch (m_out->m_lc_vfs_fcntl.cmd) {
+	case F_DUPFD:
+	case F_DUPFD_CLOEXEC:	/* takes the same integer fd argument */
+		put_fd(proc, "fd2", m_out->m_lc_vfs_fcntl.arg_int);
+		break;
+	case F_SETFD:
+		put_fd_flags(proc, "flags", m_out->m_lc_vfs_fcntl.arg_int);
+		break;
+	case F_SETFL:
+		/*
+		 * One of those difficult cases: the access mode is ignored, so
+		 * we don't want to print O_RDONLY if it is not given.  On the
+		 * other hand, fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_..) is
+		 * a fairly common construction, in which case we don't want to
+		 * print eg O_..|0x2 if the access mode is O_RDWR.  Thus, we
+		 * compromise: show the access mode if any of its bits are set.
+		 */
+		put_open_flags(proc, "flags", m_out->m_lc_vfs_fcntl.arg_int,
+		    m_out->m_lc_vfs_fcntl.arg_int & O_ACCMODE /*full*/);
+		break;
+	case F_SETLK:
+	case F_SETLKW:
+		put_struct_flock(proc, "lkp", 0,
+		    m_out->m_lc_vfs_fcntl.arg_ptr);
+		break;
+	case F_FREESP:
+		put_struct_flock(proc, "lkp", PF_ALT,
+		    m_out->m_lc_vfs_fcntl.arg_ptr);
+		break;
+	case F_SETNOSIGPIPE:
+		put_value(proc, "arg", "%d", m_out->m_lc_vfs_fcntl.arg_int);
+		break;
+	}
+
+	return (m_out->m_lc_vfs_fcntl.cmd != F_GETLK) ? CT_DONE : CT_NOTDONE;
+}
+
+static void
+vfs_fcntl_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* In-handler for fcntl: F_GETFD/F_GETFL results print symbolically. */
+	switch (m_out->m_lc_vfs_fcntl.cmd) {
+	case F_GETFD:
+		if (failed)
+			break;
+		put_fd_flags(proc, NULL, m_in->m_type);
+		return;
+	case F_GETFL:
+		if (failed)
+			break;
+		put_open_flags(proc, NULL, m_in->m_type, TRUE /*full*/);
+		return;
+	case F_GETLK:
+		put_struct_flock(proc, "lkp", failed | PF_FULL,
+		    m_out->m_lc_vfs_fcntl.arg_ptr);
+		put_equals(proc);
+		break;
+	}
+
+	put_result(proc);
+}
+
+static int
+vfs_pipe2_out(struct trace_proc * __unused proc,
+	const message * __unused m_out)
+{
+	/* Out-handler for pipe2: print nothing yet; see vfs_pipe2_in. */
+	return CT_NOTDONE;
+}
+
+static void
+vfs_pipe2_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* In-handler for pipe2: fd pair ("&.." on failure), flags, result. */
+	if (!failed) {
+		put_open(proc, "fd", PF_NONAME, "[", ", ");
+		put_fd(proc, "rfd", m_in->m_lc_vfs_pipe2.fd0);
+		put_fd(proc, "wfd", m_in->m_lc_vfs_pipe2.fd1);
+		put_close(proc, "]");
+	} else
+		put_field(proc, "fd", "&..");
+	put_open_flags(proc, "flags", m_out->m_lc_vfs_pipe2.flags,
+	    FALSE /*full*/);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static int
+vfs_umask_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for umask: print the requested mask, without a name. */
+	put_mode(proc, NULL, m_out->m_lc_vfs_umask.mask);
+
+	return CT_DONE;
+}
+
+static void
+vfs_umask_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* A failed call gets the generic result; otherwise print a mode. */
+	if (failed) {
+		put_result(proc);
+		return;
+	}
+	put_mode(proc, NULL, m_in->m_type);
+}
+
+static void
+put_dirent_type(struct trace_proc * proc, const char * name, unsigned int type)
+{
+	const char *text = NULL;
+	/* Print a dirent d_type by name; as a number if none was found. */
+	if (!valuesonly) {
+		switch (type) {
+		TEXT(DT_UNKNOWN);
+		TEXT(DT_FIFO);
+		TEXT(DT_CHR);
+		TEXT(DT_DIR);
+		TEXT(DT_BLK);
+		TEXT(DT_REG);
+		TEXT(DT_LNK);
+		TEXT(DT_SOCK);
+		TEXT(DT_WHT);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%u", type);
+}
+
+static void
+put_struct_dirent(struct trace_proc * proc, const char *name, int flags,
+	vir_bytes addr)
+{
+	struct dirent dirent;
+	/* Print one struct dirent; more fields appear as 'verbose' grows. */
+	if (!put_open_struct(proc, name, flags, addr, &dirent, sizeof(dirent)))
+		return;
+
+	if (verbose > 0)
+		put_value(proc, "d_fileno", "%"PRIu64, dirent.d_fileno);
+	if (verbose > 1) {
+		put_value(proc, "d_reclen", "%u", dirent.d_reclen);
+		put_value(proc, "d_namlen", "%u", dirent.d_namlen);
+	}
+	if (verbose >= 1 + (dirent.d_type == DT_UNKNOWN)) /* 2 if unknown */
+		put_dirent_type(proc, "d_type", dirent.d_type);
+	put_buf(proc, "d_name", PF_LOCADDR, (vir_bytes)dirent.d_name,
+	    MIN(dirent.d_namlen, sizeof(dirent.d_name)));
+
+	put_close_struct(proc, verbose > 1);
+}
+
+static void
+put_dirent_array(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr, ssize_t size)
+{
+	struct dirent dirent;
+	unsigned count, max;
+	ssize_t off, chunk;
+	/* Print 'size' bytes at 'addr' as an array of struct dirent. */
+	if ((flags & PF_FAILED) || valuesonly > 1 || size < 0) {
+		put_ptr(proc, name, addr);
+
+		return;
+	}
+
+	if (size == 0) {
+		put_field(proc, name, "[]");
+
+		return;
+	}
+
+	if (verbose == 0)
+		max = 0; /* TODO: should we set this to 1 instead? */
+	else if (verbose == 1)
+		max = 3; /* low; just to give an indication where we are */
+	else
+		max = INT_MAX;
+
+	/* TODO: inefficient: we often copy in the same memory repeatedly. */
+	count = 0;
+	for (off = 0; off < size; off += chunk) {
+		chunk = size - off;
+		if (chunk > sizeof(dirent))
+			chunk = sizeof(dirent);
+		if (chunk < _DIRENT_MINSIZE(&dirent))
+			break;
+
+		if (mem_get_data(proc->pid, addr + off, &dirent, chunk) < 0) {
+			if (off == 0) {
+				put_ptr(proc, name, addr);
+
+				return;
+			}
+
+			break;
+		}
+
+		if (off == 0)
+			put_open(proc, name, PF_NONAME, "[", ", ");
+
+		if (count < max)
+			put_struct_dirent(proc, NULL, PF_LOCADDR,
+			    (vir_bytes)&dirent);
+
+		/* Zero d_reclen: we would never advance; end with a tail. */
+		if (dirent.d_reclen == 0)
+			break;
+		if (chunk > dirent.d_reclen)
+			chunk = dirent.d_reclen;
+		count++;
+	}
+
+	if (off < size)
+		put_tail(proc, 0, 0);
+	else if (count > max)
+		put_tail(proc, count, max);
+	put_close(proc, "]");
+}
+
+static int
+vfs_getdents_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for getdents: fd only; it uses the readwrite message. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd);
+
+	return CT_NOTDONE;
+}
+
+static void
+vfs_getdents_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* In-handler for getdents: the reply's m_type is the array size. */
+	put_dirent_array(proc, "buf", failed, m_out->m_lc_vfs_readwrite.buf,
+	    m_in->m_type);
+	put_value(proc, "len", "%zu", m_out->m_lc_vfs_readwrite.len);
+	put_equals(proc);
+	put_result(proc);
+}
+
+static void
+put_fd_set(struct trace_proc * proc, const char * name, vir_bytes addr,
+	int nfds)
+{
+	fd_set set;
+	size_t off;
+	unsigned int i, j, words, count, max;
+	/* Print the fds set in the first 'nfds' bits of the fd_set at addr. */
+	if (addr == 0 || nfds < 0) {
+		put_ptr(proc, name, addr);
+
+		return;
+	}
+
+	/*
+	 * Each process may define its own FD_SETSIZE, so our fd_set may be of
+	 * a different size than theirs.  Thus, we copy at a granularity known
+	 * to be valid in any case: a single word of bits.  We make the
+	 * assumption that fd_set consists purely of bits, so that we can use
+	 * the second (and so on) bit word as an fd_set by itself.
+	 */
+	words = (nfds + NFDBITS - 1) / NFDBITS;
+
+	count = 0;
+
+	if (verbose == 0)
+		max = 16;
+	else if (verbose == 1)
+		max = FD_SETSIZE;
+	else
+		max = INT_MAX;
+
+	/* TODO: copy in more at once, but stick to fd_mask boundaries. */
+	for (off = 0, i = 0; i < words; i++, off += sizeof(fd_mask)) {
+		if (mem_get_data(proc->pid, addr + off, &set,
+		    sizeof(fd_mask)) != 0) {
+			if (count == 0) {
+				put_ptr(proc, name, addr);
+
+				return;
+			}
+
+			break;
+		}
+
+		for (j = 0; j < NFDBITS; j++) {
+			if (FD_ISSET(j, &set)) {
+				if (count == 0)
+					put_open(proc, name, PF_NONAME, "[",
+					    " ");
+
+				if (count < max)
+					put_fd(proc, NULL, i * NFDBITS + j);
+
+				count++;
+			}
+		}
+	}
+
+	/*
+	 * The empty set should print as "[]".  If copying any part failed, it
+	 * should print as "[x, ..(?)]" where x is the set printed so far, if
+	 * any.  If copying never failed, and we did not print all fds in the
+	 * set, print the remaining count n as "[x, ..(+n)]" at the end.
+	 */
+	if (count == 0)
+		put_open(proc, name, PF_NONAME, "[", " ");
+
+	if (i < words)
+		put_tail(proc, 0, 0);
+	else if (count > max)
+		put_tail(proc, count, max);
+
+	put_close(proc, "]");
+}
+
+static int
+vfs_select_out(struct trace_proc * proc, const message * m_out)
+{
+	int nfds;
+	/* Out-handler for select: nfds, the three fd sets, and the timeout. */
+	nfds = m_out->m_lc_vfs_select.nfds;
+
+	put_fd(proc, "nfds", nfds); /* not really a file descriptor.. */
+	put_fd_set(proc, "readfds",
+	    (vir_bytes)m_out->m_lc_vfs_select.readfds, nfds);
+	put_fd_set(proc, "writefds",
+	    (vir_bytes)m_out->m_lc_vfs_select.writefds, nfds);
+	put_fd_set(proc, "errorfds",
+	    (vir_bytes)m_out->m_lc_vfs_select.errorfds, nfds);
+	put_struct_timeval(proc, "timeout", 0, m_out->m_lc_vfs_select.timeout);
+
+	return CT_DONE;
+}
+
+static void
+vfs_select_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	vir_bytes readfds, writefds, errorfds;
+	int nfds;
+	/* In-handler for select: result, then non-NULL fd sets read again. */
+	put_result(proc);
+	if (failed)
+		return;
+
+	nfds = m_out->m_lc_vfs_select.nfds;
+
+	readfds = (vir_bytes)m_out->m_lc_vfs_select.readfds;
+	writefds = (vir_bytes)m_out->m_lc_vfs_select.writefds;
+	errorfds = (vir_bytes)m_out->m_lc_vfs_select.errorfds;
+
+	if (readfds == 0 && writefds == 0 && errorfds == 0)
+		return;
+
+	/* Omit names, because it looks weird. */
+	put_open(proc, NULL, PF_NONAME, "(", ", ");
+	if (readfds != 0)
+		put_fd_set(proc, "readfds", readfds, nfds);
+	if (writefds != 0)
+		put_fd_set(proc, "writefds", writefds, nfds);
+	if (errorfds != 0)
+		put_fd_set(proc, "errorfds", errorfds, nfds);
+	put_close(proc, ")");
+}
+
+static int
+vfs_fchdir_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for fchdir: print the file descriptor. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_fchdir.fd);
+
+	return CT_DONE;
+}
+
+static int
+vfs_fsync_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for fsync: print the file descriptor. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_fsync.fd);
+
+	return CT_DONE;
+}
+
+static int
+vfs_truncate_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for truncate: print the path and the new length. */
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_truncate.name,
+	    m_out->m_lc_vfs_truncate.len);
+	put_value(proc, "length", "%"PRId64, m_out->m_lc_vfs_truncate.offset);
+
+	return CT_DONE;
+}
+
+static int
+vfs_ftruncate_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for ftruncate; reads the same message as truncate. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_truncate.fd);
+	put_value(proc, "length", "%"PRId64, m_out->m_lc_vfs_truncate.offset);
+
+	return CT_DONE;
+}
+
+static int
+vfs_fchmod_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for fchmod: print the file descriptor and the mode. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_fchmod.fd);
+	put_mode(proc, "mode", m_out->m_lc_vfs_fchmod.mode);
+
+	return CT_DONE;
+}
+
+static int
+vfs_fchown_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for fchown; reads the same message as chown. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_chown.fd);
+	/* -1 means "keep the current value" so print as signed */
+	put_value(proc, "owner", "%d", m_out->m_lc_vfs_chown.owner);
+	put_value(proc, "group", "%d", m_out->m_lc_vfs_chown.group);
+
+	return CT_DONE;
+}
+
+static const char *
+vfs_utimens_name(const message * m_out)
+{
+	int with_path;
+
+	with_path = (m_out->m_vfs_utimens.name != NULL);
+
+	/* No flags at all: the plain path-based or fd-based variant. */
+	if (m_out->m_vfs_utimens.flags == 0)
+		return with_path ? "utimens" : "futimens";
+
+	/* A path with exactly AT_SYMLINK_NOFOLLOW is lutimens; else *at. */
+	if (with_path && m_out->m_vfs_utimens.flags == AT_SYMLINK_NOFOLLOW)
+		return "lutimens";
+
+	return "utimensat";
+}
+
+static const struct flags at_flags[] = {	/* used by vfs_utimens_out */
+	FLAG(AT_EACCESS),
+	FLAG(AT_SYMLINK_NOFOLLOW),
+	FLAG(AT_SYMLINK_FOLLOW),
+	FLAG(AT_REMOVEDIR),
+};
+
+static void
+put_utimens_timespec(struct trace_proc * proc, const char * name,
+	time_t sec, long nsec)
+{
+	const char *special = NULL;	/* symbolic tv_nsec value, if any */
+
+	if (!valuesonly && nsec == UTIME_NOW)
+		special = "UTIME_NOW";
+	else if (!valuesonly && nsec == UTIME_OMIT)
+		special = "UTIME_OMIT";
+
+	put_open(proc, name, PF_NONAME, "{", ", ");	/* no field names */
+	put_time(proc, "tv_sec", sec);
+	if (special != NULL)
+		put_field(proc, "tv_nsec", special);
+	else
+		put_value(proc, "tv_nsec", "%ld", nsec);
+	put_close(proc, "}");
+}
+
+static int
+vfs_utimens_out(struct trace_proc * proc, const message * m_out)
+{
+	int has_path, has_flags;
+	/* Out-handler for the utimens call family; see vfs_utimens_name(). */
+	/* Here we do not care about the utimens/lutimens distinction. */
+	has_path = (m_out->m_vfs_utimens.name != NULL);
+	has_flags = !!(m_out->m_vfs_utimens.flags & ~AT_SYMLINK_NOFOLLOW);
+
+	if (has_path && has_flags)
+		put_field(proc, "fd", "AT_FDCWD"); /* utimensat */
+	else if (!has_path)
+		put_fd(proc, "fd", m_out->m_vfs_utimens.fd); /* futimens */
+	if (has_path || has_flags) /* lutimens, utimens, utimensat */
+		put_buf(proc, "path", PF_PATH,
+		    (vir_bytes)m_out->m_vfs_utimens.name,
+		    m_out->m_vfs_utimens.len);
+
+	put_open(proc, "times", 0, "[", ", ");
+	put_utimens_timespec(proc, "atime", m_out->m_vfs_utimens.atime,
+	    m_out->m_vfs_utimens.ansec);
+	put_utimens_timespec(proc, "mtime", m_out->m_vfs_utimens.mtime,
+	    m_out->m_vfs_utimens.mnsec);
+	put_close(proc, "]");
+
+	if (has_flags)
+		put_flags(proc, "flag", at_flags, COUNT(at_flags), "0x%x",
+		    m_out->m_vfs_utimens.flags);
+
+	return CT_DONE;
+}
+
+static const struct flags statvfs_flags[] = {	/* getvfsstat/statvfs1 flags */
+	FLAG(ST_WAIT),
+	FLAG(ST_NOWAIT),
+};
+
+static const struct flags st_flags[] = {	/* struct statvfs f_flag bits */
+	FLAG(ST_RDONLY),
+	FLAG(ST_SYNCHRONOUS),
+	FLAG(ST_NOEXEC),
+	FLAG(ST_NOSUID),
+	FLAG(ST_NODEV),
+	FLAG(ST_UNION),
+	FLAG(ST_ASYNC),
+	FLAG(ST_NOCOREDUMP),
+	FLAG(ST_RELATIME),
+	FLAG(ST_IGNORE),
+	FLAG(ST_NOATIME),
+	FLAG(ST_SYMPERM),
+	FLAG(ST_NODEVMTIME),
+	FLAG(ST_SOFTDEP),
+	FLAG(ST_LOG),
+	FLAG(ST_EXTATTR),
+	FLAG(ST_EXRDONLY),
+	FLAG(ST_EXPORTED),
+	FLAG(ST_DEFEXPORTED),
+	FLAG(ST_EXPORTANON),
+	FLAG(ST_EXKERB),
+	FLAG(ST_EXNORESPORT),
+	FLAG(ST_EXPUBLIC),
+	FLAG(ST_LOCAL),
+	FLAG(ST_QUOTA),
+	FLAG(ST_ROOTFS),
+	FLAG(ST_NOTRUNC),
+};
+
+static void
+put_struct_statvfs(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr)
+{
+	struct statvfs buf;
+	/* Print a struct statvfs; more fields appear as 'verbose' grows. */
+	if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf)))
+		return;
+
+	put_flags(proc, "f_flag", st_flags, COUNT(st_flags), "0x%x",
+	    buf.f_flag);
+	put_value(proc, "f_bsize", "%lu", buf.f_bsize);
+	if (verbose > 0 || buf.f_bsize != buf.f_frsize)
+		put_value(proc, "f_frsize", "%lu", buf.f_frsize);
+	if (verbose > 1)
+		put_value(proc, "f_iosize", "%lu", buf.f_iosize);
+
+	put_value(proc, "f_blocks", "%"PRIu64, buf.f_blocks);
+	put_value(proc, "f_bfree", "%"PRIu64, buf.f_bfree);
+	if (verbose > 1) {
+		put_value(proc, "f_bavail", "%"PRIu64, buf.f_bavail);
+		put_value(proc, "f_bresvd", "%"PRIu64, buf.f_bresvd);
+	}
+
+	if (verbose > 0) {
+		put_value(proc, "f_files", "%"PRIu64, buf.f_files);
+		put_value(proc, "f_ffree", "%"PRIu64, buf.f_ffree);
+	}
+	if (verbose > 1) {
+		put_value(proc, "f_favail", "%"PRIu64, buf.f_favail);
+		put_value(proc, "f_fresvd", "%"PRIu64, buf.f_fresvd);
+	}
+
+	if (verbose > 1) {
+		put_value(proc, "f_syncreads", "%"PRIu64, buf.f_syncreads);
+		put_value(proc, "f_syncwrites", "%"PRIu64, buf.f_syncwrites);
+		put_value(proc, "f_asyncreads", "%"PRIu64, buf.f_asyncreads);
+		put_value(proc, "f_asyncwrites", "%"PRIu64, buf.f_asyncwrites);
+
+		put_value(proc, "f_fsidx", "<%"PRId32",%"PRId32">",
+		    buf.f_fsidx.__fsid_val[0], buf.f_fsidx.__fsid_val[1]);
+	}
+	put_dev(proc, "f_fsid", buf.f_fsid); /* MINIX3 interpretation! */
+
+	if (verbose > 0)
+		put_value(proc, "f_namemax", "%lu", buf.f_namemax);
+	if (verbose > 1)
+		put_value(proc, "f_owner", "%u", buf.f_owner);
+
+	put_buf(proc, "f_fstypename", PF_STRING | PF_LOCADDR,
+	    (vir_bytes)&buf.f_fstypename, sizeof(buf.f_fstypename));
+	if (verbose > 0)
+		put_buf(proc, "f_mntfromname", PF_STRING | PF_LOCADDR,
+		    (vir_bytes)&buf.f_mntfromname, sizeof(buf.f_mntfromname));
+	put_buf(proc, "f_mntonname", PF_STRING | PF_LOCADDR,
+	    (vir_bytes)&buf.f_mntonname, sizeof(buf.f_mntonname));
+
+	put_close_struct(proc, verbose > 1);
+}
+
+static void
+put_statvfs_array(struct trace_proc * proc, const char * name, int flags,
+	vir_bytes addr, int count)
+{
+	struct statvfs buf;
+	int i, max;
+	/* Print 'count' struct statvfs entries at 'addr' as an array. */
+	if ((flags & PF_FAILED) || valuesonly || count < 0) {
+		put_ptr(proc, name, addr);
+
+		return;
+	}
+
+	if (count == 0) {
+		put_field(proc, name, "[]");
+
+		return;
+	}
+
+	if (verbose == 0)
+		max = 0;
+	else if (verbose == 1)
+		max = 1; /* TODO: is this reasonable? */
+	else
+		max = INT_MAX;
+
+	if (max > count)
+		max = count;
+
+	for (i = 0; i < max; i++) {
+		if (mem_get_data(proc->pid, addr + i * sizeof(buf), &buf,
+		    sizeof(buf)) < 0) {
+			if (i == 0) {
+				put_ptr(proc, name, addr);
+
+				return;
+			}
+
+			break;
+		}
+
+		if (i == 0)
+			put_open(proc, name, PF_NONAME, "[", ", ");
+
+		put_struct_statvfs(proc, NULL, PF_LOCADDR, (vir_bytes)&buf);
+	}
+
+	if (i == 0)	/* nothing printed (max is 0): still open the array */
+		put_open(proc, name, PF_NONAME, "[", ", ");
+	if (i < max)	/* a copy failed part-way through */
+		put_tail(proc, 0, 0);
+	else if (count > i)
+		put_tail(proc, count, i);
+	put_close(proc, "]");
+}
+
+static int
+vfs_getvfsstat_out(struct trace_proc * proc, const message * m_out)
+{
+	/* With a buffer, everything is printed once the call has returned. */
+	if (m_out->m_lc_vfs_getvfsstat.buf != 0)
+		return CT_NOTDONE;
+
+	put_ptr(proc, "buf", m_out->m_lc_vfs_getvfsstat.buf);
+	put_value(proc, "bufsize", "%zu", m_out->m_lc_vfs_getvfsstat.len);
+	put_flags(proc, "flags", statvfs_flags, COUNT(statvfs_flags), "%d",
+	    m_out->m_lc_vfs_getvfsstat.flags);
+
+	return CT_DONE;
+}
+
+static void
+vfs_getvfsstat_in(struct trace_proc * proc, const message * m_out,
+	const message * m_in, int failed)
+{
+	/* In-handler for getvfsstat; a zero buf was already printed earlier. */
+	if (m_out->m_lc_vfs_getvfsstat.buf != 0) {
+		put_statvfs_array(proc, "buf", failed,
+		    m_out->m_lc_vfs_getvfsstat.buf, m_in->m_type);
+		put_value(proc, "bufsize", "%zu",
+		    m_out->m_lc_vfs_getvfsstat.len);
+		put_flags(proc, "flags", statvfs_flags, COUNT(statvfs_flags),
+		    "%d", m_out->m_lc_vfs_getvfsstat.flags);
+		put_equals(proc);
+	}
+	put_result(proc);
+}
+
+static int
+vfs_statvfs1_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for statvfs1: path only; the rest follows afterwards. */
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_statvfs1.name,
+	    m_out->m_lc_vfs_statvfs1.len);
+
+	return CT_NOTDONE;
+}
+
+static void
+vfs_statvfs1_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* In-handler used for both statvfs1 and fstatvfs1 (see vfs_map). */
+	put_struct_statvfs(proc, "buf", failed, m_out->m_lc_vfs_statvfs1.buf);
+	put_flags(proc, "flags", statvfs_flags, COUNT(statvfs_flags), "%d",
+	    m_out->m_lc_vfs_statvfs1.flags);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Only the out-handler is fstatvfs1-specific; vfs_statvfs1_in is shared. */
+static int
+vfs_fstatvfs1_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Print the file descriptor; it is read from the statvfs1 message. */
+	put_fd(proc, "fd", m_out->m_lc_vfs_statvfs1.fd);
+
+	return CT_NOTDONE;
+}
+
+static int
+vfs_getrusage_out(struct trace_proc * __unused proc,
+	const message * __unused m_out)
+{
+	/* Out-handler for vfs_getrusage: nothing yet; see vfs_getrusage_in. */
+	return CT_NOTDONE;
+}
+
+static void
+vfs_getrusage_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	struct rusage buf;
+	/* In-handler for vfs_getrusage: selected rusage fields, then result. */
+	/* Inline; we will certainly not be reusing this anywhere else. */
+	if (put_open_struct(proc, "rusage", failed,
+	    m_out->m_lc_vfs_rusage.addr, &buf, sizeof(buf))) {
+		/* Reason for hiding these two better: they're always zero. */
+		if (verbose > 1) {
+			put_value(proc, "ru_inblock", "%ld", buf.ru_inblock);
+			put_value(proc, "ru_oublock", "%ld", buf.ru_oublock);
+		}
+		if (verbose > 0) {
+			put_value(proc, "ru_ixrss", "%ld", buf.ru_ixrss);
+			put_value(proc, "ru_idrss", "%ld", buf.ru_idrss);
+			put_value(proc, "ru_isrss", "%ld", buf.ru_isrss);
+		}
+
+		put_close_struct(proc, verbose > 1);
+	}
+	put_equals(proc);
+	put_result(proc);
+}
+
+static int
+vfs_svrctl_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for vfs_svrctl: reuses the ioctl printers (is_svrctl). */
+	put_ioctl_req(proc, "request", m_out->m_lc_svrctl.request,
+	    TRUE /*is_svrctl*/);
+	return put_ioctl_arg_out(proc, "arg", m_out->m_lc_svrctl.request,
+	    m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/);
+}
+
+static void
+vfs_svrctl_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	/* In-handler for vfs_svrctl: hand the arg to put_ioctl_arg_in(). */
+	put_ioctl_arg_in(proc, "arg", failed, m_out->m_lc_svrctl.request,
+	    m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/);
+}
+
+static int
+vfs_gcov_flush_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Out-handler for gcov_flush: buffer pointer, its size, server pid. */
+	put_ptr(proc, "buff", m_out->m_lc_vfs_gcov.buff_p);
+	put_value(proc, "buff_sz", "%zu", m_out->m_lc_vfs_gcov.buff_sz);
+	put_value(proc, "server_pid", "%d", m_out->m_lc_vfs_gcov.pid);
+
+	return CT_DONE;
+}
+
+#define VFS_CALL(c) [((VFS_ ## c) - VFS_BASE)]	/* vfs_map slot of VFS_c */
+
+static const struct call_handler vfs_map[] = {	/* indexed via VFS_CALL */
+	VFS_CALL(READ) = HANDLER("read", vfs_read_out, vfs_read_in),
+	VFS_CALL(WRITE) = HANDLER("write", vfs_write_out, default_in),
+	VFS_CALL(LSEEK) = HANDLER("lseek", vfs_lseek_out, vfs_lseek_in),
+	VFS_CALL(OPEN) = HANDLER("open", vfs_open_out, vfs_open_in),
+	VFS_CALL(CREAT) = HANDLER("open", vfs_creat_out, vfs_open_in),
+	VFS_CALL(CLOSE) = HANDLER("close", vfs_close_out, default_in),
+	VFS_CALL(LINK) = HANDLER("link", vfs_link_out, default_in),
+	VFS_CALL(UNLINK) = HANDLER("unlink", vfs_path_out, default_in),
+	VFS_CALL(CHDIR) = HANDLER("chdir", vfs_path_out, default_in),
+	VFS_CALL(MKDIR) = HANDLER("mkdir", vfs_path_mode_out, default_in),
+	VFS_CALL(MKNOD) = HANDLER("mknod", vfs_mknod_out, default_in),
+	VFS_CALL(CHMOD) = HANDLER("chmod", vfs_path_mode_out, default_in),
+	VFS_CALL(CHOWN) = HANDLER("chown", vfs_chown_out, default_in),
+	VFS_CALL(MOUNT) = HANDLER("mount", vfs_mount_out, default_in),
+	VFS_CALL(UMOUNT) = HANDLER("umount", vfs_umount_out, vfs_umount_in),
+	VFS_CALL(ACCESS) = HANDLER("access", vfs_access_out, default_in),
+	VFS_CALL(SYNC) = HANDLER("sync", default_out, default_in),
+	VFS_CALL(RENAME) = HANDLER("rename", vfs_link_out, default_in),
+	VFS_CALL(RMDIR) = HANDLER("rmdir", vfs_path_out, default_in),
+	VFS_CALL(SYMLINK) = HANDLER("symlink", vfs_link_out, default_in),
+	VFS_CALL(READLINK) = HANDLER("readlink", vfs_readlink_out,
+	    vfs_readlink_in),
+	VFS_CALL(STAT) = HANDLER("stat", vfs_stat_out, vfs_stat_in),
+	VFS_CALL(FSTAT) = HANDLER("fstat", vfs_fstat_out, vfs_fstat_in),
+	VFS_CALL(LSTAT) = HANDLER("lstat", vfs_stat_out, vfs_stat_in),
+	VFS_CALL(IOCTL) = HANDLER("ioctl", vfs_ioctl_out, vfs_ioctl_in),
+	VFS_CALL(FCNTL) = HANDLER("fcntl", vfs_fcntl_out, vfs_fcntl_in),
+	VFS_CALL(PIPE2) = HANDLER("pipe2", vfs_pipe2_out, vfs_pipe2_in),
+	VFS_CALL(UMASK) = HANDLER("umask", vfs_umask_out, vfs_umask_in),
+	VFS_CALL(CHROOT) = HANDLER("chroot", vfs_path_out, default_in),
+	VFS_CALL(GETDENTS) = HANDLER("getdents", vfs_getdents_out,
+	    vfs_getdents_in),
+	VFS_CALL(SELECT) = HANDLER("select", vfs_select_out, vfs_select_in),
+	VFS_CALL(FCHDIR) = HANDLER("fchdir", vfs_fchdir_out, default_in),
+	VFS_CALL(FSYNC) = HANDLER("fsync", vfs_fsync_out, default_in),
+	VFS_CALL(TRUNCATE) = HANDLER("truncate", vfs_truncate_out, default_in),
+	VFS_CALL(FTRUNCATE) = HANDLER("ftruncate", vfs_ftruncate_out,
+	    default_in),
+	VFS_CALL(FCHMOD) = HANDLER("fchmod", vfs_fchmod_out, default_in),
+	VFS_CALL(FCHOWN) = HANDLER("fchown", vfs_fchown_out, default_in),
+	VFS_CALL(UTIMENS) = HANDLER_NAME(vfs_utimens_name, vfs_utimens_out,
+	    default_in),
+	VFS_CALL(GETVFSSTAT) = HANDLER("getvfsstat", vfs_getvfsstat_out,
+	    vfs_getvfsstat_in),
+	VFS_CALL(STATVFS1) = HANDLER("statvfs1", vfs_statvfs1_out,
+	    vfs_statvfs1_in),
+	VFS_CALL(FSTATVFS1) = HANDLER("fstatvfs1", vfs_fstatvfs1_out,
+	    vfs_statvfs1_in),
+	VFS_CALL(GETRUSAGE) = HANDLER("vfs_getrusage", vfs_getrusage_out,
+	    vfs_getrusage_in),
+	VFS_CALL(SVRCTL) = HANDLER("vfs_svrctl", vfs_svrctl_out,
+	    vfs_svrctl_in),
+	VFS_CALL(GCOV_FLUSH) = HANDLER("gcov_flush", vfs_gcov_flush_out,
+	    default_in),
+};
+
+const struct calls vfs_calls = {	/* exported: VFS endpoint + vfs_map */
+	.endpt = VFS_PROC_NR,
+	.base = VFS_BASE,
+	.map = vfs_map,
+	.count = COUNT(vfs_map)
+};
diff --git a/minix/usr.bin/trace/service/vm.c b/minix/usr.bin/trace/service/vm.c
new file mode 100644
index 000000000..43d54239c
--- /dev/null
+++ b/minix/usr.bin/trace/service/vm.c
@@ -0,0 +1,135 @@
+
+#include "inc.h"
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+static int
+vm_brk_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Print the one argument of brk(): the address from the request. */
+	put_ptr(proc, "addr", (vir_bytes)m_out->m_lc_vm_brk.addr);
+
+	return CT_DONE;
+}
+
+static const struct flags mmap_prot[] = {	/* mmap() "prot" bit names */
+	FLAG_ZERO(PROT_NONE),
+	FLAG(PROT_READ),
+	FLAG(PROT_WRITE),
+	FLAG(PROT_EXEC),
+};
+
+static const struct flags mmap_flags[] = {	/* mmap() "flags" bit names */
+	FLAG(MAP_SHARED),
+	FLAG(MAP_PRIVATE),
+	FLAG(MAP_FIXED),
+	FLAG(MAP_RENAME),
+	FLAG(MAP_NORESERVE),
+	FLAG(MAP_INHERIT),
+	FLAG(MAP_HASSEMAPHORE),
+	FLAG(MAP_TRYFIXED),
+	FLAG(MAP_WIRED),
+	FLAG_MASK(MAP_ANON | MAP_STACK, MAP_FILE),
+	FLAG(MAP_ANON),
+	FLAG(MAP_STACK),
+	FLAG(MAP_UNINITIALIZED),
+	FLAG(MAP_PREALLOC),
+	FLAG(MAP_CONTIG),
+	FLAG(MAP_LOWER16M),
+	FLAG(MAP_LOWER1M),
+	FLAG(MAP_THIRDPARTY),
+	/* TODO: interpret alignments for which there is no constant */
+	FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_64KB),
+	FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_16MB),
+	FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_4GB),
+	FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_1TB),
+	FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_256TB),
+	FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_64PB),
+};
+
+static int
+vm_mmap_out(struct trace_proc * proc, const message * m_out)
+{
+	/* "forwhom" is shown only when MAP_THIRDPARTY is set in the flags. */
+	if (m_out->m_mmap.flags & MAP_THIRDPARTY)
+		put_endpoint(proc, "forwhom", m_out->m_mmap.forwhom);
+	put_ptr(proc, "addr", (vir_bytes)m_out->m_mmap.addr);
+	put_value(proc, "len", "%zu", m_out->m_mmap.len);
+	put_flags(proc, "prot", mmap_prot, COUNT(mmap_prot), "0x%x",
+	    m_out->m_mmap.prot);
+	put_flags(proc, "flags", mmap_flags, COUNT(mmap_flags), "0x%x",
+	    m_out->m_mmap.flags);
+	put_fd(proc, "fd", m_out->m_mmap.fd);
+	put_value(proc, "offset", "%"PRId64, m_out->m_mmap.offset);
+
+	return CT_DONE;
+}
+
+static void
+vm_mmap_in(struct trace_proc * proc, const message * __unused m_out,
+	const message * m_in, int failed)
+{
+	/* TODO: consider printing MAP_FAILED in the right cases */
+	if (failed) {
+		put_result(proc);
+		return;
+	}
+	put_ptr(proc, NULL, (vir_bytes)m_in->m_mmap.retaddr);
+}
+
+static int
+vm_munmap_out(struct trace_proc * proc, const message * m_out)
+{
+	/* Both arguments are taken from the m_mmap fields of the request. */
+	put_ptr(proc, "addr", (vir_bytes)m_out->m_mmap.addr);
+	put_value(proc, "len", "%zu", m_out->m_mmap.len);
+
+	return CT_DONE;
+}
+
+static int
+vm_getrusage_out(struct trace_proc * __unused proc,
+	const message * __unused m_out)
+{
+	/* Print nothing yet: vm_getrusage_in shows the structure afterwards. */
+	return CT_NOTDONE;
+}
+
+static void
+vm_getrusage_in(struct trace_proc * proc, const message * m_out,
+	const message * __unused m_in, int failed)
+{
+	struct rusage ru;
+	const int detailed = (verbose > 0);
+
+	/* Print the structure right here, as no other call shares it. */
+	if (put_open_struct(proc, "rusage", failed,
+	    m_out->m_lc_vm_rusage.addr, &ru, sizeof(ru))) {
+		if (detailed) {
+			put_value(proc, "ru_maxrss", "%ld", ru.ru_maxrss);
+			put_value(proc, "ru_minflt", "%ld", ru.ru_minflt);
+			put_value(proc, "ru_majflt", "%ld", ru.ru_majflt);
+		}
+		put_close_struct(proc, detailed);
+	}
+	put_equals(proc);
+	put_result(proc);
+}
+
+#define VM_CALL(c) [((VM_ ## c) - VM_RQ_BASE)]
+/* Handlers for VM calls; VM_CALL() yields index (VM_<c> - VM_RQ_BASE). */
+static const struct call_handler vm_map[] = {
+	VM_CALL(BRK) = HANDLER("brk", vm_brk_out, default_in),
+	VM_CALL(MMAP) = HANDLER("mmap", vm_mmap_out, vm_mmap_in),
+	VM_CALL(MUNMAP) = HANDLER("munmap", vm_munmap_out, default_in),
+	VM_CALL(GETRUSAGE) = HANDLER("vm_getrusage", vm_getrusage_out,
+	    vm_getrusage_in),
+};
+
+const struct calls vm_calls = {	/* call table descriptor for VM */
+	.endpt = VM_PROC_NR,
+	.base = VM_RQ_BASE,	/* same base as subtracted by VM_CALL() */
+	.map = vm_map,
+	.count = COUNT(vm_map)	/* NOTE(review): presumably #entries in map */
+};
diff --git a/minix/usr.bin/trace/signal.awk b/minix/usr.bin/trace/signal.awk
new file mode 100644
index 000000000..84c54b80c
--- /dev/null
+++ b/minix/usr.bin/trace/signal.awk
@@ -0,0 +1,32 @@
+# Generate signal.c from sys/signal.h: a table of signal names indexed by
+# signal number, plus a get_signal_name() lookup function.  We assume that all
+# (userland) signals are listed before the first reference to "_KERNEL", and
+# that anything else that looks like a signal definition comes after it.
+
+BEGIN {
+	printf("/* This file is automatically generated by signal.awk */\n\n");
+	printf("#include \"inc.h\"\n\n");
+	printf("static const char *const signals[] = {\n");
+}
+/^#define/ {
+	name = $2;
+	if (!match(name, "^SIG[^_]"))	# must start with SIG, but not SIG_
+		next;
+	number = $3;
+	if (NF < 3 || number < 0 || number == "SIGABRT")  # no value, or alias
+		next;
+	printf("\t[%s] = \"%s\",\n", name, name);
+}
+/_KERNEL/ {
+	exit;
+}
+END {
+	printf("};\n\n");
+	printf("const char *\nget_signal_name(int sig)\n{\n\n");
+	printf("\tif (sig >= 0 && sig < sizeof(signals) / sizeof(signals[0]) &&\n");
+	printf("\t    signals[sig] != NULL)\n");
+	printf("\t\treturn signals[sig];\n");
+	printf("\telse\n");
+	printf("\t\treturn NULL;\n");
+	printf("}\n");
+}
diff --git a/minix/usr.bin/trace/trace.1 b/minix/usr.bin/trace/trace.1
new file mode 100644
index 000000000..239fa9710
--- /dev/null
+++ b/minix/usr.bin/trace/trace.1
@@ -0,0 +1,334 @@
+.Dd November 2, 2014
+.Dt TRACE 1
+.Os
+.Sh NAME
+.Nm trace
+.Nd print process system calls and signals
+.Sh SYNOPSIS
+.Nm
+.Op Fl fgNsVv
+.Op Fl o Ar file
+.Op Fl p Ar pid
+.Op Ar command
+.Sh DESCRIPTION
+The
+.Nm
+utility allows one or more processes to be traced.
+For each traced process,
+.Nm
+prints the system calls the process makes and the signals
+it receives.
+The user can let
+.Nm
+start a
+.Ar command
+to be traced, and/or attach to one or more existing processes.
+.Pp
+The utility will run until no processes are left to trace, or until the user
+presses the interrupt key (typically Ctrl-C).
+Pressing this key once will cause all attached processes to be detached, with
+the hope that the command that was started will also terminate cleanly from the
+interruption.
+Pressing the interrupt key once more kills the command that was started.
+.Pp
+The following options are available:
+.Bl -tag -width XoXfileXX
+.It Fl f
+Follow forks.
+Attach automatically to forked child processes.
+Child processes of the started command will be treated as attached processes,
+in that upon Ctrl-C presses they will be detached rather than killed.
+.It Fl g
+Enable call grouping.
+With this option, the tracing engine tries to reduce noise from call preemption
+by first polling the process that was active last.
+This should result in cleaner output, but may also cause a single process to be
+scheduled repeatedly and thus cause starvation.
+.It Fl N
+Print all names.
+By default, most structure fields are printed with their name.
+This option enables printing of all available names, which also includes
+system call parameter names.
+This flag may be useful to figure out the meaning of a parameter, and for
+automatic processing of the output.
+.It Fl s
+Print stack traces.
+Each system call, and each signal arriving outside a system call, will be
+preceded by a line showing the process's current stack trace.
+For signals blocked by the target process, the stack trace may not be
+meaningful.
+Stack traces may not be supported on all platforms.
+.It Fl V
+Print values only.
+If this flag is given once, numerical values will be printed instead of
+string constants.
+In addition, if it is given twice, the addresses of structures will be printed
+instead of their contents.
+.It Fl v
+Increase verbosity.
+By default, the output will be terse, in that not all structure fields are
+shown, and strings and arrays are not always printed in full.
+If this flag is provided once, more and longer output will be printed.
+If it is provided twice, the tracer will print as much as possible.
+.It Fl o Ar file
+Redirect output.
+By default, the output is sent to standard error.
+With this option, the output is written to the given
+.Ar file
+instead.
+.It Fl p Ar pid
+Attach to a process.
+This option makes
+.Nm
+attach to an existing process with process ID
+.Ar pid .
+This option may be used multiple times.
+When attaching to one or more processes this way, starting a command becomes
+optional.
+.El
+.Pp
+If the user presses the information key (typically Ctrl-T), the list of traced
+processes along with their current status will be printed.
+.Sh OUTPUT FORMAT
+System calls are printed with the following general output format:
+.Bd -literal -offset indent
+.Sy name Ns ( Ns Sy parameters Ns ) = Sy result
+.Ed
+.Pp
+Other informational lines may be printed about the status of the process.
+These lines typically start with an uppercase letter, while system calls
+always start with a lowercase letter or an underscore.
+The following example shows the tracer output for a program that prints its
+own user ID:
+.Bd -literal -offset indent
+Tracing printuid (pid 12685)
+minix_getinfo() = 0
+getuid() = 0 (euid=1)
+write(1, "My uid: 0\en", 10) = 10
+exit(0)
+Process exited normally with code 0
+.Ed
+.Pp
+The first and last lines of the output provide status information about the
+traced process.
+Some calls return multiple results; extended results are printed in parentheses
+after the primary call result, typically in
+.Va name Ns = Ns Va value
+format for clarity.
+System calls that do not return on success, such as
+.Fn exit ,
+are printed without the equals sign and result, unless they fail.
+System call failure is printed according to POSIX conventions; that is, the
+call is assumed to return -1 with the value of
+.Va errno
+printed in square brackets after it:
+.Bd -literal -offset indent
+setuid(0) = -1 [EPERM]
+.Ed
+.Pp
+If a system call ends up in an IPC-level failure, the -1 value will be preceded
+by an
+.Dq Li <ipc>
+string.
+However, this string will be omitted if the system call itself is printed at
+the IPC level (that is, as an
+.Fn ipc_sendrec
+call), generally because
+.Nm
+has no handler to print the actual system call.
+.Pp
+Signals are printed as they arrive at the traced process, using two asterisks
+on both sides of the signal name.
+Signals may arrive both during and outside the execution of a system call:
+.Bd -literal -offset indent
+read(3, ** SIGUSR1 ** &0xeffff867, 4096) = -1 [EINTR]
+** SIGUSR2 **
+getpid() = 5278 (ppid=5277)
+kill(5278, SIGTERM) = ** SIGTERM ** <..>
+Process terminated from signal SIGTERM
+.Ed
+.Pp
+Multiple signals may be printed consecutively.
+The above example illustrates a few other important aspects of output
+formatting.
+Some call parameters may be printed only after the system call returns, in
+order to show their actual value.
+For the
+.Fn read
+call, this would be the bytes that were read.
+Upon failure, no bytes were read, so the buffer pointer is printed instead.
+Finally, if a call that is expected to return (here,
+.Fn kill )
+does not return before the process terminates, the line ends with a
+.Dq Li <..>
+marker.
+This is an instance of call preemption; more about that later.
+.Pp
+Pointers are printed with a
+.Sq Li &
+prefix, except for NULL, which is printed using its own name.
+In general, named constants are used instead of numerical constants wherever
+that makes sense.
+For pointers of which the address is not available, typically because its
+contents are passed by value,
+.Dq Li &..
+is shown instead.
+.Pp
+Data buffers are printed as double-quoted strings, using C-style character
+escaping for nontextual bytes.
+If either the verbosity level or a copy error prevents the whole data buffer
+from being printed, two dots will be printed after the closing quote.
+The same is done when printing a string buffer which does not have a null
+termination byte within its range.
+Path names are shown in full regardless of the verbosity level.
+.Pp
+Structures are printed as a set of structure fields enclosed in curly brackets.
+The
+.Va name Ns = Ns Va value
+format is used, unless printing names for that structure type would introduce
+too much noise and the
+.Dq print all names
+option is not given.
+For many structures, by default only a subset of their fields are printed.
+In this case, a
+.Dq Li ..
+entry is added at the end.
+In some cases, an attempt is made to print only the most useful fields:
+.Bd -literal -offset indent
+stat("/etc/motd", {st_mode=S_IFREG|0755, st_size=747, ..}) = 0
+stat("/dev/tty", {st_mode=S_IFCHR|0666, st_rdev=<5,0>, ..}) = 0
+.Ed
+.Pp
+As shown in the above example, flag fields are printed as a combination of
+named constants, separated by a
+.Sq Li |
+pipe symbol.
+Any leftover numerical bits are printed at the end.
+The example also shows the format in which major/minor pairs are printed for
+device numbers.
+This is a custom format; there are a few other custom formats throughout the
+.Nm
+output which are supposed to be sufficiently self-explanatory (and rare).
+.Pp
+Arrays are printed using square brackets.
+.Bd -literal -offset indent
+pipe2([3, 4], 0) = 0
+getdents(3, [..(45)], 4096) = 1824
+getdents(3, [{d_name="."}, ..(+44)], 4096) = 1824
+getdents(3, [], 4096) = 0
+.Ed
+.Pp
+If the array contents are not printed as per the settings for the verbosity
+level, a single pseudo-element shows how many actual elements were in the array
+(the second line in the example).
+If the number of printed elements is limited, a final pseudo-element shows how
+many additional elements were not printed (the third line in the example).
+If a copy error occurs while part of the array has been printed already, a
+last
+.Dq Li ..(?)
+pseudo-element is printed; for immediate failure, the array's pointer is shown.
+Empty arrays will be printed as
+.Dq Li [] .
+.Pp
+Bit sets are printed as arrays except with just a space and no comma as
+bit separator, closely following the output format of
+.Nm Ns 's
+original inspiration
+.Sy strace .
+For signal sets in particular, an inverted bit set may be shown, thus printing
+only the bits which are not set; such sets are prefixed with a
+.Sq Li ~
+to the opening bracket:
+.Bd -literal -offset indent
+sigprocmask(SIG_SETMASK, ~[USR1 USR2], []) = 0
+.Ed
+.Pp
+Note how the
+.Dq Li SIG
+prefixes are omitted for brevity in this case.
+.Pp
+When multiple processes are traced at once, each line will have a prefix that
+shows the PID of the corresponding process.
+When the number of processes drops to one again, one more line is prefixed with
+the PID of the remaining process, but using a
+.Sq Li '
+instead of a
+.Sq Li |
+symbol:
+.Bd -literal -offset indent
+fork() = 25813
+25813| Tracing test*F (pid 25813)
+25813| fork() = 0
+25812| waitpid(-1, &.., WNOHANG) = 0
+25813| exit(1)
+25813| Process exited normally with code 1
+25812' waitpid(-1, W_EXITED(1), WNOHANG) = 25813
+exit(0)
+Process exited normally with code 0
+.Ed
+.Pp
+If a process is preempted while making a system call, the system call will
+be shown as suspended with the
+.Dq Li <..>
+suffix.
+Later, when the system call is resumed, the output so far will be repeated,
+either in full or (due to memory limitations) with
+.Dq Li <..>
+in its body, before the remaining part of the system call is printed.
+This time, the line will have a
+.Sq Li *
+asterisk in its prefix, to indicate that this is not a new system call:
+.Bd -literal -offset indent
+25812| write(1, "test\en", 5) = <..>
+25813| setuid(0) = 0
+25812|*write(1, "test\en", 5) = 5
+.Ed
+.Pp
+Finally,
+.Nm
+prints three dashes on their own line whenever the process context (program
+counter and/or stack pointer) is changed during a system call.
+This feature intends to help identify blocks of code run from signal handlers.
+The following example shows a SIGALRM signal handler being invoked.
+.Bd -literal -offset indent
+sigsuspend([]) = ** SIGALRM ** -1 [EINTR]
+---
+sigprocmask(SIG_SETMASK, ~[], [ALRM]) = 0
+sigreturn({sc_mask=[], ..})
+---
+exit(0)
+.Ed
+.Pp
+However, the three dashes are not printed when a signal handler is invoked
+while the program is not in a system call, because the tracer does not see such
+invocations.
+They are however also printed for successful
+.Fn execve
+calls.
+.Sh DIAGNOSTICS
+.Ex
+.Sh SEE ALSO
+.Xr ptrace 2
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An David van Moolenbroek
+.Aq david@minix3.org .
+.Sh BUGS
+While the utility aims to provide output for all system calls that can possibly
+be made by user programs, output printers for a small number of rarely-used
+structures and IOCTLs are still missing.  In such cases, plain pointers will be
+printed instead of actual contents.
+.Pp
+A signal arrives at the tracing process when sent to the target process, even
+when the target process is blocking the signal and will thus receive it later.
+This is a limitation of the ptrace infrastructure, although it does ensure that
+a target process is not able to block signals generated for tracing purposes.
+The result is that signals are not always shown at the time that they are
+taken in by the target process, and that stack traces for signals may be off.
+.Pp
+Attaching to system services is currently not supported, due to limitations of
+the ptrace infrastructure.  The
+.Nm
+utility will detect and safely detach from system services, though.
diff --git a/minix/usr.bin/trace/trace.c b/minix/usr.bin/trace/trace.c
new file mode 100644
index 000000000..32ae37343
--- /dev/null
+++ b/minix/usr.bin/trace/trace.c
@@ -0,0 +1,817 @@
+/* trace(1) - the MINIX3 system call tracer - by D.C. van Moolenbroek */
+
+#include "inc.h"
+
+#include <signal.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <err.h>
+
+/* Global variables, used only for a subset of the command line options. */
+int allnames;		 /* FALSE = structure field names, TRUE = all names */
+unsigned int valuesonly; /* 0 = normal, 1 = no symbols, 2 = no structures */
+unsigned int verbose;	 /* 0 = essentials, 1 = elaborate, 2 = everything */
+
+/* Set from the signal handlers below, hence volatile sig_atomic_t. */
+static volatile sig_atomic_t got_signal, got_info;
+
+/*
+ * Signal handler for signals that are supposed to make us terminate; main()
+ * installs it for SIGINT.  Only set a flag here, and let the main loop do the
+ * actual work: it might be processing a process status change right now.
+ */
+static void
+sig_handler(int __unused sig)
+{
+
+	got_signal = TRUE;
+
+}
+
+/*
+ * Signal handler for the SIGINFO signal.  Let the main loop report on all
+ * processes currently being traced.  Since SIGINFO is sent to the current
+ * process group, traced children may get the signal as well.  This is both
+ * intentional and impossible to prevent.
+ */
+static void
+info_handler(int __unused sig)
+{
+
+	got_info = TRUE;
+}
+
+/*
+ * Report every traced process together with its call status.  This is done
+ * out of band (as info lines, not as lines prefixed with a process's PID), so
+ * that the report does not interfere with the actual process output.
+ */
+static void
+list_info(void)
+{
+	struct trace_proc *tp;
+	const char *state;
+	int unknown, busy;
+
+	put_newline();
+
+	for (tp = proc_next(NULL); tp != NULL; tp = proc_next(tp)) {
+		/* For a process just attached to, the status is not known. */
+		unknown = (tp->trace_flags & TF_NOCALL);
+		busy = (tp->trace_flags & TF_INCALL);
+		assert(!busy || !unknown);
+
+		if (unknown)
+			state = "call status unknown";
+		else
+			state = busy ? "in a " : "not in a call";
+
+		put_fmt(NULL, "Tracing %s (pid %d), %s%s%s", tp->name, tp->pid,
+		    state, busy ? call_name(tp) : "", busy ? " call" : "");
+		put_newline();
+	}
+}
+
+/*
+ * Either we have just started or attached to the given process, or the process
+ * has performed a successful execve() call.  Obtain the new process name, and
+ * print a banner for it.
+ */
+static void
+new_exec(struct trace_proc * proc)
+{
+
+	/* Failure to obtain the process name is worrisome, but not fatal.. */
+	if (kernel_get_name(proc->pid, proc->name, sizeof(proc->name)) < 0)
+		strlcpy(proc->name, "<unknown>", sizeof(proc->name));
+
+	put_newline();
+	put_fmt(proc, "Tracing %s (pid %d)", proc->name, proc->pid);
+	put_newline();
+}
+
+/*
+ * Prepare a process that we have just started or attached to for tracing: set
+ * the tracing options on it, and print a banner saying that we now trace it.
+ */
+static void
+new_proc(struct trace_proc * proc, int follow_fork)
+{
+	int opts;
+
+	/* TO_ALTEXEC is always set; forks are traced only upon request. */
+	opts = TO_ALTEXEC;
+	if (follow_fork)
+		opts |= TO_TRACEFORK;
+	(void)ptrace(T_SETOPT, proc->pid, 0, opts);
+
+	/*
+	 * A process to which we attach may be in the middle of an execve()
+	 * call.  With TO_ALTEXEC enabled, a SIGSTOP signal may then be next.
+	 * Be prepared for that by treating the first system call as a possible
+	 * execve() call.
+	 */
+	if ((proc->trace_flags & (TF_ATTACH | TF_STOPPING)) == TF_ATTACH)
+		proc->trace_flags |= TF_EXEC;
+
+	new_exec(proc);
+}
+
+/*
+ * A process has terminated or is being detached.  Print the resulting status.
+ */
+static void
+discard_proc(struct trace_proc * proc, int status)
+{
+	const char *signame;
+
+	/*
+	 * The exit() calls are of type no-return, meaning they are expected
+	 * not to return.  However, calls of this type may in fact return an
+	 * error, in which case the error must be printed.  Thus, such calls
+	 * are not actually finished until the end of the call-leave phase.
+	 * For exit() calls, a successful call will never get to the call-leave
+	 * phase.  The result is that such calls will end up being shown as
+	 * suspended, which is unintuitive.  To counter this, we pretend that a
+	 * clean process exit is in fact preceded by a call-leave event, thus
+	 * allowing the call to be printed without suspension.  An example:
+	 *
+	 *        3| exit(0) <..>
+	 *        2| setsid() = 2
+	 * [A]    3| exit(0)
+	 *        3| Process exited normally with code 0
+	 *
+	 * The [A] line is the result of the following code.
+	 */
+	if (WIFEXITED(status) && (proc->trace_flags & TF_INCALL))
+		call_leave(proc, TRUE /*skip*/);
+
+	put_newline();
+	if (WIFEXITED(status)) {
+		put_fmt(proc, "Process exited normally with code %d",
+		    WEXITSTATUS(status));
+	} else if (WIFSIGNALED(status)) {
+		if ((signame = get_signal_name(WTERMSIG(status))) != NULL)
+			put_fmt(proc, "Process terminated from signal %s",
+			    signame);
+		else
+			put_fmt(proc, "Process terminated from signal %d",
+			    WTERMSIG(status));
+	} else if (WIFSTOPPED(status))	/* stopped, not gone: a detach */
+		put_text(proc, "Process detached");
+	else
+		put_fmt(proc, "Bogus wait result (%04x)", status);
+	put_newline();
+
+	proc_del(proc);
+}
+
+/*
+ * The given process has been stopped on a system call, either entering or
+ * leaving that call; the TF_INCALL trace flag tells which of the two it is.
+ */
+static void
+handle_call(struct trace_proc * proc, int show_stack)
+{
+	reg_t pc, sp;
+	int class, skip, new_ctx;
+
+	proc->trace_flags &= ~TF_NOCALL;
+
+	if (proc->trace_flags & TF_SKIP) {
+		/* Skip the call leave phase after a successful execve(). */
+		proc->trace_flags &= ~(TF_INCALL | TF_SKIP);
+	} else if (!(proc->trace_flags & TF_INCALL)) {
+		/*
+		 * The call_enter call returns the class of the call:
+		 * TC_NORMAL, TC_EXEC, or TC_SIGRET.  TC_EXEC means that an
+		 * execve() call is being performed.  This means that if a
+		 * SIGSTOP follows for the current process, the process has
+		 * successfully started a different executable.  TC_SIGRET
+		 * means that if successful, the call will have a bogus return
+		 * value.  TC_NORMAL means that the call requires no exception.
+		 */
+		class = call_enter(proc, show_stack);
+
+		switch (class) {
+		case TC_NORMAL:
+			break;
+		case TC_EXEC:
+			proc->trace_flags |= TF_EXEC;
+			break;
+		case TC_SIGRET:
+			proc->trace_flags |= TF_CTX_SKIP;
+			break;
+		default:
+			assert(0);	/* unknown call class */
+		}
+
+		/* Save the current PC and SP; zero means they are unknown. */
+		if (!kernel_get_context(proc->pid, &pc, &sp, NULL /*fp*/)) {
+			proc->last_pc = pc;
+			proc->last_sp = sp;
+		} else
+			proc->last_pc = proc->last_sp = 0;
+
+		proc->trace_flags |= TF_INCALL;
+	} else {
+		/*
+		 * Check if the program counter or stack pointer have changed
+		 * during the system call.  If so, this is a strong indication
+		 * that a sigreturn call has succeeded, and thus its result
+		 * must be skipped, since the result register will not contain
+		 * the result of the call.
+		 */
+		new_ctx = (proc->last_pc != 0 &&
+		    !kernel_get_context(proc->pid, &pc, &sp, NULL /*fp*/) &&
+		    (pc != proc->last_pc || sp != proc->last_sp));
+
+		skip = ((proc->trace_flags & TF_CTX_SKIP) && new_ctx);
+
+		call_leave(proc, skip);
+
+		/*
+		 * On such context changes, also print a short dashed line.
+		 * This helps in identifying signal handler invocations,
+		 * although it is not reliable for that purpose: no dashed line
+		 * will be printed if a signal handler is invoked while the
+		 * process is not making a system call.
+		 */
+		if (new_ctx) {
+			put_text(proc, "---");
+			put_newline();
+		}
+
+		proc->trace_flags &= ~(TF_INCALL | TF_CTX_SKIP | TF_EXEC);
+	}
+}
+
+/*
+ * The given process has received the given signal.  Report the receipt.  Due
+ * to the way that signal handling with traced processes works, the signal may
+ * in fact be delivered to the process much later, or never--a problem inherent
+ * to the way signals are handled in PM right now (namely, deferring signal
+ * delivery would let the traced process block signals meant for the tracer).
+ */
+static void
+report_signal(struct trace_proc * proc, int sig, int show_stack)
+{
+	const char *signame;
+
+	/*
+	 * Print a stack trace only if we are not in a call; otherwise, we
+	 * would simply get the same stack trace twice and mess up the output
+	 * in the process, because call suspension is not expected if we are
+	 * tracing a single process only.
+	 * FIXME: the check should be for whether we actually print the call..
+	 */
+	if (show_stack && !(proc->trace_flags & TF_INCALL))
+		kernel_put_stacktrace(proc);
+
+	/*
+	 * If this process is in the middle of a call, the signal will be
+	 * printed within the call.  This will always happen on the call split,
+	 * that is, between the call's entering (out) and leaving (in) phases.
+	 * This also means that the recording of the call-enter phase may be
+	 * replayed more than once, and the call may be suspended more than
+	 * once--after all, a signal is not necessarily followed immediately
+	 * by the call result.  If the process is not in the middle of a call,
+	 * the signal will end up on a separate line.  In both cases, multiple
+	 * consecutive signals may be printed right after one another.  The
+	 * following scenario shows a number of possible combinations:
+	 *
+	 *       2| foo(<..>
+	 *       3| ** SIGHUP ** ** SIGUSR1 **
+	 *       3| bar() = <..>
+	 *       2|*foo(** SIGUSR1 ** ** SIGUSR2 ** <..>
+	 *       3|*bar() = ** SIGCHLD ** 0
+	 *       2|*foo(** SIGINT ** &0xef852000) = -1 [EINTR]
+	 *       3| kill(3, SIGTERM) = ** SIGTERM ** <..>
+	 *       3| Process terminated from signal SIGTERM
+	 */
+
+	call_replay(proc);
+
+	if (!valuesonly && (signame = get_signal_name(sig)) != NULL)
+		put_fmt(proc, "** %s **", signame);
+	else	/* -V given, or no name known for this signal number */
+		put_fmt(proc, "** SIGNAL %d **", sig);
+
+	put_space(proc);
+
+	output_flush();
+}
+
+/*
+ * Wait until the process with the given process ID stops on the given signal.
+ * Return zero once that has happened.  Return -1 on failure, with errno set
+ * either to an error code, or to zero if the process turned out to have
+ * terminated instead of stopped.
+ */
+static int
+wait_sig(pid_t pid, int sig)
+{
+	int wstatus, stopsig;
+
+	for (;;) {
+		if (waitpid(pid, &wstatus, 0) == -1) {
+			/* Retry if merely interrupted by a signal. */
+			if (errno != EINTR)
+				return -1;
+			continue;
+		}
+
+		/* Anything but a stop means the process is gone now. */
+		if (!WIFSTOPPED(wstatus)) {
+			errno = 0;
+			return -1;
+		}
+
+		stopsig = WSTOPSIG(wstatus);
+		if (stopsig == sig)
+			return 0;
+
+		/* Not the one we want: pass it on and keep waiting. */
+		(void)ptrace(T_RESUME, pid, 0, stopsig);
+	}
+}
+
+/*
+ * Attach to the given process, and wait for the resulting SIGSTOP signal.
+ * Other signals may arrive first; we pass these on to the process without
+ * reporting them, thus logically modelling them as having arrived before we
+ * attached to the process.  The process might also exit in the meantime,
+ * typically as a result of a lethal signal; following the same logical model,
+ * we pretend the process did not exist in the first place.  Since the SIGSTOP
+ * signal will be pending right after attaching, this procedure will never
+ * block.  Return 0 on success, or -1 after printing a warning on failure.
+ */
+static int
+attach(pid_t pid)
+{
+
+	if (ptrace(T_ATTACH, pid, 0, 0) != 0) {
+		warn("Unable to attach to pid %d", pid);
+
+		return -1;
+	}
+
+	if (wait_sig(pid, SIGSTOP) != 0) {
+		/* If the process terminated, report it as not found. */
+		if (errno == 0)
+			errno = ESRCH;
+
+		warn("Unable to attach to pid %d", pid);
+
+		return -1;
+	}
+
+	/* Verify that we can read values from the kernel at all. */
+	if (kernel_check(pid) == FALSE) {
+		(void)ptrace(T_DETACH, pid, 0, 0);
+
+		warnx("Kernel magic check failed, recompile trace(1)");
+
+		return -1;
+	}
+
+	/*
+	 * System services are managed by RS, which prevents them from
+	 * being traced properly by PM.  Attaching to a service could
+	 * therefore cause problems, so we should detach immediately.
+	 */
+	if (kernel_is_service(pid) == TRUE) {
+		(void)ptrace(T_DETACH, pid, 0, 0);
+
+		warnx("Cannot attach to system services!");
+
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Detach from all processes, knowing that they were all processes to which we
+ * attached explicitly (i.e., not started by us) and are all currently stopped.
+ */
+static void
+detach_stopped(void)
+{
+	struct trace_proc *tp = NULL;
+
+	while ((tp = proc_next(tp)) != NULL)
+		(void)ptrace(T_DETACH, tp->pid, 0, 0);
+}
+
+/*
+ * Initiate detaching from all processes to which we attached earlier.  This
+ * function is expected to return before the detaching has completed, leaving
+ * it to the caller to deal with the new situation appropriately.  Processes
+ * started by us are left alone (to allow graceful termination), unless force
+ * is set, in which case those processes are killed.
+ */
+static void
+detach_running(int force)
+{
+	struct trace_proc *tp;
+
+	for (tp = proc_next(NULL); tp != NULL; tp = proc_next(tp)) {
+		/*
+		 * Processes that we started ourselves are left alone the first
+		 * time; they may be ignoring SIGINT, so kill them when forced.
+		 */
+		if (!(tp->trace_flags & TF_ATTACH)) {
+			if (force)
+				(void)kill(tp->pid, SIGKILL);
+			continue;
+		}
+
+		/* Nothing to do if a detach is in progress already. */
+		if (tp->trace_flags & TF_DETACH)
+			continue;
+
+		if (!(tp->trace_flags & TF_STOPPING))
+			(void)kill(tp->pid, SIGSTOP);
+		tp->trace_flags |= TF_DETACH | TF_STOPPING;
+	}
+}
+
+/*
+ * Print a usage summary to standard error, and exit with a failure status.
+ */
+static void __dead
+usage(void)
+{
+	const char *progname = getprogname();
+
+	(void)fprintf(stderr,
+	    "usage: %s [-fgNsVv] [-o file] [-p pid] [command]\n", progname);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Entry point: parse options, attach to/start processes, handle trace events.
+ */
+int
+main(int argc, char * argv[])
+{
+	struct trace_proc *proc;
+	const char *output_file = NULL;
+	char *end;
+	int follow_fork = FALSE, show_stack = FALSE, grouping = FALSE;
+	int status, sig, first_signal;
+	pid_t pid, last_pid;
+	long val;
+	int c, error;
+
+	setprogname(argv[0]);
+
+	proc_init();
+
+	allnames = FALSE;
+	verbose = 0;
+	valuesonly = 0;
+
+	while ((c = getopt(argc, argv, "fgNsVvo:p:")) != -1) {
+		switch (c) {
+		case 'f':
+			follow_fork = TRUE;
+			break;
+		case 'g':
+			grouping = TRUE;
+			break;
+		case 'N':
+			allnames = TRUE;
+			break;
+		case 's':
+			show_stack = TRUE;
+			break;
+		case 'V':
+			valuesonly++;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'o':
+			output_file = optarg;
+			break;
+		case 'p':
+			errno = 0;	/* to detect strtol() overflow */
+			val = strtol(optarg, &end, 10);
+			pid = (pid_t)val;
+			if (errno != 0 || *end != '\0' || pid <= 0 ||
+			    pid != val)
+				usage();
+
+			if (proc_get(pid) == NULL && proc_add(pid) == NULL)
+				err(EXIT_FAILURE, NULL);
+
+			break;
+		default:
+			usage();
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	first_signal = TRUE;
+	got_signal = FALSE;
+	got_info = FALSE;
+
+	signal(SIGINT, sig_handler);
+	signal(SIGINFO, info_handler);
+
+	/* Attach to any processes for which PIDs were given. */
+	for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) {
+		if (attach(proc->pid) != 0) {
+			/*
+			 * Detach from the processes that we have attached to
+			 * so far, i.e. the ones with the TF_ATTACH flag.
+			 */
+			detach_stopped();
+
+			return EXIT_FAILURE;
+		}
+
+		proc->trace_flags = TF_ATTACH | TF_NOCALL;
+	}
+
+	/* If a command is given, start a child that executes the command. */
+	if (argc >= 1) {
+		pid = fork();
+
+		switch (pid) {
+		case -1:
+			warn("Unable to fork");
+
+			detach_stopped();
+
+			return EXIT_FAILURE;
+
+		case 0:
+			(void)ptrace(T_OK, 0, 0, 0);
+
+			(void)execvp(argv[0], argv);
+
+			err(EXIT_FAILURE, "Unable to start %s", argv[0]);
+
+		default:
+			break;
+		}
+
+		/*
+		 * The first signal will now be SIGTRAP from the execvp(),
+		 * unless that fails, in which case the child will terminate.
+		 */
+		if (wait_sig(pid, SIGTRAP) != 0) {
+			/*
+			 * If the child exited, the most likely cause is a
+			 * failure to execute the command.  Let the child
+			 * report the error, and do not say anything here.
+			 */
+			if (errno != 0)
+				warn("Unable to start process");
+
+			detach_stopped();
+
+			return EXIT_FAILURE;
+		}
+
+		/* If we haven't already, perform the kernel magic check. */
+		if (proc_count() == 0 && kernel_check(pid) == FALSE) {
+			warnx("Kernel magic check failed, recompile trace(1)");
+
+			(void)kill(pid, SIGKILL);
+
+			detach_stopped();
+
+			return EXIT_FAILURE;
+		}
+
+		if ((proc = proc_add(pid)) == NULL) {
+			warn(NULL);
+
+			(void)kill(pid, SIGKILL);
+
+			detach_stopped();
+
+			return EXIT_FAILURE;
+		}
+
+		proc->trace_flags = 0;
+	} else
+		pid = -1;
+
+	/* The user will have to give us at least one process to trace. */
+	if (proc_count() == 0)
+		usage();
+
+	/*
+	 * Open an alternative output file if needed.  After that, standard
+	 * error should no longer be used directly, and all output has to go
+	 * through the output module.
+	 */
+	if (output_init(output_file) < 0) {
+		warn("Unable to open output file");
+
+		if (pid > 0)
+			(void)kill(pid, SIGKILL);
+
+		detach_stopped();
+
+		return EXIT_FAILURE;
+	}
+
+	/*
+	 * All the traced processes are currently stopped.  Initialize, report,
+	 * and resume them.
+	 */
+	for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) {
+		new_proc(proc, follow_fork);
+
+		(void)ptrace(T_SYSCALL, proc->pid, 0, 0);
+	}
+
+	/*
+	 * Handle events until there are no traced processes left.
+	 */
+	last_pid = 0;
+	error = FALSE;
+
+	for (;;) {
+		/* If an output error occurred, exit as soon as possible. */
+		if (!error && output_error()) {
+			detach_running(TRUE /*force*/);
+
+			error = TRUE;
+		}
+
+		/*
+		 * If the user pressed ^C once, start detaching the processes
+		 * that we did not start, if any.  If the user pressed ^C
+		 * twice, kill the process that we did start, if any.
+		 */
+		if (got_signal) {
+			detach_running(!first_signal);
+
+			got_signal = FALSE;
+			first_signal = FALSE;
+		}
+
+		/* Upon getting SIGINFO, print a list of traced processes. */
+		if (got_info) {
+			list_info();
+
+			got_info = FALSE;
+		}
+
+		/*
+		 * Block until something happens to a traced process.  If
+		 * enabled from the command line, first try waiting for the
+		 * last process for which we got results, so as to reduce call
+		 * suspensions a bit.
+		 */
+		if (grouping && last_pid > 0 &&
+		    waitpid(last_pid, &status, WNOHANG) > 0)
+			pid = last_pid;
+		else if ((pid = waitpid(-1, &status, 0)) <= 0) {
+			if (pid == -1 && errno == EINTR) continue;
+			if (pid == -1 && errno == ECHILD) break; /* all done */
+
+			put_fmt(NULL, "Unexpected waitpid failure: %s",
+			    (pid == 0) ? "No result" : strerror(errno));
+			put_newline();
+
+			/*
+			 * We need waitpid to function correctly in order to
+			 * detach from any attached processes, so we can do
+			 * little more than just exit, effectively killing all
+			 * traced processes.
+			 */
+			return EXIT_FAILURE;
+		}
+
+		last_pid = 0;
+
+		/* Get the trace data structure for the process. */
+		if ((proc = proc_get(pid)) == NULL) {
+			/*
+			 * The waitpid() call returned the status of a process
+			 * that we have not yet seen.  This must be a newly
+			 * forked child.  If it is not stopped, it must have
+			 * died immediately, and we choose not to report it.
+			 */
+			if (!WIFSTOPPED(status))
+				continue;
+
+			if ((proc = proc_add(pid)) == NULL) {
+				put_fmt(NULL,
+				    "Error attaching to new child %d: %s",
+				    pid, strerror(errno));
+				put_newline();
+
+				/*
+				 * Out of memory allocating a new child object!
+				 * We can not trace this child, so just let it
+				 * run free by detaching from it.
+				 */
+				if (WSTOPSIG(status) != SIGSTOP) {
+					(void)ptrace(T_RESUME, pid, 0,
+					    WSTOPSIG(status));
+
+					if (wait_sig(pid, SIGSTOP) != 0)
+						continue; /* it died.. */
+				}
+
+				(void)ptrace(T_DETACH, pid, 0, 0);
+
+				continue;
+			}
+
+			/*
+			 * We must specify TF_ATTACH here, even though it may
+			 * be a child of a process we started, in which case it
+			 * should be killed when we exit.  We do not keep track
+			 * of ancestry though, so better safe than sorry.
+			 */
+			proc->trace_flags = TF_ATTACH | TF_STOPPING;
+
+			new_proc(proc, follow_fork);
+
+			/* Repeat entering the fork call for the child. */
+			handle_call(proc, show_stack);
+		}
+
+		/* If the process died, report its status and clean it up. */
+		if (!WIFSTOPPED(status)) {
+			discard_proc(proc, status);
+
+			continue;
+		}
+
+		sig = WSTOPSIG(status);
+
+		if (sig == SIGSTOP && (proc->trace_flags & TF_STOPPING)) {
+			/* We expected the process to be stopped; now it is. */
+			proc->trace_flags &= ~TF_STOPPING;
+
+			if (proc->trace_flags & TF_DETACH) {
+				if (ptrace(T_DETACH, proc->pid, 0, 0) == 0)
+					discard_proc(proc, status);
+
+				/*
+				 * If detaching failed, the process must have
+				 * died, and we'll get notified through wait().
+				 */
+				continue;
+			}
+
+			sig = 0;
+		} else if (sig == SIGSTOP && (proc->trace_flags & TF_EXEC)) {
+			/* The process has performed a successful execve(). */
+			call_leave(proc, TRUE /*skip*/);
+
+			put_text(proc, "---");
+
+			new_exec(proc);
+
+			/*
+			 * A successful execve() has no result, in the sense
+			 * that there is no reply message.  We should therefore
+			 * not even try to copy in the reply message from the
+			 * original location, because it will be invalid.
+			 * Thus, we skip the exec's call leave phase entirely.
+			 */
+			proc->trace_flags &= ~TF_EXEC;
+			proc->trace_flags |= TF_SKIP;
+
+			sig = 0;
+		} else if (sig == SIGTRAP) {
+			/* The process is entering or leaving a system call. */
+			if (!(proc->trace_flags & TF_DETACH))
+				handle_call(proc, show_stack);
+
+			sig = 0;
+		} else {
+			/* The process has received a signal. */
+			report_signal(proc, sig, show_stack);
+
+			/*
+			 * Only in this case do we pass the signal to the
+			 * traced process.
+			 */
+		}
+
+		/*
+		 * Resume process execution.  If this call fails, the process
+		 * has probably died.  We will find out soon enough.
+		 */
+		(void)ptrace(T_SYSCALL, proc->pid, 0, sig);
+
+		last_pid = proc->pid;
+	}
+
+	return (error) ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/minix/usr.bin/trace/type.h b/minix/usr.bin/trace/type.h
new file mode 100644
index 000000000..e126a8d74
--- /dev/null
+++ b/minix/usr.bin/trace/type.h
@@ -0,0 +1,32 @@
+
+#define COUNT(s) (sizeof(s) / sizeof((s)[0]))	/* elements in array s */
+
+struct call_handler {	/* per-call hooks; built with HANDLER*() below */
+	const char *name;	/* set by HANDLER(); otherwise left NULL */
+	const char *(*namefunc)(const message *m_out); /* via HANDLER_NAME() */
+	int (*outfunc)(struct trace_proc *proc, const message *m_out);
+	void (*infunc)(struct trace_proc *proc, const message *m_out,
+	    const message *m_in, int failed); /* m_in: reply? TODO confirm */
+};
+#define HANDLER(n,o,i) { .name = n, .outfunc = o, .infunc = i }
+#define HANDLER_NAME(n,o,i) { .namefunc = n, .outfunc = o, .infunc = i }
+
+struct calls {		/* one table of call handlers */
+	endpoint_t endpt;	/* NOTE(review): presumably the callee; confirm */
+	unsigned int base;	/* presumably call number of map[0]; confirm */
+	const struct call_handler *map;	/* the handler entries */
+	unsigned int count;	/* presumably number of map entries; confirm */
+};
+
+struct flags {		/* one named flag; built with FLAG*() below */
+	unsigned int mask;	/* NOTE(review): presumably bits to test */
+	unsigned int value;	/* presumably expected masked value; confirm */
+	const char *name;	/* the constant's name, stringified by FLAG*() */
+};
+#define FLAG(f) { f, f, #f }		/* mask and value are both f */
+#define FLAG_MASK(m,f) { m, f, #f }	/* mask m, value f */
+#define FLAG_ZERO(f) { ~0, f, #f }	/* all-ones mask, value f */
+
+/* Switch-case generators: stringify the constant itself, preventing typos. */
+#define NAME(r) case r: return #r	/* caller returns a string */
+#define TEXT(v) case v: text = #v; break	/* assigns to 'text' in scope */