From 521fa314e2aaec3c192c15f2aaa4c677a544e62a Mon Sep 17 00:00:00 2001 From: David van Moolenbroek Date: Tue, 4 Nov 2014 21:33:04 +0000 Subject: [PATCH] Add trace(1): the MINIX3 system call tracer Change-Id: Ib970c8647409196902ed53d6e9631a1673a4ab2e --- distrib/sets/lists/minix/mi | 2 + minix/usr.bin/Makefile | 1 + minix/usr.bin/trace/Makefile | 21 + minix/usr.bin/trace/NOTES | 255 +++++ minix/usr.bin/trace/call.c | 686 +++++++++++++ minix/usr.bin/trace/error.awk | 28 + minix/usr.bin/trace/escape.c | 48 + minix/usr.bin/trace/format.c | 426 ++++++++ minix/usr.bin/trace/inc.h | 22 + minix/usr.bin/trace/ioctl.c | 226 +++++ minix/usr.bin/trace/ioctl/block.c | 229 +++++ minix/usr.bin/trace/ioctl/char.c | 509 ++++++++++ minix/usr.bin/trace/ioctl/net.c | 565 +++++++++++ minix/usr.bin/trace/ioctl/svrctl.c | 63 ++ minix/usr.bin/trace/kernel.c | 307 ++++++ minix/usr.bin/trace/mem.c | 61 ++ minix/usr.bin/trace/output.c | 516 ++++++++++ minix/usr.bin/trace/proc.c | 97 ++ minix/usr.bin/trace/proc.h | 99 ++ minix/usr.bin/trace/proto.h | 130 +++ minix/usr.bin/trace/service/ipc.c | 445 +++++++++ minix/usr.bin/trace/service/pm.c | 1396 ++++++++++++++++++++++++++ minix/usr.bin/trace/service/rs.c | 140 +++ minix/usr.bin/trace/service/vfs.c | 1457 ++++++++++++++++++++++++++++ minix/usr.bin/trace/service/vm.c | 135 +++ minix/usr.bin/trace/signal.awk | 32 + minix/usr.bin/trace/trace.1 | 334 +++++++ minix/usr.bin/trace/trace.c | 817 ++++++++++++++++ minix/usr.bin/trace/type.h | 32 + 29 files changed, 9079 insertions(+) create mode 100644 minix/usr.bin/trace/Makefile create mode 100644 minix/usr.bin/trace/NOTES create mode 100644 minix/usr.bin/trace/call.c create mode 100644 minix/usr.bin/trace/error.awk create mode 100644 minix/usr.bin/trace/escape.c create mode 100644 minix/usr.bin/trace/format.c create mode 100644 minix/usr.bin/trace/inc.h create mode 100644 minix/usr.bin/trace/ioctl.c create mode 100644 minix/usr.bin/trace/ioctl/block.c create mode 100644 
minix/usr.bin/trace/ioctl/char.c create mode 100644 minix/usr.bin/trace/ioctl/net.c create mode 100644 minix/usr.bin/trace/ioctl/svrctl.c create mode 100644 minix/usr.bin/trace/kernel.c create mode 100644 minix/usr.bin/trace/mem.c create mode 100644 minix/usr.bin/trace/output.c create mode 100644 minix/usr.bin/trace/proc.c create mode 100644 minix/usr.bin/trace/proc.h create mode 100644 minix/usr.bin/trace/proto.h create mode 100644 minix/usr.bin/trace/service/ipc.c create mode 100644 minix/usr.bin/trace/service/pm.c create mode 100644 minix/usr.bin/trace/service/rs.c create mode 100644 minix/usr.bin/trace/service/vfs.c create mode 100644 minix/usr.bin/trace/service/vm.c create mode 100644 minix/usr.bin/trace/signal.awk create mode 100644 minix/usr.bin/trace/trace.1 create mode 100644 minix/usr.bin/trace/trace.c create mode 100644 minix/usr.bin/trace/type.h diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi index 792effd3f..39570edcb 100644 --- a/distrib/sets/lists/minix/mi +++ b/distrib/sets/lists/minix/mi @@ -522,6 +522,7 @@ ./usr/bin/touch minix-sys ./usr/bin/tput minix-sys ./usr/bin/tr minix-sys +./usr/bin/trace minix-sys ./usr/bin/true minix-sys ./usr/bin/truncate minix-sys ./usr/bin/tsort minix-sys @@ -2560,6 +2561,7 @@ ./usr/man/man1/touch.1 minix-sys ./usr/man/man1/tput.1 minix-sys ./usr/man/man1/tr.1 minix-sys +./usr/man/man1/trace.1 minix-sys ./usr/man/man1/trap.1 minix-sys obsolete ./usr/man/man1/true.1 minix-sys ./usr/man/man1/truncate.1 minix-sys diff --git a/minix/usr.bin/Makefile b/minix/usr.bin/Makefile index 60d19bf33..660309d14 100644 --- a/minix/usr.bin/Makefile +++ b/minix/usr.bin/Makefile @@ -8,5 +8,6 @@ SUBDIR+= grep SUBDIR+= ministat SUBDIR+= top SUBDIR+= toproto +SUBDIR+= trace .include diff --git a/minix/usr.bin/trace/Makefile b/minix/usr.bin/trace/Makefile new file mode 100644 index 000000000..af2d63b86 --- /dev/null +++ b/minix/usr.bin/trace/Makefile @@ -0,0 +1,21 @@ +.include + +PROG= trace +SRCS= call.o error.o 
escape.o format.o ioctl.o kernel.o mem.o output.o \ + proc.o signal.o trace.o +.PATH: ${.CURDIR}/service +SRCS+= pm.o vfs.o rs.o vm.o ipc.o +.PATH: ${.CURDIR}/ioctl +SRCS+= block.o char.o net.o svrctl.o + +CPPFLAGS+= -D_MINIX_SYSTEM=1 -I${.CURDIR} -I${NETBSDSRCDIR}/minix + +error.c: error.awk ${NETBSDSRCDIR}/sys/sys/errno.h + ${TOOL_AWK} -f ${.ALLSRC} > ${.TARGET} + +signal.c: signal.awk ${NETBSDSRCDIR}/sys/sys/signal.h + ${TOOL_AWK} -f ${.ALLSRC} > ${.TARGET} + +CLEANFILES+= error.c signal.c + +.include diff --git a/minix/usr.bin/trace/NOTES b/minix/usr.bin/trace/NOTES new file mode 100644 index 000000000..91d8cb761 --- /dev/null +++ b/minix/usr.bin/trace/NOTES @@ -0,0 +1,255 @@ +Developer notes regarding trace(1), by David van Moolenbroek. + + +OVERALL CODE STRUCTURE + +The general tracing engine is in trace.c. It passes IPC-level system call +enter and leave events off to call.c, which handles IPC-level system call +printing and passes off system calls to be interpreted by a service-specific +system call handler whenever possible. All the service-specific code is in the +service/ subdirectory, grouped by destination service. IOCTLs are a special +case, which are handled in ioctl.c and passed on to driver-type-grouped IOCTL +handlers in the ioctl/ subdirectory (this grouping is not strict). Some of the +generated output goes through the formatting code in format.c, and all of it +ends up in output.c. The remaining source files contain support code. + + +ADDING A SYSTEM CALL HANDLER + +In principle, every system call stops the traced process twice: once when the +system call is started (the call-enter event) and once when the system call +returns (the call-leave event). The tracer uses the call-enter event to print +the request being made, and the call-leave event to print the result of the +call. The output format is supposed to mimic largely what the system call +looks like from a C program, although with additional information where that +makes sense. 
The general output format for system calls is: + + name(parameters) = result + +..where "name" is the name of the system call, "parameters" is a list of system +call parameters, and "result" is the result of the system call. If possible, +the part up to and including the equals sign is printed from the call-enter +event, and the result is printed from the call-leave event. However, many +system calls actually pass a pointer to a block of memory that is filled with +meaningful content as part of the system call. For that reason, it is also +possible that the call-enter event stops printing somewhere inside the +parameters block, and the call-leave event prints the rest of the parameters, +as well as the equals sign and the result after it. The place in the printed +system call where the call-enter printer stops and the call-leave printer is +supposed to pick up again, is referred to as the "call split". + +The tracer has to have a handler structure for every system call that can be made by +a user program to any of the MINIX3 services. This handler structure +provides three elements: the name of the system call, an "out" function that +handles printing of the call-enter part of the system call, and an "in" +function that handles printing of the call-leave part of the system call. The +"out" function is expected to print zero or more call parameters, and then +return a call type, which indicates whether all parameters have been printed +yet, or not. In fact, there are three call types, shown here with an example +which has a "|" pipe symbol added to indicate the call split: + + CT_DONE: write(5, "foo", 3) = |3 + CT_NOTDONE: read(5, |"foo", 1024) = 3 + CT_NORETURN: execve("foo", ["foo"], [])| = -1 [ENOENT] + +The CT_DONE call type indicates that the handler is done printing all the +parameters during the call-enter event, and the call split will be after the +equals sign.
The CT_NOTDONE call type indicates that the handler is not done +printing all parameters yet, thus yielding a call split in the middle of the +parameters block (or even right after the opening parenthesis). The no-return +(CT_NORETURN) call type is used for a small number of functions that do not +return on success. Currently, these are the exit(), execve(), and sigreturn() +system calls. For these calls, no result will be printed at all, unless such +a call fails, in which case a failure result is printed after all. The call +split is such that the entire parameters block is printed upon entering the +call, but the equals sign and result are printed only if the call does return. + +Now more about the handler structure for the system call. First of all, each +system call has a name, which must be a static string. It may be supplied +either as a string, or as a function that returns a name string. The latter is +for cases where one message-level system call is used to implement multiple +C-level system calls (such as setitimer() and getitimer() both going through +PM_ITIMER). The name function has the following prototype: + + const char *svc_syscall_name(const message *m_out); + +..where "m_out" is a local copy of the request message, which the name function +can use to decide what string to return for the system call. As a sidenote, +in the future, the system call name will be used to implement call filtering. + +An "out" printer function has the following prototype: + + int svc_syscall_out(struct trace_proc *proc, const message *m_out); + +Here, "proc" is a pointer to the process structure containing information about +the process making the system call; proc->pid returns the process PID, but the +function should not access any other fields of this structure directly. +Instead, many of the output primitive and helper functions (which are all +prefixed with "put_") take this pointer as part of the call.
"m_out" is a +local copy of the request message, and the printer may access its fields as it +sees fit. + +The printer function should simply print parameters. The call name and the +opening parenthesis are printed by the main output routine. + +All simple call parameters should be printed using the put_field() and +put_value() functions. The former prints a parameter or field name as flat +text; the latter is a printf-like interface to the former. By default, call +parameters are simply printed as "value", but if printing all names is enabled, +call parameters are printed as "name=value". Thus, all parameters should be +given a name, even if this name does not show up by default. Either way, these +two functions take care of deciding whether to print the name, as well as of +printing separators between the parameters. More about printing more complex +parameters (such as structures) in a bit. + +The out printer function must return one of the three CT_ call type values. If +it returns CT_DONE, the main output routine will immediately print the closing +parenthesis and equals sign. If it returns CT_NORETURN, a closing parenthesis +will be printed. If it returns CT_NOTDONE, only a parameter field separator +(that is, a comma and a space) will be printed--after all, it can be assumed +that more parameters will be printed later. + +An "in" printer function has the following prototype: + + void svc_syscall_in(struct trace_proc *proc, const message *m_out, + const message *m_in, int failed); + +Again, "proc" is the traced process of which its current system call has now +returned. "m_out" is again the request message, guaranteed to be unchanged +since the "out" call. "m_in" is the reply message from the service. "failed" +is either 0 to indicate that the call appears to have succeeded, or PF_FAILED +to indicate that the call definitely failed.
If PF_FAILED is set, the call +has failed either at the IPC level or at the system call level (or for another, +less common reason). In that case, the contents of "m_in" may be garbage and +"m_in" must not be used at all. + +For CT_NOTDONE type calls, the in printer function should first print the +remaining parameters. Here especially, it is important to consider that the +entire call may fail. In that case, the parameters of which the contents were +still going to be printed may also contain garbage, since they were never +filled. The expected behavior is to print such parameters as a pointer or "&.." +or something else to indicate that their actual contents are not valid. + +Either way, once a CT_NOTDONE type call function is done printing the remaining +parameters, it must call put_equals(proc) to print the closing parenthesis of +the call and the equals sign. CT_NORETURN calls must also use put_equals(proc) +to print the equals sign. + +Then comes the result part. If the call failed, the in printer function *must* +use put_result(proc) to print the failure result. This call not only takes +care of converting negative error codes from m_in->m_type into "-1 [ECODE]" but +also prints appropriate failure codes for IPC-level and other exceptional +failures. Only if the system call did not fail, may the in printer function +choose to not call put_result(proc), which on success simply prints +m_in->m_type as an integer. Similarly, if the system call succeeded, the in +printer function may print extended results after the primary result, generally +in parentheses. For example, getpid() and getppid() share the same system call +and thus the tracer prints both return values, one as the primary result of the +actual call and one in parentheses with a clarifying name as extended result: + + getpid() = 3 (ppid=1) + +It should now be clear that printing extended results makes no sense if the +system call failed.
+ +Besides put_equals and put_result, the following more or less generic support +functions are available to print the various parts of the requests and replies. + + put_field - output a parameter, structure field, and so on; this function + should be used for just about every actual value + put_value - printf-like version of put_field + put_text - output plain text; for call handlers, this should be used only to + add things right after a put_field call, never on its own + put_fmt - printf-like version of put_text, should generally not be used + from call handlers at all + put_open - open a nested block of fields, surrounded by parentheses, + brackets, or something like that; this is used for structures, + arrays, and any other similar nontrivial case of nesting + put_close - close a previously opened block of fields; the nesting depth is + actually tracked (to keep per-level separators etc), so each + put_open call must have a corresponding put_close call + put_open_struct - perform several tasks necessary to start printing the + fields of a structure; note that this function may fail! + put_close_struct - end successful printing of a structure + put_ptr - print a pointer in the traced process + put_buf - print a buffer or string + put_flags - print a bitwise flags field + put_tail - helper function for printing the continuation part of an array + +Many of these support functions take a flags field which takes PF_-prefixed +flags to modify the output they generate. The value of 'failed' in the in +printer function may actually be passed (bitwise-OR'ed in) as the PF_FAILED +flag to these support functions, and they will do the right thing. For +example, a call to put_open_struct with the PF_FAILED flag will end up simply +printing the pointer to the structure, and not allow printing of the contents +of the structure.
+ +The above support functions are documented (at a basic level) within the code, +but in many cases, it may be useful to look up how they are used in practice by +the existing handlers. The same goes for various less clear cases; while there +is basic support for printing structures, support for printing arrays must be +coded fully by hand, as has been done for many places. A serious attempt has +been made to make the output consistent across the board (mainly thanks to the +output format of strace, on which the output of this tracer has been based, +sometimes very strictly and sometimes more loosely, but that aside) so it is +always advisable to follow the ways of the existing handlers. Also keep in +mind that there are already printer functions for several generic structures, +and these should be used whenever possible (e.g., see the put_fd() comment). + +Finally, the default_out and default_in functions may be used as printer +functions for calls with no parameters, and for functions which need no more +than put_result() to print their system call result, respectively. + + +INTERNALS: MULTIPROCESS OUTPUT AND PREEMPTION + +Things get interesting when multiple processes are traced at once. Due to the +nature of process scheduling, system calls may end up being preempted between +the call-enter and call-leave phases. This means that the output of a system +call has to be suspended to give way to an event from another traced process. +Such preemption may occur with literally all calls; not just "blocking" calls. + +The tracer goes to some lengths to aid the user in following the output in +the light of preemption. The most important aspect is that the output of the +call-enter phase is recorded, so that in the case of preemption, the call-leave +phase can start by replaying the record. As a result, the user gets to see the +whole system call on a single line, instead of just the second half.
Such +system call resumptions are marked with a "*" in their prefix, to show that +the call was not just entered. The output therefore looks like this: + + 2| syscall() = <..> + 3| othercall() = 0 + 2|*syscall() = 0 + +Signals that arrive during a call will cause a resumption of the call as well. +As a result, a call may be resumed multiple times: + + 2| syscall() = <..> + 3| othercall() = 0 + 2|*syscall() = ** SIGUSR1 ** ** SIGUSR2 ** <..> + 3| othercall() = -1 [EBUSY] + 2|*syscall() = ** SIGHUP ** <..> + 3| othercall() = 0 + 2|*syscall() = 0 + +This entire scenario shows one single system call from process 2. + +In the current implementation, the output that should be recorded and/or cause +the "<..>" preemption marker, as well as the cases where the recorded text must +be replayed, are marked by the code explicitly. Replay takes place in three +cases: upon the call-leave event (obviously), upon receiving a signal (as shown +above), and when it is required that a suspended no-return call is shown as +completed before continuing with other output. The last case applies to exit() +and execve(), and both are documented in the code quite extensively. Generally +speaking, in all output lines where no recording or replay actions are +performed, the recording will not be replayed but also not removed. This +allows for intermediate lines for that process in the output. Practically +speaking, future support for job control could even print when a process gets +stopped and continued, for that process, while preempting the output for the +ongoing system call for that same process. + +It is possible that the output of the call-enter phase exhausts the recording +buffer for its process. In this case, a new, shorter text is generated upon +process resumption. There are many other aspects to proper output formatting +in the light of preemption, but most of them should be documented as part of +the code reasonably well.
diff --git a/minix/usr.bin/trace/call.c b/minix/usr.bin/trace/call.c new file mode 100644 index 000000000..441432992 --- /dev/null +++ b/minix/usr.bin/trace/call.c @@ -0,0 +1,686 @@ + +#include "inc.h" + +#include +#include +#include + +static const struct calls *call_table[] = { + &pm_calls, + &vfs_calls, + &rs_calls, + &vm_calls, + &ipc_calls, +}; + +/* + * Find a call handler for the given endpoint, call number pair. Return NULL + * if no call handler for this call exists. + */ +static const struct call_handler * +find_handler(endpoint_t endpt, int call_nr) +{ + int i, index; + + for (i = 0; i < COUNT(call_table); i++) { + if (call_table[i]->endpt != ANY && + call_table[i]->endpt != endpt) + continue; + + if (call_nr < call_table[i]->base) + continue; + + index = call_nr - call_table[i]->base; + + if (index >= call_table[i]->count) + continue; + + if (call_table[i]->map[index].outfunc == NULL) + continue; + + return &call_table[i]->map[index]; + } + + return NULL; +} + +/* + * Print an endpoint. + */ +void +put_endpoint(struct trace_proc * proc, const char * name, endpoint_t endpt) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (endpt) { + TEXT(ASYNCM); + TEXT(IDLE); + TEXT(CLOCK); + TEXT(SYSTEM); + TEXT(KERNEL); + TEXT(PM_PROC_NR); + TEXT(VFS_PROC_NR); + TEXT(RS_PROC_NR); + TEXT(MEM_PROC_NR); + TEXT(SCHED_PROC_NR); + TEXT(TTY_PROC_NR); + TEXT(DS_PROC_NR); + TEXT(VM_PROC_NR); + TEXT(PFS_PROC_NR); + TEXT(ANY); + TEXT(NONE); + TEXT(SELF); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", endpt); +} + +/* + * Print a message structure. The source field will be printed only if the + * PF_ALT flag is given. 
+ */ +static void +put_message(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + message m; + + if (!put_open_struct(proc, name, flags, addr, &m, sizeof(m))) + return; + + if (flags & PF_ALT) + put_endpoint(proc, "m_source", m.m_source); + + put_value(proc, "m_type", "%x", m.m_type); + + put_close_struct(proc, FALSE /*all*/); +} + +/* + * Print the call's equals sign, which also implies that the parameters part of + * the call has been fully printed and the corresponding closing parenthesis + * may have to be printed, if it has not been printed already. + */ +void +put_equals(struct trace_proc * proc) +{ + + /* + * Do not allow multiple equals signs on a single line. This check is + * protection against badly written handlers. It does not work for the + * no-return type, but such calls are rare and less error prone anyway. + */ + assert((proc->call_flags & (CF_DONE | CF_NORETURN)) != CF_DONE); + + /* + * We allow (and in fact force) handlers to call put_equals in order to + * indicate that the call's parameters block has ended, so we must end + * the block here, if we hadn't done so before. + */ + if (!(proc->call_flags & CF_DONE)) { + put_close(proc, ") "); + + proc->call_flags |= CF_DONE; + } + + put_align(proc); + put_text(proc, "= "); + + format_set_sep(proc, NULL); +} + +/* + * Print the primary result of a call, after the equals sign. It is always + * possible that this is an IPC-level or other low-level error, in which case + * this takes precedence, which is why this function must be called to print + * the result if the call failed in any way at all; it may or may not be used + * if the call succeeded. For regular call results, default MINIX3/POSIX + * semantics are used: if the return value is negative, the actual call failed + * with -1 and the negative return value is the call's error code. 
The caller + * may consider other cases a failure (e.g., waitpid() returning 0), but + * negative return values *not* signifying an error are currently not supported + * since they are not present in MINIX3. + */ +void +put_result(struct trace_proc * proc) +{ + const char *errname; + int value; + + /* This call should always be preceded by a put_equals call. */ + assert(proc->call_flags & CF_DONE); + + /* + * If we failed to copy in the result register or message, print a + * basic error and nothing else. + */ + if (proc->call_flags & (CF_REG_ERR | CF_MSG_ERR)) { + put_text(proc, ""); + + return; + } + + /* + * If we are printing a system call rather than an IPC call, and an + * error occurred at the IPC level, prefix the output with "" to + * indicate the IPC failure. If we are printing an IPC call, an IPC- + * level result is implied, so we do not print this. + */ + if (proc->call_handler != NULL && (proc->call_flags & CF_IPC_ERR)) + put_text(proc, " "); + + value = proc->call_result; + + if (value >= 0) + put_fmt(proc, "%d", value); + else if (!valuesonly && (errname = get_error_name(-value)) != NULL) + put_fmt(proc, "-1 [%s]", errname); + else + put_fmt(proc, "-1 [%d]", -value); + + format_set_sep(proc, " "); +} + +/* + * The default enter-call (out) printer, which prints no parameters and is thus + * immediately done with printing parameters. + */ +int +default_out(struct trace_proc * __unused proc, const message * __unused m_out) +{ + + return CT_DONE; +} + +/* + * The default leave-call (in) printer, which simply prints the call result, + * possibly preceded by an equals sign if none was printed yet. For obvious + * reasons, if the handler's out printer returned CT_NOTDONE, this default + * printer must not be used. 
+ */ +void +default_in(struct trace_proc * proc, const message * __unused m_out, + const message * __unused m_in, int __unused failed) +{ + + if ((proc->call_flags & (CF_DONE | CF_NORETURN)) != CF_DONE) + put_equals(proc); + put_result(proc); +} + +/* + * Prepare a sendrec call, by copying in the request message, determining + * whether it is one of the calls that the tracing engine should know about, + * searching for a handler for the call, and returning a name for the call. + */ +static const char * +sendrec_prepare(struct trace_proc * proc, endpoint_t endpt, vir_bytes addr, + int * trace_class) +{ + const char *name; + int r; + + r = mem_get_data(proc->pid, addr, &proc->m_out, sizeof(proc->m_out)); + + if (r == 0) { + if (endpt == PM_PROC_NR) { + if (proc->m_out.m_type == PM_EXEC) + *trace_class = TC_EXEC; + else if (proc->m_out.m_type == PM_SIGRETURN) + *trace_class = TC_SIGRET; + } + + proc->call_handler = find_handler(endpt, proc->m_out.m_type); + } else + proc->call_handler = NULL; + + if (proc->call_handler != NULL) { + if (proc->call_handler->namefunc != NULL) + name = proc->call_handler->namefunc(&proc->m_out); + else + name = proc->call_handler->name; + + assert(name != NULL); + } else + name = "ipc_sendrec"; + + return name; +} + +/* + * Print the outgoing (request) part of a sendrec call. If we found a call + * handler for the call, let the handler generate output. Otherwise, print the + * sendrec call at the kernel IPC level. Return the resulting call flags. + */ +static unsigned int +sendrec_out(struct trace_proc * proc, endpoint_t endpt, vir_bytes addr) +{ + + if (proc->call_handler != NULL) { + return proc->call_handler->outfunc(proc, &proc->m_out); + } else { + put_endpoint(proc, "src_dest", endpt); + /* + * We have already copied in the message, but if we used m_out + * and PF_LOCADDR here, a copy failure would cause "&.." to be + * printed rather than the actual message address. 
+ */ + put_message(proc, "m_ptr", 0, addr); + + return CT_DONE; + } +} + +/* + * Print the incoming (reply) part of a sendrec call. Copy in the reply + * message, determine whether the call is considered to have failed, and let + * the call handler do the rest. If no call handler was found, print an + * IPC-level result. + */ +static void +sendrec_in(struct trace_proc * proc, int failed) +{ + message m_in; + + if (failed) { + /* The call failed at the IPC level. */ + memset(&m_in, 0, sizeof(m_in)); /* not supposed to be used */ + assert(proc->call_flags & CF_IPC_ERR); + } else if (mem_get_data(proc->pid, proc->m_addr, &m_in, + sizeof(m_in)) != 0) { + /* The reply message is somehow unavailable to us. */ + memset(&m_in, 0, sizeof(m_in)); /* not supposed to be used */ + proc->call_result = EGENERIC; /* not supposed to be used */ + proc->call_flags |= CF_MSG_ERR; + failed = PF_FAILED; + } else { + /* The result is for the actual call. */ + proc->call_result = m_in.m_type; + failed = (proc->call_result < 0) ? PF_FAILED : 0; + } + + if (proc->call_handler != NULL) + proc->call_handler->infunc(proc, &proc->m_out, &m_in, failed); + else + put_result(proc); +} + +/* + * Perform preparations for printing a system call. Return two things: the + * name to use for the call, and the trace class of the call. + * special treatment). + */ +static const char * +call_prepare(struct trace_proc * proc, reg_t reg[3], int * trace_class) +{ + + switch (proc->call_type) { + case SENDREC: + return sendrec_prepare(proc, (endpoint_t)reg[1], + (vir_bytes)reg[2], trace_class); + + case SEND: + return "ipc_send"; + + case SENDNB: + return "ipc_sendnb"; + + case RECEIVE: + return "ipc_receive"; + + case NOTIFY: + return "ipc_notify"; + + case SENDA: + return "ipc_senda"; + + case MINIX_KERNINFO: + return "minix_kerninfo"; + + default: + /* + * It would be nice to include the call number here, but we + * must return a string that will last until the entire call is + * finished. 
Adding another buffer to the trace_proc structure + * is an option, but it seems overkill.. + */ + return "ipc_unknown"; + } +} + +/* + * Print the outgoing (request) part of a system call. Return the resulting + * call flags. + */ +static unsigned int +call_out(struct trace_proc * proc, reg_t reg[3]) +{ + + switch (proc->call_type) { + case SENDREC: + proc->m_addr = (vir_bytes)reg[2]; + + return sendrec_out(proc, (endpoint_t)reg[1], + (vir_bytes)reg[2]); + + case SEND: + case SENDNB: + put_endpoint(proc, "dest", (endpoint_t)reg[1]); + put_message(proc, "m_ptr", 0, (vir_bytes)reg[2]); + + return CT_DONE; + + case RECEIVE: + proc->m_addr = (vir_bytes)reg[2]; + + put_endpoint(proc, "src", (endpoint_t)reg[1]); + + return CT_NOTDONE; + + case NOTIFY: + put_endpoint(proc, "dest", (endpoint_t)reg[1]); + + return CT_DONE; + + case SENDA: + put_ptr(proc, "table", (vir_bytes)reg[2]); + put_value(proc, "count", "%zu", (size_t)reg[1]); + + return CT_DONE; + + case MINIX_KERNINFO: + default: + return CT_DONE; + } +} + +/* + * Print the incoming (reply) part of a call. + */ +static void +call_in(struct trace_proc * proc, int failed) +{ + + switch (proc->call_type) { + case SENDREC: + sendrec_in(proc, failed); + + break; + + case RECEIVE: + /* Print the source as well. */ + put_message(proc, "m_ptr", failed | PF_ALT, proc->m_addr); + put_equals(proc); + put_result(proc); + + break; + + case MINIX_KERNINFO: + /* + * We do not have a platform-independent means to access the + * secondary IPC return value, so we cannot print the receive + * status or minix_kerninfo address. + */ + /* FALLTHROUGH */ + default: + put_result(proc); + + break; + } +} + +/* + * Determine whether to skip printing the given call, based on its name. + */ +static int +call_hide(const char * __unused name) +{ + + /* + * TODO: add support for such filtering, with an strace-like -e command + * line option. 
For now, we filter nothing, although calls may still + * be hidden as the result of a register retrieval error. + */ + return FALSE; +} + +/* + * The given process entered a system call. Return the trace class of the + * call: TC_EXEC for an execve() call, TC_SIGRET for a sigreturn() call, or + * TC_NORMAL for a call that requires no exceptions in the trace engine. + */ +int +call_enter(struct trace_proc * proc, int show_stack) +{ + const char *name; + reg_t reg[3]; + int trace_class, type; + + /* Get the IPC-level type and parameters of the system call. */ + if (kernel_get_syscall(proc->pid, reg) < 0) { + /* + * If obtaining the details of the system call failed, even + * though we know the process is stopped on a system call, we + * are going to assume that the process got killed somehow. + * Thus, the best we can do is ignore the system call entirely, + * and hope that the next thing we hear about this process is + * its termination. At worst, we ignore a serious error.. + */ + proc->call_flags = CF_HIDE; + + return FALSE; + } + + /* + * Obtain the call name that is to be used for this call, and decide + * whether we want to print this call at all. + */ + proc->call_type = (int)reg[0]; + trace_class = TC_NORMAL; + + name = call_prepare(proc, reg, &trace_class); + + proc->call_name = name; + + if (call_hide(name)) { + proc->call_flags = CF_HIDE; + + return trace_class; + } + + /* Only print a stack trace if we are printing the call itself. */ + if (show_stack) + kernel_put_stacktrace(proc); + + /* + * Start a new line, start recording, and print the call name and + * opening parenthesis. + */ + put_newline(); + + format_reset(proc); + + record_start(proc); + + put_text(proc, name); + put_open(proc, NULL, PF_NONAME, "(", ", "); + + /* + * Print the outgoing part of the call, that is, some or all of its + * parameters. 
This call returns flags indicating how far printing + * got, and may be one of the following combinations: + * - CT_NOTDONE (0) if printing parameters is not yet complete; after + * the call split, the in handler must print the rest itself; + * - CT_DONE (CF_DONE) if printing parameters is complete, and we + * should now print the closing parenthesis and equals sign; + * - CT_NORETURN (CF_DONE|CF_NORETURN) if printing parameters is + * complete, but we should not print the equals sign, because the + * call is expected not to return (the no-return call type). + */ + type = call_out(proc, reg); + assert(type == CT_NOTDONE || type == CT_DONE || type == CT_NORETURN); + + /* + * Print whatever the handler told us to print for now. + */ + if (type & CF_DONE) { + if (type & CF_NORETURN) { + put_close(proc, ")"); + + put_space(proc); + + proc->call_flags |= type; + } else { + /* + * The equals sign is printed implicitly for the + * CT_DONE type only. For CT_NORETURN and CT_NOTDONE, + * the "in" handler has to do it explicitly. + */ + put_equals(proc); + } + } else { + /* + * If at least one parameter was printed, print the separator + * now. We know that another parameter will follow (otherwise + * the caller would have returned CT_DONE), and this way the + * output looks better. + */ + format_push_sep(proc); + } + + /* + * We are now at the call split; further printing will be done once the + * call returns, through call_leave. Stop recording; if the call gets + * suspended and later resumed, we should replay everything up to here. + */ +#if DEBUG + put_text(proc, "|"); /* warning, this may push a space */ +#endif + + record_stop(proc); + + output_flush(); + + return trace_class; +} + +/* + * The given process left a system call, or if skip is set, the leave phase of + * the current system call should be ended. + */ +void +call_leave(struct trace_proc * proc, int skip) +{ + reg_t retreg; + int hide, failed; + + /* If the call is skipped, it must be a no-return type call. 
*/ + assert(!skip || (proc->call_flags & (CF_NORETURN | CF_HIDE))); + + /* + * Start by replaying the current call, if necessary. If the call was + * suspended and we are about to print the "in" part, this is obviously + * needed. If the call is hidden, replaying will be a no-op, since + * nothing was recorded for this call. The special case is a skipped + * call (which, as established above, must be a no-return call, e.g. + * exec), for which replaying has the effect that if the call was + * previously suspended, it will now be replayed, without suspension: + * + * 2| execve("./test", ["./test"], [..(12)]) <..> + * 3| sigsuspend([]) = <..> + * [A] 2| execve("./test", ["./test"], [..(12)]) + * 2| --- + * 2| Tracing test (pid 2) + * + * The [A] line is the result of replaying the skipped call. + */ + call_replay(proc); + + hide = (proc->call_flags & CF_HIDE); + + if (!hide && !skip) { + /* Get the IPC-level result of the call. */ + if (kernel_get_retreg(proc->pid, &retreg) < 0) { + /* This should never happen. Deal with it anyway. */ + proc->call_flags |= CF_REG_ERR; + failed = PF_FAILED; + } else if ((proc->call_result = (int)retreg) < 0) { + proc->call_flags |= CF_IPC_ERR; + failed = PF_FAILED; + } else + failed = 0; + + /* + * Print the incoming part of the call, that is, possibly some + * or all of its parameters and the call's closing parenthesis + * (if CT_NOTDONE), and the equals sign (if not CT_DONE), then + * the call result. + */ + call_in(proc, failed); + } + + if (!hide) { + /* + * The call is complete now, so clear the recording. This also + * implies that no suspension marker will be printed anymore. + */ + record_clear(proc); + + put_newline(); + } + + /* + * For calls not of the no-return type, an equals sign must have been + * printed by now. This is protection against badly written handlers. 
+ */ + assert(proc->call_flags & CF_DONE); + + proc->call_name = NULL; + proc->call_flags = 0; +} + +/* + * Replay the recorded text, if any, for the enter phase of the given process. + * If there is no recorded text, start a new line anyway. + */ +void +call_replay(struct trace_proc * proc) +{ + + /* + * We get TRUE if the recorded call should be replayed, but the + * recorded text for the call did not fit in the recording buffer. + * In that case, we have to come up with a replacement text for the + * call up to the call split. + */ + if (record_replay(proc) == TRUE) { + /* + * We basically place a "<..>" suspension marker in the + * parameters part of the call, and use its call name and flags + * for the rest. There is a trailing space in all cases. + */ + put_fmt(proc, "%s(<..>%s", proc->call_name, + !(proc->call_flags & CF_DONE) ? "," : + ((proc->call_flags & CF_NORETURN) ? ")" : ") =")); + put_space(proc); + } +} + +/* + * Return the human-readable name of the call currently being made by the given + * process. The process is guaranteed to be in a call, although the call may + * be hidden. Under no circumstances may this function return a NULL pointer. 
+ */ +const char * +call_name(struct trace_proc * proc) +{ + + assert(proc->call_name != NULL); + + return proc->call_name; +} diff --git a/minix/usr.bin/trace/error.awk b/minix/usr.bin/trace/error.awk new file mode 100644 index 000000000..747616230 --- /dev/null +++ b/minix/usr.bin/trace/error.awk @@ -0,0 +1,28 @@ +# Derived from libc errlist.awk + +BEGIN { + printf("/* This file is automatically generated by error.awk */\n\n"); + printf("#include \"inc.h\"\n\n"); + printf("static const char *const errors[] = {\n"); +} +/^#define/ { + name = $2; + if (name == "ELAST") + next; + number = $3; + if (number == "(_SIGN") + number = $4; + if (number < 0 || number == "EAGAIN") + next; + printf("\t[%s] = \"%s\",\n", name, name); +} +END { + printf("};\n\n"); + printf("const char *\nget_error_name(int err)\n{\n\n"); + printf("\tif (err >= 0 && err < sizeof(errors) / sizeof(errors[0]) &&\n"); + printf("\t errors[err] != NULL)\n"); + printf("\t\treturn errors[err];\n"); + printf("\telse\n"); + printf("\t\treturn NULL;\n"); + printf("}\n"); +} diff --git a/minix/usr.bin/trace/escape.c b/minix/usr.bin/trace/escape.c new file mode 100644 index 000000000..7f00ab87d --- /dev/null +++ b/minix/usr.bin/trace/escape.c @@ -0,0 +1,48 @@ + +#include "inc.h" + +static const char *const escape[256] = { + "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", + "\\x08", "\\t", "\\n", "\\x0B", "\\x0C", "\\r", "\\x0E", "\\x0F", + "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", + "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F", + " ", "!", "\\\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", 
"m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "\\x7F", + "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87", + "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F", + "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97", + "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F", + "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7", + "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF", + "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7", + "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF", + "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7", + "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF", + "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7", + "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF", + "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7", + "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF", + "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7", + "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF", +}; + +/* + * For the given character, return a string representing an escaped version of + * the character. + */ +const char * +get_escape(char c) +{ + + return escape[(unsigned int)(unsigned char)c]; +} diff --git a/minix/usr.bin/trace/format.c b/minix/usr.bin/trace/format.c new file mode 100644 index 000000000..10a6b3eb2 --- /dev/null +++ b/minix/usr.bin/trace/format.c @@ -0,0 +1,426 @@ + +#include "inc.h" + +#include + +/* + * The size of the formatting buffer, which in particular limits the maximum + * size of the output from the variadic functions. 
All printer functions which + * are dealing with potentially large or even unbounded output, should be able + * to generate their output in smaller chunks. In the end, nothing that is + * being printed as a unit should even come close to reaching this limit. + */ +#define FORMAT_BUFSZ 4096 + +/* + * The buffer which is used for all intermediate copying and/or formatting. + * Care must be taken that only one function uses this buffer at any time. + */ +static char formatbuf[FORMAT_BUFSZ]; + +/* + * Reset the line formatting for the given process. + */ +void +format_reset(struct trace_proc * proc) +{ + + proc->next_sep = NULL; + proc->depth = -1; +} + +/* + * Set the next separator for the given process. The given separator may be + * NULL. + */ +void +format_set_sep(struct trace_proc * proc, const char * sep) +{ + + proc->next_sep = sep; +} + +/* + * Print and clear the next separator for the process, if any. + */ +void +format_push_sep(struct trace_proc * proc) +{ + + if (proc->next_sep != NULL) { + put_text(proc, proc->next_sep); + + proc->next_sep = NULL; + } +} + +/* + * Print a field, e.g. a parameter or a field from a structure, separated from + * other fields at the same nesting depth as appropriate. If the given field + * name is not NULL, it may or may not be printed. The given text is what will + * be printed for this field so far, but the caller is allowed to continue + * printing text for the same field with e.g. put_text(). As such, the given + * text may even be an empty string. + */ +void +put_field(struct trace_proc * proc, const char * name, const char * text) +{ + + /* + * At depth -1 (the basic line level), names are not used. A name + * should not be supplied by the caller in that case, but, it happens. 
+ */ + if (proc->depth < 0) + name = NULL; + + format_push_sep(proc); + + if (name != NULL && (proc->depths[proc->depth].name || allnames)) { + put_text(proc, name); + put_text(proc, "="); + } + + put_text(proc, text); + /* At depth -1 there is no depths[] entry: use no separator. */ + format_set_sep(proc, (proc->depth >= 0) ? proc->depths[proc->depth].sep : NULL); +} + +/* + * Increase the nesting depth with a new block of fields, enclosed within + * parentheses, brackets, etcetera. The given name, which may be NULL, is the + * name of the entire nested block. In the flags field, PF_NONAME indicates + * that the fields within the block should not have their names printed, + * although this may be overridden by setting the allnames variable. The given + * string is the block opening string (e.g., an opening parenthesis). The + * given separator is used to separate the fields within the nested block, and + * should generally be ", " to maintain output consistency. + */ +void +put_open(struct trace_proc * proc, const char * name, int flags, + const char * string, const char * sep) +{ + + put_field(proc, name, string); + + proc->depth++; + + assert(proc->depth < MAX_DEPTH); + + proc->depths[proc->depth].sep = sep; + proc->depths[proc->depth].name = !(flags & PF_NONAME); + + format_set_sep(proc, NULL); +} + +/* + * Decrease the nesting depth by ending a nested block of fields. The given + * string is the closing parenthesis, bracket, etcetera. + */ +void +put_close(struct trace_proc * proc, const char * string) +{ + + assert(proc->depth >= 0); + + put_text(proc, string); + + proc->depth--; + + if (proc->depth >= 0) + format_set_sep(proc, proc->depths[proc->depth].sep); + else + format_set_sep(proc, NULL); +} + +/* + * Version of put_text with variadic arguments. The given process may be NULL. + */ +void +put_fmt(struct trace_proc * proc, const char * fmt, ...)
+{ + va_list ap; + + va_start(ap, fmt); + (void)vsnprintf(formatbuf, sizeof(formatbuf), fmt, ap); + va_end(ap); + + put_text(proc, formatbuf); +} + +/* + * Version of put_field with variadic arguments. + */ +void +put_value(struct trace_proc * proc, const char * name, const char * fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + (void)vsnprintf(formatbuf, sizeof(formatbuf), fmt, ap); + va_end(ap); + + put_field(proc, name, formatbuf); +} + +/* + * Start printing a structure. In general, the function copies the contents of + * the structure of size 'size' from the traced process at 'addr' into the + * local 'ptr' structure, opens a nested block with name 'name' (which may + * be NULL) using an opening bracket, and returns TRUE to indicate that the + * caller should print fields from the structure. However, if 'flags' contains + * PF_FAILED, the structure will be printed as a pointer, no copy will be made, + * and the call will return FALSE. Similarly, if the remote copy fails, a + * pointer will be printed and the call will return FALSE. If PF_LOCADDR is + * given, 'addr' is a local address, and an intraprocess copy will be made. + */ +int +put_open_struct(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, void * ptr, size_t size) +{ + + if ((flags & PF_FAILED) || valuesonly > 1 || addr == 0) { + if (flags & PF_LOCADDR) + put_field(proc, name, "&.."); + else + put_ptr(proc, name, addr); + + return FALSE; + } + + if (!(flags & PF_LOCADDR)) { + if (mem_get_data(proc->pid, addr, ptr, size) < 0) { + put_ptr(proc, name, addr); + + return FALSE; + } + } else + memcpy(ptr, (void *) addr, size); + + put_open(proc, name, flags, "{", ", "); + + return TRUE; +} + +/* + * End printing a structure. This must be called only to match a successful + * call to put_open_struct. The given 'all' flag indicates whether all fields + * of the structure have been printed; if not, a ".." 
continuation text is + * printed to show the user that some structure fields have not been printed. + */ +void +put_close_struct(struct trace_proc * proc, int all) +{ + + if (!all) + put_field(proc, NULL, ".."); + + put_close(proc, "}"); +} + +/* + * Print a pointer. NULL is treated as a special case. + */ +void +put_ptr(struct trace_proc * proc, const char * name, vir_bytes addr) +{ + + if (addr == 0 && !valuesonly) + put_field(proc, name, "NULL"); + else + put_value(proc, name, "&0x%lx", addr); +} + +/* + * Print the contents of a buffer, at remote address 'addr' and of 'bytes' + * size, as a field using name 'name' (which may be NULL). If the PF_FAILED + * flag is given, the buffer address is printed instead, since it is assumed + * that the actual buffer contains garbage. If the PF_LOCADDR flag is given, + * the given address is a local address and no intraprocess copies are + * performed. If the PF_STRING flag is given, the buffer is expected to + * contain a null terminator within its size, and the string will be printed + * only up to there. Normally, the string is cut off beyond a number of bytes + * which depends on the verbosity level; if the PF_FULL flag is given, the full + * string will be printed no matter its size (used mainly for path names, which + * typically become useless once cut off). + */ +void +put_buf(struct trace_proc * proc, const char * name, int flags, vir_bytes addr, + ssize_t size) +{ + const char *escaped; + size_t len, off, max, chunk; + int i, cutoff; + char *p; + + if ((flags & PF_FAILED) || valuesonly || addr == 0 || size < 0) { + if (flags & PF_LOCADDR) + put_field(proc, name, "&.."); + else + put_ptr(proc, name, addr); + + return; + } + + if (size == 0) { + put_field(proc, name, "\"\""); + + return; + } + + /* + * TODO: the maximum says nothing about the size of the printed text. + * Escaped-character printing can make the output much longer. Does it + * make more sense to apply a limit after the escape transformation? 
+ */ + if (verbose == 0) max = 32; + else if (verbose == 1) max = 256; + else max = SIZE_MAX; + + /* + * If the output is cut off, we put two dots after the closing quote. + * For non-string buffers, the output is cut off if the size exceeds + * our limit or we run into a copying error somewhere in the middle. + * For strings, the output is cut off unless we find a null terminator. + */ + cutoff = !!(flags & PF_STRING); + len = (size_t)size; + if (!(flags & PF_FULL) && len > max) { + len = max; + cutoff = TRUE; + } + + for (off = 0; off < len; off += chunk) { + chunk = len - off; + if (chunk > sizeof(formatbuf) - 1) + chunk = sizeof(formatbuf) - 1; + + if (!(flags & PF_LOCADDR)) { + if (mem_get_data(proc->pid, addr + off, formatbuf, + chunk) < 0) { + if (off == 0) { + put_ptr(proc, name, addr); + + return; + } + + cutoff = TRUE; + break; + } + } else + memcpy(formatbuf, (void *)addr, chunk); + + if (off == 0) + put_field(proc, name, "\""); + + /* In strings, look for the terminating null character. */ + if ((flags & PF_STRING) && + (p = memchr(formatbuf, '\0', chunk)) != NULL) { + chunk = (size_t)(p - formatbuf); + cutoff = FALSE; + } + + /* Print the buffer contents using escaped characters. */ + for (i = 0; i < chunk; i++) { + escaped = get_escape(formatbuf[i]); + + put_text(proc, escaped); + } + + /* Stop if we found the end of the string. */ + if ((flags & PF_STRING) && !cutoff) + break; + } + + if (cutoff) + put_text(proc, "\".."); + else + put_text(proc, "\""); +} + +/* + * Print a flags field, using known flag names. The name of the whole field is + * given as 'name' and may be NULL. The caller must supply an array of known + * flags as 'fp' (with 'num' entries). Each entry in the array has a mask, a + * value, and a name. If the given flags 'value', bitwise-ANDed with the mask + * of an entry, yields the value of that entry, then the name is printed. 
This + * means that certain zero bits may also be printed as actual flags, and that + * by supplying an all-bits-set mask can print a flag name for a zero value, + * for example F_OK for access(). See the FLAG macros and their usage for + * examples. All matching flag names are printed with a "|" separator, and if + * after evaluating all 'num' entries in 'fp' there are still bits in 'value' + * for which nothing has been printed, the remaining bits will be printed with + * the 'fmt' format string for an integer (generally "%d" should be used). + */ +void +put_flags(struct trace_proc * proc, const char * name, const struct flags * fp, + unsigned int num, const char * fmt, unsigned int value) +{ + unsigned int left; + int first; + + if (valuesonly) { + put_value(proc, name, fmt, value); + + return; + } + + put_field(proc, name, ""); + + for (first = TRUE, left = value; num > 0; fp++, num--) { + if ((value & fp->mask) == fp->value) { + if (first) + first = FALSE; + else + put_text(proc, "|"); + put_text(proc, fp->name); + + left -= fp->value; + } + } + + if (left != 0) { + if (first) + first = FALSE; + else + put_text(proc, "|"); + + put_fmt(proc, fmt, left); + } + + /* + * If nothing has been printed so far, simply print a zero. Ignoring + * the given format in this case is intentional: a simple 0 looks + * better than 0x0 or 00 etc. + */ + if (first) + put_text(proc, "0"); +} + +/* + * Print a tail field at the end of an array. The given 'count' value is the + * total number of elements in the array, or 0 to indicate that an error + * occurred. The given 'printed' value is the number of fields printed so far. + * If some fields have been printed already, the number of fields not printed + * will be shown as "..(+N)". If no fields have been printed already, the + * (total) number of fields not printed will be shown as "..(N)". An error + * will print "..(?)". + * + * The rules for printing an array are as follows. In principle, arrays should + * be enclosed in "[]". 
However, if a copy error occurs immediately, a pointer + * to the array should be printed instead. An empty array should be printed as + * "[]" (not "[..(0)]"). If a copy error occurs in the middle of the array, + * put_tail should be used with count == 0. Only if not all fields in the + * array are printed, put_tail should be used with count > 0. The value of + * 'printed' is typically the result of an arbitrary limit set based on the + * verbosity level. + */ +void +put_tail(struct trace_proc * proc, unsigned int count, unsigned int printed) +{ + + if (count == 0) + put_field(proc, NULL, "..(?)"); + else + put_value(proc, NULL, "..(%s%u)", + (printed > 0) ? "+" : "", count - printed); +} diff --git a/minix/usr.bin/trace/inc.h b/minix/usr.bin/trace/inc.h new file mode 100644 index 000000000..2c84069c0 --- /dev/null +++ b/minix/usr.bin/trace/inc.h @@ -0,0 +1,22 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proc.h" +#include "type.h" +#include "proto.h" diff --git a/minix/usr.bin/trace/ioctl.c b/minix/usr.bin/trace/ioctl.c new file mode 100644 index 000000000..ed8931baf --- /dev/null +++ b/minix/usr.bin/trace/ioctl.c @@ -0,0 +1,226 @@ + +#include "inc.h" + +#include + +static char ioctlbuf[IOCPARM_MASK]; + +static const struct { + const char *(*name)(unsigned long); + int (*arg)(struct trace_proc *, unsigned long, void *, int); + int is_svrctl; +} ioctl_table[] = { + { block_ioctl_name, block_ioctl_arg, FALSE }, + { char_ioctl_name, char_ioctl_arg, FALSE }, + { net_ioctl_name, net_ioctl_arg, FALSE }, + { svrctl_name, svrctl_arg, TRUE }, +}; + +/* + * Print an IOCTL request code, and save certain values in the corresponding + * process structure in order to be able to print the IOCTL argument. 
+ */ +void +put_ioctl_req(struct trace_proc * proc, const char * name, unsigned long req, + int is_svrctl) +{ + const char *text; + size_t size; + unsigned int group, cmd; + int i, r, w, big; + + proc->ioctl_index = -1; + + if (valuesonly > 1) { + put_value(proc, name, "0x%lx", req); + + return; + } + + /* + * Lookups are bruteforce across the IOCTL submodules; they're all + * checked. We could use the group letter but that would create more + * issues than it solves. Our hope is that at least the compiler is + * smart about looking up particular codes in each switch statement, + * although in the worst case, it's a full O(n) lookup. + */ + for (i = 0; !valuesonly && i < COUNT(ioctl_table); i++) { + /* IOCTLs and SVRCTLs are considered different name spaces. */ + if (ioctl_table[i].is_svrctl != is_svrctl) + continue; + + if ((text = ioctl_table[i].name(req)) != NULL) { + put_field(proc, name, text); + + proc->ioctl_index = i; + + return; + } + } + + r = _MINIX_IOCTL_IOR(req); + w = _MINIX_IOCTL_IOW(req); + big = _MINIX_IOCTL_BIG(req); + size = (size_t)(big ? _MINIX_IOCTL_SIZE_BIG(req) : IOCPARM_LEN(req)); + group = big ? 0 : IOCGROUP(req); + cmd = req & 0xff; /* shockingly there is no macro for this.. */ + + /* + * Not sure why an entire bit is wasted on IOC_VOID (legacy reasons?), + * but since the redundancy is there, we might as well check whether + * this is a valid IOCTL request. Also, we expect the group to be a + * printable character. If either check fails, print just a number. + */ + if (((req & IOC_VOID) && (r || w || big || size > 0)) || + (!(req & IOC_VOID) && ((!r && !w) || size == 0)) || + (!big && (group < 32 || group >= 127))) { + put_value(proc, name, "0x%lx", req); + + return; + } + + if (big) { + /* For big IOCTLs, "R" comes before "W" (old MINIX style). */ + put_value(proc, name, "_IO%s%s_BIG(%u,%zu)", + r ? "R" : "", w ?
"W" : "", cmd, size); + } else if (IOCGROUP(req) >= 32 && IOCGROUP(req) < 127) { + /* For normal IOCTLs, "W" comes before "R" (NetBSD style). */ + put_value(proc, name, "_IO%s%s('%c',%u,%zu)", + w ? "W" : "", r ? "R" : "", group, cmd, size); + } +} + +/* + * Print the supplied (out) part of an IOCTL argument, as applicable. For + * efficiency reasons, this function assumes that put_ioctl_req() has been + * called for the corresponding IOCTL already, so that the necessary fields in + * the given proc structure are set as expected. + */ +int +put_ioctl_arg_out(struct trace_proc * proc, const char * name, + unsigned long req, vir_bytes addr, int is_svrctl) +{ + size_t size; + int dir, all; + + dir = (_MINIX_IOCTL_IOW(req) ? IF_OUT : 0) | + (_MINIX_IOCTL_IOR(req) ? IF_IN : 0); + + if (dir == 0) + proc->ioctl_index = -1; /* no argument to print at all */ + + /* No support for printing big-IOCTL contents just yet. */ + if (valuesonly > 1 || _MINIX_IOCTL_BIG(req) || + proc->ioctl_index == -1) { + put_ptr(proc, name, addr); + + return CT_DONE; + } + + assert(proc->ioctl_index >= 0); + assert(proc->ioctl_index < COUNT(ioctl_table)); + assert(ioctl_table[proc->ioctl_index].is_svrctl == is_svrctl); + + proc->ioctl_flags = + ioctl_table[proc->ioctl_index].arg(proc, req, NULL, dir); + + if (proc->ioctl_flags == 0) { /* no argument printing for this IOCTL */ + put_ptr(proc, name, addr); + + proc->ioctl_index = -1; /* forget about the IOCTL handler */ + + return CT_DONE; + } + + /* + * If this triggers, the IOCTL handler returns a direction that is not + * part of the actual IOCTL, and the handler should be fixed. 
+ */ + if (proc->ioctl_flags & ~dir) { + output_flush(); /* show the IOCTL name for debugging */ + + assert(0); + } + + if (!(proc->ioctl_flags & IF_OUT)) + return CT_NOTDONE; + + size = IOCPARM_LEN(req); + + if (size > sizeof(ioctlbuf) || + mem_get_data(proc->pid, addr, ioctlbuf, size) < 0) { + put_ptr(proc, name, addr); + + /* There's no harm in trying the _in side later anyhow.. */ + return CT_DONE; + } + + put_open(proc, name, 0, "{", ", "); + + all = ioctl_table[proc->ioctl_index].arg(proc, req, ioctlbuf, IF_OUT); + + if (!all) + put_field(proc, NULL, ".."); + + put_close(proc, "}"); + + return CT_DONE; +} + +/* + * Print the returned (in) part of an IOCTL argument, as applicable. This + * function assumes that it is preceded by a call to put_ioctl_arg_out for this + * process. + */ +void +put_ioctl_arg_in(struct trace_proc * proc, const char * name, int failed, + unsigned long req, vir_bytes addr, int is_svrctl) +{ + size_t size; + int all; + + if (valuesonly > 1 || _MINIX_IOCTL_BIG(req) || + proc->ioctl_index == -1) { + put_result(proc); + + return; + } + + assert(proc->ioctl_index >= 0); + assert(proc->ioctl_index < COUNT(ioctl_table)); + assert(ioctl_table[proc->ioctl_index].is_svrctl == is_svrctl); + assert(proc->ioctl_flags != 0); + + if (proc->ioctl_flags & IF_OUT) + put_result(proc); + if (!(proc->ioctl_flags & IF_IN)) + return; + + size = IOCPARM_LEN(req); + + if (failed || size > sizeof(ioctlbuf) || + mem_get_data(proc->pid, addr, ioctlbuf, size) < 0) { + if (!(proc->ioctl_flags & IF_OUT)) { + put_ptr(proc, name, addr); + put_equals(proc); + put_result(proc); + } else if (!failed) + put_field(proc, NULL, "{..}"); + + return; + } + + put_open(proc, name, 0, "{", ", "); + + all = ioctl_table[proc->ioctl_index].arg(proc, req, ioctlbuf, IF_IN); + + if (!all) + put_field(proc, NULL, ".."); + + put_close(proc, "}"); + + if (!(proc->ioctl_flags & IF_OUT)) { + put_equals(proc); + put_result(proc); + } +} diff --git a/minix/usr.bin/trace/ioctl/block.c 
b/minix/usr.bin/trace/ioctl/block.c new file mode 100644 index 000000000..bca7d32d1 --- /dev/null +++ b/minix/usr.bin/trace/ioctl/block.c @@ -0,0 +1,229 @@ + +#include "inc.h" + +#include +#include +#include +#include + +const char * +block_ioctl_name(unsigned long req) +{ + + switch (req) { + NAME(BIOCTRACEBUF); + NAME(BIOCTRACECTL); + NAME(BIOCTRACEGET); /* big IOCTL, not printing argument */ + NAME(DIOCSETP); + NAME(DIOCGETP); + NAME(DIOCEJECT); /* no argument */ + NAME(DIOCTIMEOUT); + NAME(DIOCOPENCT); + NAME(DIOCFLUSH); /* no argument */ + NAME(DIOCGETWC); + NAME(DIOCSETWC); + NAME(FBDCADDRULE); + NAME(FBDCDELRULE); + NAME(FBDCGETRULE); + NAME(MIOCRAMSIZE); + NAME(MTIOCGET); /* TODO: print argument */ + NAME(MTIOCTOP); /* TODO: print argument */ + NAME(VNDIOCCLR); + NAME(VNDIOCGET); + NAME(VNDIOCSET); + } + + return NULL; +} + +static const struct flags fbd_flags[] = { + FLAG(FBD_FLAG_READ), + FLAG(FBD_FLAG_WRITE), +}; + +static void +put_fbd_action(struct trace_proc * proc, const char * name, int action) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (action) { + TEXT(FBD_ACTION_CORRUPT); + TEXT(FBD_ACTION_ERROR); + TEXT(FBD_ACTION_MISDIR); + TEXT(FBD_ACTION_LOSTTORN); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", action); +} + +static const struct flags vnd_flags[] = { + FLAG(VNDIOF_HASGEOM), + FLAG(VNDIOF_READONLY), + FLAG(VNDIOF_FORCE), +}; + +int +block_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, + int dir) +{ + struct part_geom *part; + struct fbd_rule *rule; + struct vnd_ioctl *vnd; + struct vnd_user *vnu; + int i; + + switch (req) { + case BIOCTRACEBUF: + if (ptr == NULL) + return IF_OUT; + + put_value(proc, NULL, "%zu", *(size_t *)ptr); + return IF_ALL; + + case BIOCTRACECTL: + if (ptr == NULL) + return IF_OUT; + + i = *(int *)ptr; + if (!valuesonly && i == BTCTL_START) + put_field(proc, NULL, "BTCTL_START"); + else if (!valuesonly && i == BTCTL_STOP) + 
put_field(proc, NULL, "BTCTL_STOP"); + else + put_value(proc, NULL, "%d", i); + return IF_ALL; + + case DIOCSETP: + if ((part = (struct part_geom *)ptr) == NULL) + return IF_OUT; + + put_value(proc, "base", "%"PRIu64, part->base); + put_value(proc, "size", "%"PRIu64, part->size); + return IF_ALL; + + case DIOCGETP: + if ((part = (struct part_geom *)ptr) == NULL) + return IF_IN; + + put_value(proc, "base", "%"PRIu64, part->base); + put_value(proc, "size", "%"PRIu64, part->size); + if (verbose > 0) { + put_value(proc, "cylinders", "%u", part->cylinders); + put_value(proc, "heads", "%u", part->heads); + put_value(proc, "sectors", "%u", part->sectors); + return IF_ALL; + } else + return 0; + + case DIOCTIMEOUT: + /* Print the old timeout only if verbosity is high enough. */ + if (ptr == NULL) + return IF_OUT | ((verbose > 0) ? IF_IN : 0); + + /* Same action for out and in. */ + put_value(proc, NULL, "%d", *(int *)ptr); + return IF_ALL; + + case DIOCOPENCT: + if (ptr == NULL) + return IF_IN; + + put_value(proc, NULL, "%d", *(int *)ptr); + return IF_ALL; + + case DIOCSETWC: + case DIOCGETWC: + if (ptr == NULL) + return dir; /* out or in, depending on the request */ + + put_value(proc, NULL, "%d", *(int *)ptr); + return IF_ALL; + + case FBDCDELRULE: + if (ptr == NULL) + return IF_OUT; + + put_value(proc, NULL, "%d", *(fbd_rulenum_t *)ptr); + return IF_ALL; + + case FBDCGETRULE: + if ((rule = (struct fbd_rule *)ptr) == NULL) + return IF_OUT | IF_IN; + + if (dir == IF_OUT) { + put_value(proc, "num", "%d", rule->num); + return IF_ALL; + } + + /* + * The returned result is the same as what is passed to the + * add request, so we can use the same code to print both. 
+ */ + /* FALLTHROUGH */ + case FBDCADDRULE: + if ((rule = (struct fbd_rule *)ptr) == NULL) + return IF_OUT; + + if (rule->start != 0 || rule->end != 0 || verbose > 0) { + put_value(proc, "start", "%"PRIu64, rule->start); + put_value(proc, "end", "%"PRIu64, rule->end); + } + if (rule->flags != (FBD_FLAG_READ | FBD_FLAG_WRITE) || + verbose > 0) + put_flags(proc, "flags", fbd_flags, COUNT(fbd_flags), + "0x%x", rule->flags); + if (rule->skip != 0 || verbose > 0) + put_value(proc, "skip", "%u", rule->skip); + if (rule->count != 0 || verbose > 0) + put_value(proc, "count", "%u", rule->count); + put_fbd_action(proc, "action", rule->action); + + return 0; /* TODO: optionally print the union fields */ + + case MIOCRAMSIZE: + if (ptr == NULL) + return IF_OUT; + + put_value(proc, NULL, "%"PRIu32, *(u32_t *)ptr); + return IF_ALL; + + case VNDIOCSET: + if ((vnd = (struct vnd_ioctl *)ptr) == NULL) + return IF_OUT | IF_IN; + + if (dir == IF_OUT) { + put_value(proc, "vnd_fildes", "%d", vnd->vnd_fildes); + put_flags(proc, "vnd_flags", vnd_flags, + COUNT(vnd_flags), "0x%x", vnd->vnd_flags); + return 0; /* TODO: print geometry if given */ + } else { + put_value(proc, "vnd_size", "%"PRIu64, vnd->vnd_size); + return IF_ALL; + } + + case VNDIOCCLR: + if ((vnd = (struct vnd_ioctl *)ptr) == NULL) + return IF_OUT; + + put_flags(proc, "vnd_flags", vnd_flags, COUNT(vnd_flags), + "0x%x", vnd->vnd_flags); + return IF_ALL; + + case VNDIOCGET: + if ((vnu = (struct vnd_user *)ptr) == NULL) + return IF_IN; + + put_value(proc, "vnu_unit", "%d", vnu->vnu_unit); + put_dev(proc, "vnu_dev", vnu->vnu_dev); + put_value(proc, "vnu_ino", "%"PRId64, vnu->vnu_ino); + return IF_ALL; + + default: + return 0; + } +} diff --git a/minix/usr.bin/trace/ioctl/char.c b/minix/usr.bin/trace/ioctl/char.c new file mode 100644 index 000000000..5d940427b --- /dev/null +++ b/minix/usr.bin/trace/ioctl/char.c @@ -0,0 +1,509 @@ + +#include "inc.h" + +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +const char * +char_ioctl_name(unsigned long req) +{ + + switch (req) { + NAME(MINIX_I2C_IOCTL_EXEC); + NAME(FBIOGET_VSCREENINFO); + NAME(FBIOPUT_VSCREENINFO); + NAME(FBIOGET_FSCREENINFO); /* TODO: print argument */ + NAME(FBIOPAN_DISPLAY); + NAME(DSPIORATE); + NAME(DSPIOSTEREO); + NAME(DSPIOSIZE); + NAME(DSPIOBITS); + NAME(DSPIOSIGN); + NAME(DSPIOMAX); + NAME(DSPIORESET); /* no argument */ + NAME(DSPIOFREEBUF); + NAME(DSPIOSAMPLESINBUF); + NAME(DSPIOPAUSE); /* no argument */ + NAME(DSPIORESUME); /* no argument */ + NAME(MIXIOGETVOLUME); + NAME(MIXIOGETINPUTLEFT); + NAME(MIXIOGETINPUTRIGHT); + NAME(MIXIOGETOUTPUT); + NAME(MIXIOSETVOLUME); + NAME(MIXIOSETINPUTLEFT); + NAME(MIXIOSETINPUTRIGHT); + NAME(MIXIOSETOUTPUT); + NAME(TIOCEXCL); /* no argument */ + NAME(TIOCNXCL); /* no argument */ + NAME(TIOCFLUSH); + NAME(TIOCGETA); + NAME(TIOCSETA); + NAME(TIOCSETAW); + NAME(TIOCSETAF); + NAME(TIOCGETD); + NAME(TIOCSETD); + NAME(TIOCGLINED); + NAME(TIOCSLINED); + NAME(TIOCSBRK); /* no argument */ + NAME(TIOCCBRK); /* no argument */ + NAME(TIOCSDTR); /* no argument */ + NAME(TIOCCDTR); /* no argument */ + NAME(TIOCGPGRP); + NAME(TIOCSPGRP); + NAME(TIOCOUTQ); + NAME(TIOCSTI); + NAME(TIOCNOTTY); /* no argument */ + NAME(TIOCPKT); + NAME(TIOCSTOP); /* no argument */ + NAME(TIOCSTART); /* no argument */ + NAME(TIOCMSET); /* TODO: print argument */ + NAME(TIOCMBIS); /* TODO: print argument */ + NAME(TIOCMBIC); /* TODO: print argument */ + NAME(TIOCMGET); /* TODO: print argument */ + NAME(TIOCREMOTE); + NAME(TIOCGWINSZ); + NAME(TIOCSWINSZ); + NAME(TIOCUCNTL); + NAME(TIOCSTAT); + NAME(TIOCGSID); + NAME(TIOCCONS); + NAME(TIOCSCTTY); /* no argument */ + NAME(TIOCEXT); + NAME(TIOCSIG); /* no argument */ + NAME(TIOCDRAIN); /* no argument */ + NAME(TIOCGFLAGS); /* TODO: print argument */ + NAME(TIOCSFLAGS); /* TODO: print argument */ + NAME(TIOCDCDTIMESTAMP); /* TODO: print argument */ + NAME(TIOCRCVFRAME); /* TODO: print argument */ + NAME(TIOCXMTFRAME); 
/* TODO: print argument */ + NAME(TIOCPTMGET); /* TODO: print argument */ + NAME(TIOCGRANTPT); /* no argument */ + NAME(TIOCPTSNAME); /* TODO: print argument */ + NAME(TIOCSQSIZE); + NAME(TIOCGQSIZE); + NAME(TIOCSFON); /* big IOCTL, not printing argument */ + NAME(KIOCBELL); + NAME(KIOCSLEDS); + NAME(KIOCSMAP); /* not worth interpreting */ + NAME(TIOCMAPMEM); + NAME(TIOCUNMAPMEM); + } + + return NULL; +} + +static void +put_i2c_op(struct trace_proc * proc, const char *name, i2c_op_t op) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (op) { + TEXT(I2C_OP_READ); + TEXT(I2C_OP_READ_WITH_STOP); + TEXT(I2C_OP_WRITE); + TEXT(I2C_OP_WRITE_WITH_STOP); + TEXT(I2C_OP_READ_BLOCK); + TEXT(I2C_OP_WRITE_BLOCK); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", op); +} + +static void +put_sound_device(struct trace_proc * proc, const char * name, int device) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (device) { + TEXT(Master); + TEXT(Dac); + TEXT(Fm); + TEXT(Cd); + TEXT(Line); + TEXT(Mic); + TEXT(Speaker); + TEXT(Treble); + TEXT(Bass); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", device); +} + +static void +put_sound_state(struct trace_proc * proc, const char * name, int state) +{ + + if (!valuesonly && state == ON) + put_field(proc, name, "ON"); + else if (!valuesonly && state == OFF) + put_field(proc, name, "OFF"); + else + put_value(proc, name, "%d", state); +} + +static const struct flags flush_flags[] = { + FLAG(FREAD), + FLAG(FWRITE), +}; + +static const struct flags tc_iflags[] = { + FLAG(IGNBRK), + FLAG(BRKINT), + FLAG(IGNPAR), + FLAG(PARMRK), + FLAG(INPCK), + FLAG(ISTRIP), + FLAG(INLCR), + FLAG(IGNCR), + FLAG(ICRNL), + FLAG(IXON), + FLAG(IXOFF), + FLAG(IXANY), + FLAG(IMAXBEL), +}; + +static const struct flags tc_oflags[] = { + FLAG(OPOST), + FLAG(ONLCR), + FLAG(OXTABS), + FLAG(ONOEOT), + FLAG(OCRNL), + FLAG(ONOCR), + FLAG(ONLRET), +}; + 
+static const struct flags tc_cflags[] = { + FLAG(CIGNORE), + FLAG_MASK(CSIZE, CS5), + FLAG_MASK(CSIZE, CS6), + FLAG_MASK(CSIZE, CS7), + FLAG_MASK(CSIZE, CS8), + FLAG(CSTOPB), + FLAG(CREAD), + FLAG(PARENB), + FLAG(PARODD), + FLAG(HUPCL), + FLAG(CLOCAL), + FLAG(CRTSCTS), + FLAG(CDTRCTS), + FLAG(MDMBUF), +}; + +static const struct flags tc_lflags[] = { + FLAG(ECHOKE), + FLAG(ECHOE), + FLAG(ECHOK), + FLAG(ECHO), + FLAG(ECHONL), + FLAG(ECHOPRT), + FLAG(ECHOCTL), + FLAG(ISIG), + FLAG(ICANON), + FLAG(ALTWERASE), + FLAG(IEXTEN), + FLAG(EXTPROC), + FLAG(TOSTOP), + FLAG(FLUSHO), + FLAG(NOKERNINFO), + FLAG(PENDIN), + FLAG(NOFLSH), +}; + +static void +put_tty_disc(struct trace_proc * proc, const char * name, int disc) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (disc) { + TEXT(TTYDISC); + TEXT(TABLDISC); + TEXT(SLIPDISC); + TEXT(PPPDISC); + TEXT(STRIPDISC); + TEXT(HDLCDISC); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", disc); +} + +static const struct flags kbd_leds[] = { + FLAG(KBD_LEDS_NUM), + FLAG(KBD_LEDS_CAPS), + FLAG(KBD_LEDS_SCROLL), +}; + +int +char_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, + int dir) +{ + minix_i2c_ioctl_exec_t *iie; + struct fb_var_screeninfo *fbvs; + struct volume_level *level; + struct inout_ctrl *inout; + struct termios *tc; + struct winsize *ws; + struct kio_bell *bell; + struct kio_leds *leds; + struct mapreqvm *mapreq; + + switch (req) { + case MINIX_I2C_IOCTL_EXEC: + if ((iie = (minix_i2c_ioctl_exec_t *)ptr) == NULL) + return IF_OUT; /* we print only the request for now */ + + put_i2c_op(proc, "iie_op", iie->iie_op); + put_value(proc, "iie_addr", "0x%04x", iie->iie_addr); + return 0; /* TODO: print command/data/result */ + + case FBIOGET_VSCREENINFO: + if ((fbvs = (struct fb_var_screeninfo *)ptr) == NULL) + return IF_IN; + + put_value(proc, "xres", "%"PRIu32, fbvs->xres); + put_value(proc, "yres", "%"PRIu32, fbvs->yres); + put_value(proc, 
"xres_virtual", "%"PRIu32, fbvs->xres_virtual); + put_value(proc, "yres_virtual", "%"PRIu32, fbvs->yres_virtual); + put_value(proc, "xoffset", "%"PRIu32, fbvs->xoffset); + put_value(proc, "yoffset", "%"PRIu32, fbvs->yoffset); + put_value(proc, "bits_per_pixel", "%"PRIu32, + fbvs->bits_per_pixel); + return 0; + + case FBIOPUT_VSCREENINFO: + case FBIOPAN_DISPLAY: + if ((fbvs = (struct fb_var_screeninfo *)ptr) == NULL) + return IF_OUT; + + put_value(proc, "xoffset", "%"PRIu32, fbvs->xoffset); + put_value(proc, "yoffset", "%"PRIu32, fbvs->yoffset); + return 0; + + case DSPIORATE: + case DSPIOSTEREO: + case DSPIOSIZE: + case DSPIOBITS: + case DSPIOSIGN: + case DSPIOMAX: + case DSPIOFREEBUF: + case DSPIOSAMPLESINBUF: + if (ptr == NULL) + return dir; + + put_value(proc, NULL, "%u", *(unsigned int *)ptr); + return IF_ALL; + + case MIXIOGETVOLUME: + if ((level = (struct volume_level *)ptr) == NULL) + return dir; + + if (dir == IF_OUT) + put_sound_device(proc, "device", level->device); + else { + put_value(proc, "left", "%d", level->left); + put_value(proc, "right", "%d", level->right); + } + return IF_ALL; + + case MIXIOSETVOLUME: + /* Print the corrected volume levels only with verbosity on. */ + if ((level = (struct volume_level *)ptr) == NULL) + return IF_OUT | ((verbose > 0) ? 
IF_IN : 0); + + if (dir == IF_OUT) + put_sound_device(proc, "device", level->device); + put_value(proc, "left", "%d", level->left); + put_value(proc, "right", "%d", level->right); + return IF_ALL; + + case MIXIOGETINPUTLEFT: + case MIXIOGETINPUTRIGHT: + case MIXIOGETOUTPUT: + if ((inout = (struct inout_ctrl *)ptr) == NULL) + return dir; + + if (dir == IF_OUT) + put_sound_device(proc, "device", inout->device); + else { + put_sound_state(proc, "left", inout->left); + put_sound_state(proc, "right", inout->right); + } + return IF_ALL; + + case MIXIOSETINPUTLEFT: + case MIXIOSETINPUTRIGHT: + case MIXIOSETOUTPUT: + if ((inout = (struct inout_ctrl *)ptr) == NULL) + return IF_OUT; + + put_sound_device(proc, "device", inout->device); + put_sound_state(proc, "left", inout->left); + put_sound_state(proc, "right", inout->right); + return IF_ALL; + + case TIOCFLUSH: + if (ptr == NULL) + return IF_OUT; + + put_flags(proc, NULL, flush_flags, COUNT(flush_flags), "0x%x", + *(int *)ptr); + return IF_ALL; + + case TIOCGETA: + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: + if ((tc = (struct termios *)ptr) == NULL) + return dir; + + /* + * These are fairly common IOCTLs, so printing everything by + * default would create a lot of noise. By default we limit + * ourselves to printing the field that contains what I + * consider to be the most important flag: ICANON. + * TODO: see if we can come up with a decent format for + * selectively printing (relatively important) flags. 
+ */ + if (verbose > 0) { + put_flags(proc, "c_iflag", tc_iflags, COUNT(tc_iflags), + "0x%x", tc->c_iflag); + put_flags(proc, "c_oflag", tc_oflags, COUNT(tc_oflags), + "0x%x", tc->c_oflag); + put_flags(proc, "c_cflag", tc_cflags, COUNT(tc_cflags), + "0x%x", tc->c_cflag); + } + put_flags(proc, "c_lflag", tc_lflags, COUNT(tc_lflags), "0x%x", + tc->c_lflag); + if (verbose > 0) { + put_value(proc, "c_ispeed", "%d", tc->c_ispeed); + put_value(proc, "c_ospeed", "%d", tc->c_ospeed); + } + return 0; /* TODO: print the c_cc fields */ + + case TIOCGETD: + case TIOCSETD: + if (ptr == NULL) + return dir; + + put_tty_disc(proc, NULL, *(int *)ptr); + return IF_ALL; + + case TIOCGLINED: + case TIOCSLINED: + if (ptr == NULL) + return dir; + + put_buf(proc, NULL, PF_LOCADDR | PF_STRING, (vir_bytes)ptr, + sizeof(linedn_t)); + return IF_ALL; + + case TIOCGPGRP: + case TIOCSPGRP: + case TIOCOUTQ: + case TIOCPKT: + case TIOCREMOTE: + case TIOCUCNTL: + case TIOCSTAT: /* argument seems unused? */ + case TIOCGSID: + case TIOCCONS: /* argument seems unused? */ + case TIOCEXT: + case TIOCSQSIZE: + case TIOCGQSIZE: + /* Print a simple integer. */ + if (ptr == NULL) + return dir; + + put_value(proc, NULL, "%d", *(int *)ptr); + return IF_ALL; + + case TIOCSTI: + if (ptr == NULL) + return dir; + + if (!valuesonly) + put_value(proc, NULL, "'%s'", + get_escape(*(char *)ptr)); + else + put_value(proc, NULL, "%u", *(unsigned char *)ptr); + return IF_ALL; + + case TIOCGWINSZ: + case TIOCSWINSZ: + if ((ws = (struct winsize *)ptr) == NULL) + return dir; + + /* This is a stupid order, but we follow the struct layout. */ + put_value(proc, "ws_row", "%u", ws->ws_row); + put_value(proc, "ws_col", "%u", ws->ws_col); + if (verbose > 0) { + put_value(proc, "ws_xpixel", "%u", ws->ws_xpixel); + put_value(proc, "ws_ypixel", "%u", ws->ws_ypixel); + } + return (verbose > 0) ? 
IF_ALL : 0; + + case KIOCBELL: + if ((bell = (struct kio_bell *)ptr) == NULL) + return IF_OUT; + + put_value(proc, "kb_pitch", "%u", bell->kb_pitch); + put_value(proc, "kb_volume", "%lu", bell->kb_volume); + put_struct_timeval(proc, "kb_duration", PF_LOCADDR, + (vir_bytes)&bell->kb_duration); + + return IF_ALL; + + case KIOCSLEDS: + if ((leds = (struct kio_leds *)ptr) == NULL) + return IF_OUT; + + put_flags(proc, "kl_bits", kbd_leds, COUNT(kbd_leds), "0x%x", + leds->kl_bits); + return IF_ALL; + + case TIOCMAPMEM: + if ((mapreq = (struct mapreqvm *)ptr) == NULL) + return dir; + + /* This structure has more fields, but they're all unused.. */ + if (dir == IF_OUT) { + put_value(proc, "phys_offset", "%"PRIu64, + (uint64_t)mapreq->phys_offset); /* future compat */ + put_value(proc, "size", "%zu", mapreq->size); + } else + put_ptr(proc, "vaddr_ret", (vir_bytes)mapreq->vaddr); + return IF_ALL; + + case TIOCUNMAPMEM: + if ((mapreq = (struct mapreqvm *)ptr) == NULL) + return IF_OUT; + + put_ptr(proc, "vaddr", (vir_bytes)mapreq->vaddr); + put_value(proc, "size", "%zu", mapreq->size); + return IF_ALL; + + default: + return 0; + } +} diff --git a/minix/usr.bin/trace/ioctl/net.c b/minix/usr.bin/trace/ioctl/net.c new file mode 100644 index 000000000..3a4f42847 --- /dev/null +++ b/minix/usr.bin/trace/ioctl/net.c @@ -0,0 +1,565 @@ + +#include "inc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const char * +net_ioctl_name(unsigned long req) +{ + + switch (req) { + NAME(FIONREAD); + NAME(NWIOSETHOPT); /* TODO: print argument */ + NAME(NWIOGETHOPT); /* TODO: print argument */ + NAME(NWIOGETHSTAT); /* TODO: print argument */ + NAME(NWIOARPGIP); /* TODO: print argument */ + NAME(NWIOARPGNEXT); /* TODO: print argument */ + NAME(NWIOARPSIP); /* TODO: print argument */ + NAME(NWIOARPDIP); /* TODO: print argument */ + NAME(NWIOSIPCONF2); /* TODO: print argument */ + 
NAME(NWIOSIPCONF); /* TODO: print argument */ + NAME(NWIOGIPCONF2); /* TODO: print argument */ + NAME(NWIOGIPCONF); /* TODO: print argument */ + NAME(NWIOSIPOPT); + NAME(NWIOGIPOPT); + NAME(NWIOGIPOROUTE); /* TODO: print argument */ + NAME(NWIOSIPOROUTE); /* TODO: print argument */ + NAME(NWIODIPOROUTE); /* TODO: print argument */ + NAME(NWIOGIPIROUTE); /* TODO: print argument */ + NAME(NWIOSIPIROUTE); /* TODO: print argument */ + NAME(NWIODIPIROUTE); /* TODO: print argument */ + NAME(NWIOSTCPCONF); + NAME(NWIOGTCPCONF); + NAME(NWIOTCPCONN); + NAME(NWIOTCPLISTEN); + NAME(NWIOTCPATTACH); /* TODO: print argument */ + NAME(NWIOTCPSHUTDOWN); /* no argument */ + NAME(NWIOSTCPOPT); + NAME(NWIOGTCPOPT); + NAME(NWIOTCPPUSH); /* no argument */ + NAME(NWIOTCPLISTENQ); + NAME(NWIOGTCPCOOKIE); + NAME(NWIOTCPACCEPTTO); + NAME(NWIOTCPGERROR); + NAME(NWIOSUDPOPT); + NAME(NWIOGUDPOPT); + NAME(NWIOUDPPEEK); /* TODO: print argument */ + NAME(NWIOSPSIPOPT); /* TODO: print argument */ + NAME(NWIOGPSIPOPT); /* TODO: print argument */ + NAME(NWIOGUDSFADDR); + NAME(NWIOSUDSTADDR); + NAME(NWIOSUDSADDR); + NAME(NWIOGUDSADDR); + NAME(NWIOGUDSPADDR); + NAME(NWIOSUDSTYPE); + NAME(NWIOSUDSBLOG); + NAME(NWIOSUDSCONN); + NAME(NWIOSUDSSHUT); + NAME(NWIOSUDSPAIR); + NAME(NWIOSUDSACCEPT); + NAME(NWIOSUDSCTRL); + NAME(NWIOGUDSCTRL); + NAME(NWIOGUDSSOTYPE); + NAME(NWIOGUDSPEERCRED); + NAME(NWIOGUDSSNDBUF); + NAME(NWIOSUDSSNDBUF); + NAME(NWIOGUDSRCVBUF); + NAME(NWIOSUDSRCVBUF); + } + + return NULL; +} + +static const struct flags ipopt_flags[] = { + FLAG_ZERO(NWIO_NOFLAGS), + FLAG_MASK(NWIO_ACC_MASK, NWIO_EXCL), + FLAG_MASK(NWIO_ACC_MASK, NWIO_SHARED), + FLAG_MASK(NWIO_ACC_MASK, NWIO_COPY), + FLAG(NWIO_EN_LOC), + FLAG(NWIO_DI_LOC), + FLAG(NWIO_EN_BROAD), + FLAG(NWIO_DI_BROAD), + FLAG(NWIO_REMSPEC), + FLAG(NWIO_REMANY), + FLAG(NWIO_PROTOSPEC), + FLAG(NWIO_PROTOANY), + FLAG(NWIO_HDR_O_SPEC), + FLAG(NWIO_HDR_O_ANY), + FLAG(NWIO_RWDATONLY), + FLAG(NWIO_RWDATALL), +}; + +static void +put_ipaddr(struct 
trace_proc * proc, const char * name, ipaddr_t ipaddr) +{ + struct in_addr in; + + if (!valuesonly) { + in.s_addr = ipaddr; + + /* Is this an acceptable encapsulation? */ + put_value(proc, name, "[%s]", inet_ntoa(in)); + } else + put_value(proc, name, "0x%08x", ntohl(ipaddr)); +} + +static void +put_ipproto(struct trace_proc * proc, const char * name, ipproto_t proto) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (proto) { + TEXT(IPPROTO_ICMP); + TEXT(IPPROTO_TCP); + TEXT(IPPROTO_UDP); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%u", proto); +} + +static const struct flags tcpconf_flags[] = { + FLAG_ZERO(NWTC_NOFLAGS), + FLAG_MASK(NWTC_ACC_MASK, NWTC_EXCL), + FLAG_MASK(NWTC_ACC_MASK, NWTC_SHARED), + FLAG_MASK(NWTC_ACC_MASK, NWTC_COPY), + FLAG_MASK(NWTC_LOCPORT_MASK, NWTC_LP_UNSET), + FLAG_MASK(NWTC_LOCPORT_MASK, NWTC_LP_SET), + FLAG_MASK(NWTC_LOCPORT_MASK, NWTC_LP_SEL), + FLAG(NWTC_SET_RA), + FLAG(NWTC_UNSET_RA), + FLAG(NWTC_SET_RP), + FLAG(NWTC_UNSET_RP), +}; + +#define put_port(proc, name, port) \ + put_value(proc, name, "%u", ntohs(port)) + +static const struct flags tcpcl_flags[] = { + FLAG_ZERO(TCF_DEFAULT), + FLAG(TCF_ASYNCH), +}; + +static const struct flags tcpopt_flags[] = { + FLAG_ZERO(NWTO_NOFLAG), + FLAG(NWTO_SND_URG), + FLAG(NWTO_SND_NOTURG), + FLAG(NWTO_RCV_URG), + FLAG(NWTO_RCV_NOTURG), + FLAG(NWTO_BSD_URG), + FLAG(NWTO_NOTBSD_URG), + FLAG(NWTO_DEL_RST), + FLAG(NWTO_BULK), + FLAG(NWTO_NOBULK), +}; + +static const struct flags udpopt_flags[] = { + FLAG_ZERO(NWUO_NOFLAGS), + FLAG_MASK(NWUO_ACC_MASK, NWUO_EXCL), + FLAG_MASK(NWUO_ACC_MASK, NWUO_SHARED), + FLAG_MASK(NWUO_ACC_MASK, NWUO_COPY), + FLAG_MASK(NWUO_LOCPORT_MASK, NWUO_LP_SET), + FLAG_MASK(NWUO_LOCPORT_MASK, NWUO_LP_SEL), + FLAG_MASK(NWUO_LOCPORT_MASK, NWUO_LP_ANY), + FLAG(NWUO_EN_LOC), + FLAG(NWUO_DI_LOC), + FLAG(NWUO_EN_BROAD), + FLAG(NWUO_DI_BROAD), + FLAG(NWUO_RP_SET), + FLAG(NWUO_RP_ANY), + FLAG(NWUO_RA_SET), + 
FLAG(NWUO_RA_ANY), + FLAG(NWUO_RWDATONLY), + FLAG(NWUO_RWDATALL), + FLAG(NWUO_EN_IPOPT), + FLAG(NWUO_DI_IPOPT), +}; + +static void +put_family(struct trace_proc * proc, const char * name, int family) +{ + const char *text = NULL; + + if (!valuesonly) { + /* TODO: add all the other protocols */ + switch (family) { + TEXT(AF_UNSPEC); + TEXT(AF_LOCAL); + TEXT(AF_INET); + TEXT(AF_INET6); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", family); +} + +static const struct flags sock_type[] = { + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_STREAM), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_DGRAM), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RAW), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_RDM), + FLAG_MASK(~SOCK_FLAGS_MASK, SOCK_SEQPACKET), + FLAG(SOCK_CLOEXEC), + FLAG(SOCK_NONBLOCK), + FLAG(SOCK_NOSIGPIPE), +}; + +static void +put_shutdown_how(struct trace_proc * proc, const char * name, int how) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (how) { + TEXT(SHUT_RD); + TEXT(SHUT_WR); + TEXT(SHUT_RDWR); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", how); +} + +static void +put_struct_uucred(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct uucred cred; + + if (!put_open_struct(proc, name, flags, addr, &cred, sizeof(cred))) + return; + + put_value(proc, "cr_uid", "%u", cred.cr_uid); + if (verbose > 0) { + put_value(proc, "cr_gid", "%u", cred.cr_gid); + if (verbose > 1) + put_value(proc, "cr_ngroups", "%d", cred.cr_ngroups); + put_groups(proc, "cr_groups", PF_LOCADDR, + (vir_bytes)&cred.cr_groups, cred.cr_ngroups); + } + + put_close_struct(proc, verbose > 0); +} + +static void +put_cmsg_type(struct trace_proc * proc, const char * name, int type) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (type) { + TEXT(SCM_RIGHTS); + TEXT(SCM_CREDS); + TEXT(SCM_TIMESTAMP); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + 
put_value(proc, name, "%d", type); +} + +static void +put_msg_control(struct trace_proc * proc, struct msg_control * ptr) +{ + struct msghdr msg; + struct cmsghdr *cmsg; + size_t len; + int i; + + if (ptr->msg_controllen > sizeof(ptr->msg_control)) { + put_field(proc, NULL, ".."); + + return; + } + + put_open(proc, NULL, PF_NONAME, "[", ", "); + + memset(&msg, 0, sizeof(msg)); + msg.msg_control = ptr->msg_control; + msg.msg_controllen = ptr->msg_controllen; + + /* + * TODO: decide if we need a verbosity-based limit here. The argument + * in favor of printing everything is that upon receipt, SCM_RIGHTS + * actually creates new file descriptors, which is pretty essential in + * terms of figuring out what is happening in a process. In addition, + * these calls should be sufficiently rare that the lengthy output is + * not really disruptive for the general output flow. + */ + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + put_open(proc, NULL, 0, "{", ", "); + + if (verbose > 0) + put_value(proc, "cmsg_len", "%u", cmsg->cmsg_len); + if (!valuesonly && cmsg->cmsg_level == SOL_SOCKET) + put_field(proc, "cmsg_level", "SOL_SOCKET"); + else + put_value(proc, "cmsg_level", "%d", cmsg->cmsg_level); + if (cmsg->cmsg_level == SOL_SOCKET) + put_cmsg_type(proc, "cmsg_type", cmsg->cmsg_type); + + len = (cmsg->cmsg_len > CMSG_LEN(0)) ? cmsg->cmsg_len - CMSG_LEN(0) : 0; + + /* Print the contents of the messages that we know. 
*/ + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + put_open(proc, NULL, PF_NONAME, "[", ", "); + for (i = 0; i < len / sizeof(int); i++) + put_fd(proc, NULL, + ((int *)CMSG_DATA(cmsg))[i]); + put_close(proc, "]"); + } else if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDS) { + put_struct_uucred(proc, NULL, PF_LOCADDR, + (vir_bytes)CMSG_DATA(cmsg)); + } else if (len > 0) + put_field(proc, NULL, ".."); + + put_close(proc, "}"); + } + + put_close(proc, "]"); +} + +int +net_ioctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, int dir) +{ + const char *text; + nwio_ipopt_t *ipopt; + nwio_tcpconf_t *nwtc; + nwio_tcpcl_t *nwtcl; + nwio_tcpopt_t *nwto; + tcp_cookie_t *cookie; + nwio_udpopt_t *nwuo; + struct sockaddr_un *sun; + int i; + + switch (req) { + case FIONREAD: + /* + * Arguably this does not belong here, but as of writing, the + * network services are the only ones actually implementing + * support for this IOCTL, and we don't have a more suitable + * place to put it either. + */ + if (ptr == NULL) + return IF_IN; + + put_value(proc, NULL, "%d", *(int *)ptr); + return IF_ALL; + + case NWIOSIPOPT: + case NWIOGIPOPT: + if ((ipopt = (nwio_ipopt_t *)ptr) == NULL) + return dir; + + put_flags(proc, "nwio_flags", ipopt_flags, COUNT(ipopt_flags), + "0x%x", ipopt->nwio_flags); + + if (ipopt->nwio_flags & NWIO_REMSPEC) + put_ipaddr(proc, "nwio_rem", ipopt->nwio_rem); + if (ipopt->nwio_flags & NWIO_PROTOSPEC) + put_ipproto(proc, "nwio_proto", ipopt->nwio_proto); + + return 0; /* TODO: the remaining fields */ + + case NWIOSTCPCONF: + case NWIOGTCPCONF: + if ((nwtc = (nwio_tcpconf_t *)ptr) == NULL) + return dir; + + put_flags(proc, "nwtc_flags", tcpconf_flags, + COUNT(tcpconf_flags), "0x%x", nwtc->nwtc_flags); + + /* The local address cannot be set, just retrieved. 
*/ + if (req == NWIOGTCPCONF) + put_ipaddr(proc, "nwtc_locaddr", nwtc->nwtc_locaddr); + + if ((nwtc->nwtc_flags & NWTC_LOCPORT_MASK) == NWTC_LP_SET) + put_port(proc, "nwtc_locport", nwtc->nwtc_locport); + + if (nwtc->nwtc_flags & NWTC_SET_RA) + put_ipaddr(proc, "nwtc_remaddr", nwtc->nwtc_remaddr); + + if (nwtc->nwtc_flags & NWTC_SET_RP) + put_port(proc, "nwtc_remport", nwtc->nwtc_remport); + + return IF_ALL; + + case NWIOTCPCONN: + case NWIOTCPLISTEN: + if ((nwtcl = (nwio_tcpcl_t *)ptr) == NULL) + return dir; + + put_flags(proc, "nwtcl_flags", tcpcl_flags, + COUNT(tcpcl_flags), "0x%x", nwtcl->nwtcl_flags); + + /* We pretend the unused nwtcl_ttl field does not exist. */ + return IF_ALL; + + case NWIOSTCPOPT: + case NWIOGTCPOPT: + if ((nwto = (nwio_tcpopt_t *)ptr) == NULL) + return dir; + + put_flags(proc, "nwto_flags", tcpopt_flags, + COUNT(tcpopt_flags), "0x%x", nwto->nwto_flags); + return IF_ALL; + + case NWIOTCPLISTENQ: + case NWIOSUDSBLOG: + if (ptr == NULL) + return IF_OUT; + + put_value(proc, NULL, "%d", *(int *)ptr); + return IF_ALL; + + case NWIOGTCPCOOKIE: + case NWIOTCPACCEPTTO: + if ((cookie = (tcp_cookie_t *)ptr) == NULL) + return dir; + + put_value(proc, "tc_ref", "%"PRIu32, cookie->tc_ref); + if (verbose > 0) + put_buf(proc, "tc_secret", PF_LOCADDR, + (vir_bytes)&cookie->tc_secret, + sizeof(cookie->tc_secret)); + return (verbose > 0) ? IF_ALL : 0; + + case NWIOTCPGERROR: + if (ptr == NULL) + return IF_IN; + + i = *(int *)ptr; + if (!valuesonly && (text = get_error_name(i)) != NULL) + put_field(proc, NULL, text); + else + put_value(proc, NULL, "%d", i); + return IF_ALL; + + case NWIOSUDPOPT: + case NWIOGUDPOPT: + if ((nwuo = (nwio_udpopt_t *)ptr) == NULL) + return dir; + + put_flags(proc, "nwuo_flags", udpopt_flags, + COUNT(udpopt_flags), "0x%x", nwuo->nwuo_flags); + + /* The local address cannot be set, just retrieved. 
*/ + if (req == NWIOGUDPOPT) + put_ipaddr(proc, "nwuo_locaddr", nwuo->nwuo_locaddr); + + if ((nwuo->nwuo_flags & NWUO_LOCPORT_MASK) == NWUO_LP_SET) + put_port(proc, "nwuo_locport", nwuo->nwuo_locport); + + if (nwuo->nwuo_flags & NWUO_RA_SET) + put_ipaddr(proc, "nwuo_remaddr", nwuo->nwuo_remaddr); + + if (nwuo->nwuo_flags & NWUO_RP_SET) + put_port(proc, "nwuo_remport", nwuo->nwuo_remport); + + return IF_ALL; + + case NWIOGUDSFADDR: + case NWIOSUDSTADDR: + case NWIOSUDSADDR: + case NWIOGUDSADDR: + case NWIOGUDSPADDR: + case NWIOSUDSCONN: + case NWIOSUDSACCEPT: + if ((sun = (struct sockaddr_un *)ptr) == NULL) + return dir; + + put_family(proc, "sun_family", sun->sun_family); + + /* This could be extended to a generic sockaddr printer.. */ + if (sun->sun_family == AF_LOCAL) { + put_buf(proc, "sun_path", PF_LOCADDR | PF_PATH, + (vir_bytes)&sun->sun_path, sizeof(sun->sun_path)); + return IF_ALL; /* skipping sun_len, it's unused */ + } else + return 0; + + case NWIOSUDSTYPE: + case NWIOGUDSSOTYPE: + if (ptr == NULL) + return dir; + + put_flags(proc, NULL, sock_type, COUNT(sock_type), "0x%x", + *(int *)ptr); + return IF_ALL; + + case NWIOSUDSSHUT: + if (ptr == NULL) + return IF_OUT; + + put_shutdown_how(proc, NULL, *(int *)ptr); + return IF_ALL; + + case NWIOSUDSPAIR: + if (ptr == NULL) + return IF_OUT; + + put_dev(proc, NULL, *(dev_t *)ptr); + return IF_ALL; + + case NWIOSUDSCTRL: + if (ptr == NULL) + return IF_OUT; + + /* FALLTHROUGH */ + case NWIOGUDSCTRL: + if (ptr == NULL) + return IF_IN; + + put_msg_control(proc, (struct msg_control *)ptr); + return IF_ALL; + + case NWIOGUDSPEERCRED: + if (ptr == NULL) + return IF_IN; + + put_struct_uucred(proc, NULL, PF_LOCADDR, (vir_bytes)ptr); + return IF_ALL; + + case NWIOGUDSSNDBUF: + case NWIOSUDSSNDBUF: + case NWIOGUDSRCVBUF: + case NWIOSUDSRCVBUF: + if (ptr == NULL) + return dir; + + put_value(proc, NULL, "%zu", *(size_t *)ptr); + return IF_ALL; + + default: + return 0; + } +} diff --git a/minix/usr.bin/trace/ioctl/svrctl.c 
b/minix/usr.bin/trace/ioctl/svrctl.c new file mode 100644 index 000000000..8708ed221 --- /dev/null +++ b/minix/usr.bin/trace/ioctl/svrctl.c @@ -0,0 +1,63 @@ + +#include "inc.h" + +#include + +const char * +svrctl_name(unsigned long req) +{ + + switch (req) { + NAME(PMSETPARAM); + NAME(PMGETPARAM); + NAME(VFSGETPARAM); + NAME(VFSSETPARAM); + } + + return NULL; +} + +int +svrctl_arg(struct trace_proc * proc, unsigned long req, void * ptr, int dir) +{ + struct sysgetenv *env; + + switch (req) { + case PMSETPARAM: + case VFSSETPARAM: + if ((env = (struct sysgetenv *)ptr) == NULL) + return IF_OUT; + + put_buf(proc, "key", PF_STRING, (vir_bytes)env->key, + env->keylen); + put_buf(proc, "value", PF_STRING, (vir_bytes)env->val, + env->vallen); + return IF_ALL; + + case PMGETPARAM: + case VFSGETPARAM: + if ((env = (struct sysgetenv *)ptr) == NULL) + return IF_OUT | IF_IN; + + /* + * So far this is the only IOCTL case where the output depends + * on one of the values in the input: if the given key is NULL, + * PM provides the entire system environment in return, which + * means we cannot just print a single string. We rely on PM + * not changing the key field, which (while true) is an + * assumption. With the current (simple) model we would have + * to save the provided key pointer somewhere otherwise. + */ + if (dir == IF_OUT) + put_buf(proc, "key", PF_STRING, (vir_bytes)env->key, + env->keylen); + else + put_buf(proc, "value", + (env->key != NULL) ? PF_STRING : 0, + (vir_bytes)env->val, env->vallen); + return IF_ALL; + + default: + return 0; + } +} diff --git a/minix/usr.bin/trace/kernel.c b/minix/usr.bin/trace/kernel.c new file mode 100644 index 000000000..195123049 --- /dev/null +++ b/minix/usr.bin/trace/kernel.c @@ -0,0 +1,307 @@ +/* + * This file, and only this file, should contain all the ugliness needed to + * obtain values from the kernel. It has to be recompiled every time the + * layout of the kernel "struct proc" and/or "struct priv" structures changes. 
+ * In addition, this file contains the platform-dependent code related to + * interpreting the registers exposed by the kernel. + * + * As a quick note, some functions return TRUE/FALSE, and some return 0/-1. + * The former convention is used for functions that return a boolean value; + * the latter is used for functions that set errno in all cases of failure, + * and where the caller may conceivably use errno as a result. + * + * On a related note, relevant here and elsewhere: we define _MINIX_SYSTEM but + * not _SYSTEM, which means that we should not get negative error numbers. + */ + +#include "inc.h" + +#include +#include +#include "kernel/proc.h" +#include "kernel/priv.h" +#if defined(__i386__) +#include "kernel/arch/i386/include/archconst.h" /* for the KTS_ constants */ +#endif + +#include + +extern struct minix_kerninfo *_minix_kerninfo; + +/* + * Working area. By obtaining values from the kernel into these local process + * structures, and then returning them, we gain a little robustness against + * changes in data types of the fields we need. + */ +static struct proc kernel_proc; +static struct priv kernel_priv; + +/* + * Check whether our notion of the kernel process structure layout matches that + * of the kernel, by comparing magic values. This can be done only once we + * have attached to a process. Return TRUE if everything seems alright; FALSE + * otherwise. + */ +int +kernel_check(pid_t pid) +{ + + if (mem_get_user(pid, offsetof(struct proc, p_magic), + &kernel_proc.p_magic, sizeof(kernel_proc.p_magic)) < 0) + return FALSE; + + return (kernel_proc.p_magic == PMAGIC); +} + +/* + * Obtain the kernel name for the given (stopped) process. Return 0 on + * success, with the (possibly truncated) name stored in the 'name' buffer + * which is of 'size' bytes; the name will be null-terminated. Note that the + * name may contain any suffixes as set by the kernel. Return -1 on failure, + * with errno set as appropriate. 
+ */ +int +kernel_get_name(pid_t pid, char * name, size_t size) +{ + + if (mem_get_user(pid, offsetof(struct proc, p_name), + kernel_proc.p_name, sizeof(kernel_proc.p_name)) < 0) + return -1; + + strlcpy(name, kernel_proc.p_name, size); + return 0; +} + +/* + * Check whether the given process, which we have just attached to, is a system + * service. PM does not prevent us from attaching to most system services, + * even though this utility only supports tracing user programs. Unlike a few + * other routines in this file, this function can not use ProcFS to obtain its + * result, because the given process may actually be VFS or ProcFS itself! + * Return TRUE if the given process is a system service; FALSE if not. + */ +int +kernel_is_service(pid_t pid) +{ + size_t align, off; + + /* + * For T_GETUSER, the priv structure follows the proc structure, but + * possibly with padding in between so as to align the priv structure + * to long boundary. + */ + align = sizeof(long) - 1; + off = (sizeof(struct proc) + align) & ~align; + + if (mem_get_user(pid, off + offsetof(struct priv, s_id), + &kernel_priv.s_id, sizeof(kernel_priv.s_id)) < 0) + return FALSE; /* process may have disappeared, so no danger */ + + return (kernel_priv.s_id != USER_PRIV_ID); +} + +/* + * For the given process, which must be stopped on entering a system call, + * retrieve the three register values describing the system call. Return 0 on + * success, or -1 on failure with errno set as appropriate. + */ +int +kernel_get_syscall(pid_t pid, reg_t reg[3]) +{ + + assert(sizeof(kernel_proc.p_defer) == sizeof(reg_t) * 3); + + if (mem_get_user(pid, offsetof(struct proc, p_defer), + &kernel_proc.p_defer, sizeof(kernel_proc.p_defer)) < 0) + return -1; + + reg[0] = kernel_proc.p_defer.r1; + reg[1] = kernel_proc.p_defer.r2; + reg[2] = kernel_proc.p_defer.r3; + return 0; +} + +/* + * Retrieve the value of the primary return register for the given process, + * which must be stopped on leaving a system call. 
This register contains the + * IPC-level result of the system call. Return 0 on success, or -1 on failure + * with errno set as appropriate. + */ +int +kernel_get_retreg(pid_t pid, reg_t * retreg) +{ + size_t off; + + /* + * Historically p_reg had to be the first field in the proc structure, + * but since this is no longer a hard requirement, getting its actual + * offset into the proc structure certainly doesn't hurt. + */ + off = offsetof(struct proc, p_reg); + + if (mem_get_user(pid, off + offsetof(struct stackframe_s, retreg), + &kernel_proc.p_reg.retreg, sizeof(kernel_proc.p_reg.retreg)) < 0) + return -1; + + *retreg = kernel_proc.p_reg.retreg; + return 0; +} + +/* + * Return the stack top for user processes. This is needed for execve(), since + * the supplied frame contains pointers prepared for the new location of the + * frame, which is at the stack top of the process after the execve(). + */ +vir_bytes +kernel_get_stacktop(void) +{ + + return _minix_kerninfo->kinfo->user_sp; +} + +/* + * For the given stopped process, get its program counter (pc), stack pointer + * (sp), and optionally its frame pointer (fp). The given fp pointer may be + * NULL, in which case the frame pointer is not obtained. The given pc and sp + * pointers must not be NULL, and this is intentional: obtaining fp may require + * obtaining sp first. Return 0 on success, or -1 on failure with errno set + * as appropriate. This functionality is not essential for tracing processes, + * and may not be supported on all platforms, in part or full. In particular, + * on some platforms, a zero (= invalid) frame pointer may be returned on + * success, indicating that obtaining frame pointers is not supported. 
+ */
+int
+kernel_get_context(pid_t pid, reg_t * pc, reg_t * sp, reg_t * fp)
+{
+	size_t off;
+
+	off = offsetof(struct proc, p_reg); /* base of the saved registers */
+
+	if (mem_get_user(pid, off + offsetof(struct stackframe_s, pc),
+	    &kernel_proc.p_reg.pc, sizeof(kernel_proc.p_reg.pc)) < 0)
+		return -1;
+	if (mem_get_user(pid, off + offsetof(struct stackframe_s, sp),
+	    &kernel_proc.p_reg.sp, sizeof(kernel_proc.p_reg.sp)) < 0)
+		return -1;
+
+	*pc = kernel_proc.p_reg.pc;
+	*sp = kernel_proc.p_reg.sp;
+
+	if (fp == NULL)
+		return 0; /* the caller did not ask for the frame pointer */
+
+#if defined(__i386__)
+	if (mem_get_user(pid, offsetof(struct proc, p_seg) +
+	    offsetof(struct segframe, p_kern_trap_style),
+	    &kernel_proc.p_seg.p_kern_trap_style,
+	    sizeof(kernel_proc.p_seg.p_kern_trap_style)) < 0)
+		return -1;
+
+	/* This is taken from the kernel i386 exception code. */
+	switch (kernel_proc.p_seg.p_kern_trap_style) {
+	case KTS_SYSENTER:	/* fp is read from process memory at sp + 16; */
+	case KTS_SYSCALL:	/* copy one reg_t, not one pointer's worth */
+		if (mem_get_data(pid, *sp + 16, fp, sizeof(*fp)) < 0)
+			return -1;
+		break;
+
+	default:
+		if (mem_get_user(pid, off + offsetof(struct stackframe_s, fp),
+		    &kernel_proc.p_reg.fp, sizeof(kernel_proc.p_reg.fp)) < 0)
+			return -1;
+
+		*fp = kernel_proc.p_reg.fp;
+	}
+#else
+	*fp = 0; /* not supported; this is not a failure (*pc is valid) */
+#endif
+	return 0;
+}
+
+/*
+ * Given a frame pointer, obtain the next program counter and frame pointer.
+ * Return 0 if successful, or -1 on failure with errno set appropriately.  The
+ * functionality is not essential for tracing processes, and may not be
+ * supported on all platforms.  Thus, on some platforms, this function may
+ * always fail.
+ */
+static int
+kernel_get_nextframe(pid_t pid, reg_t fp, reg_t * next_pc, reg_t * next_fp)
+{
+#if defined(__i386__)
+	void *p[2];	/* p[0] = saved frame pointer, p[1] = return address */
+
+	if (mem_get_data(pid, (vir_bytes)fp, &p, sizeof(p)) < 0)
+		return -1;
+
+	*next_pc = (reg_t)p[1];
+	*next_fp = (reg_t)p[0];
+	return 0;
+#else
+	/* Not supported (yet).
*/ + errno = ENOSYS; + return -1; +#endif +} + +/* + * Print a stack trace for the given process, which is known to be stopped on + * entering a system call. This function does not really belong here, but + * without a doubt it is going to have to be fully rewritten to support + * anything other than i386. + * + * Getting symbol names is currently an absolute nightmare. Not just because + * of shared libraries, but also since ProcFS does not offer a /proc/NNN/exe, + * so that we cannot reliably determine the binary being executed: not for + * processes being attached to, and not for exec calls using a relative path. + */ +void +kernel_put_stacktrace(struct trace_proc * proc) +{ + unsigned int count, max; + reg_t pc, sp, fp, low, high; + + if (kernel_get_context(proc->pid, &pc, &sp, &fp) < 0) + return; + + /* + * A low default limit such as 6 looks much prettier, but is simply not + * useful enough for moderately-sized programs in practice. Right now, + * 15 is about two lines on a 80-column terminal. + */ + if (verbose == 0) max = 15; + else if (verbose == 1) max = 31; + else max = UINT_MAX; + + /* + * We keep formatting to an absolute minimum, to facilitate passing + * the lines straight into tools such as addr2line. + */ + put_newline(); + put_fmt(proc, " 0x%x", pc); + + low = high = fp; + + for (count = 1; count < max && fp != 0; count++) { + if (kernel_get_nextframe(proc->pid, fp, &pc, &fp) < 0) + break; + + put_fmt(proc, " 0x%x", pc); + + /* + * Stop if we see a frame pointer that falls within the range + * of the frame pointers we have seen so far. This also + * prevents getting stuck in a loop on the same frame pointer. 
+		 */
+		if (fp >= low && fp <= high)
+			break;
+		if (low > fp)
+			low = fp;
+		if (high < fp)
+			high = fp;
+	}
+
+	if (fp != 0)
+		put_text(proc, " ..");
+	put_newline();
+}
diff --git a/minix/usr.bin/trace/mem.c b/minix/usr.bin/trace/mem.c
new file mode 100644
index 000000000..e0b67270b
--- /dev/null
+++ b/minix/usr.bin/trace/mem.c
@@ -0,0 +1,61 @@
+
+#include "inc.h"
+
+/*
+ * Retrieve 'len' bytes from the memory of the traced process 'pid' at address
+ * 'addr' and put the result in the buffer pointed to by 'ptr'.  Return 0 on
+ * success, or otherwise -1 with errno set appropriately.
+ */
+int
+mem_get_data(pid_t pid, vir_bytes addr, void * ptr, size_t len)
+{
+	struct ptrace_range pr;
+
+	if (len == 0) return 0;
+
+	pr.pr_space = TS_DATA;
+	pr.pr_addr = addr;
+	pr.pr_size = len;
+	pr.pr_ptr = ptr;
+
+	return ptrace(T_GETRANGE, pid, &pr, 0);
+}
+
+/*
+ * Retrieve 'len' bytes from the kernel structure memory of the traced process
+ * 'pid' at offset 'addr' (which need not be long-aligned) into the buffer at
+ * 'ptr'.  Return 0 on success, or otherwise -1 with errno set appropriately.
+ */
+int
+mem_get_user(pid_t pid, vir_bytes addr, void * ptr, size_t len)
+{
+	long data;
+	char *p;
+	size_t off, chunk;
+
+	if (len == 0) return 0;
+
+	/* Round down to a long boundary; skip 'off' bytes of the first long. */
+	off = addr & (sizeof(data) - 1);
+	addr -= off;
+
+	p = ptr;
+
+	while (len > 0) {
+		errno = 0;	/* a long result of -1 may be valid data */
+		data = ptrace(T_GETUSER, pid, (void *)addr, 0);
+		if (errno != 0) return -1;
+
+		chunk = sizeof(data) - off;
+		if (chunk > len)
+			chunk = len;
+
+		memcpy(p, (char *)&data + off, chunk);
+		p += chunk;
+		addr += sizeof(data);	/* next aligned long, even if off > 0 */
+		len -= chunk;
+		off = 0;
+	}
+
+	return 0;
+}
diff --git a/minix/usr.bin/trace/output.c b/minix/usr.bin/trace/output.c
new file mode 100644
index 000000000..f87416378
--- /dev/null
+++ b/minix/usr.bin/trace/output.c
@@ -0,0 +1,516 @@
+
+#include "inc.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * The maximum number of bytes that may be buffered before writing the buffered
+ * output to the underlying file.
This is a performance optimization only. + * Writing more than this number of bytes at once will be handled correctly. + */ +#define OUTPUT_BUFSZ 512 + +static int out_fd; +static char out_buf[OUTPUT_BUFSZ]; +static int out_len; +static int out_err; + +static pid_t last_pid; /* not a trace_proc pointer; it could become invalid! */ +static unsigned int line_off; +static unsigned int prefix_off; +static int print_pid; +static int print_susp; +static int add_space; + +/* + * Initialize the output channel. Called before any other output functions, + * but after a child process (to be traced) has already been spawned. If the + * given file string is not NULL, it is the path to a file that is to be used + * to write output to. If it is NULL, output is written to standard error. + */ +int +output_init(const char * file) +{ + + /* Initialize state. */ + out_len = 0; + out_err = FALSE; + + last_pid = 0; + line_off = 0; + prefix_off = 0; + print_pid = FALSE; + print_susp = FALSE; + add_space = FALSE; + + /* + * Ignore signals resulting from writing to a closed pipe. We can + * handle write errors properly ourselves. Setting O_NOSIGPIPE is an + * alternative, but that would affect other processes writing to the + * same file object, even after we have terminated. + */ + signal(SIGPIPE, SIG_IGN); + + /* Initialize the output file descriptor. */ + if (file == NULL) { + /* No output file given? Use standard error. */ + out_fd = STDERR_FILENO; + + return 0; + } else { + /* + * Use a restrictive mask for the output file. Traces may + * contain sensitive information (for security and otherwise), + * and the user might not always be careful about the location + * of the file. + */ + /* The file descriptor is not closed explicitly. */ + out_fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, + 0600); + + return (out_fd < 0) ? -1 : 0; + } +} + +/* + * Write the given data to the given file descriptor, taking into account the + * possibility of partial writes and write errors. 
+ */
+static void
+write_fd(int fd, const char *buf, size_t len)
+{
+	ssize_t r;
+
+	/* If we got a write error before, do not try to write more. */
+	if (out_err)
+		return;
+
+	/* Write all output; after a partial write, continue with the rest. */
+	while (len > 0) {
+		r = write(fd, buf, len);
+
+		/*
+		 * A write error (and that includes EOF) causes the program to
+		 * terminate with an error code.  For obvious reasons we cannot
+		 * print an error about this.  Do not even report to standard
+		 * error if the output was redirected, because that may mess
+		 * with the actual programs being run right now.
+		 */
+		if (r <= 0) {
+			out_err = TRUE;
+
+			break;
+		}
+		buf += r;	/* skip the bytes already written, or we resend them */
+		len -= r;
+	}
+}
+
+/*
+ * Return TRUE iff an output error occurred and the program should terminate.
+ */
+int
+output_error(void)
+{
+
+	return out_err;
+}
+
+/*
+ * Print the given null-terminated string to the output channel.  Return the
+ * number of characters printed, for alignment purposes.  In the future, this
+ * number may end up being different from the number of bytes given to print,
+ * due to multibyte encoding or colors or whatnot.
+ */
+static unsigned int
+output_write(const char * text)
+{
+	size_t len;
+
+	len = strlen(text);
+
+	if (out_len + len > sizeof(out_buf)) {
+		write_fd(out_fd, out_buf, out_len);
+
+		out_len = 0;
+
+		/* Write large buffers right away. */
+		if (len > sizeof(out_buf)) {
+			write_fd(out_fd, text, len);
+
+			return len;
+		}
+	}
+
+	memcpy(&out_buf[out_len], text, len);
+
+	out_len += len;
+
+	return len;
+}
+
+/*
+ * Flush any pending output to the output channel.
+ */
+void
+output_flush(void)
+{
+
+	if (out_len > 0) {
+		write_fd(out_fd, out_buf, out_len);
+
+		out_len = 0;
+	}
+}
+
+/*
+ * Print a PID prefix for the given process, or an info prefix if no process
+ * (NULL) is given.  Prefixes are only relevant when multiple processes are
+ * traced.  As long as there are multiple processes, each line is prefixed with
+ * the PID of the process.
As soon as the number of processes has been reduced + * back to one, one more line is prefixed with the PID of the remaining process + * (with a "'" instead of a "|") to help the user identify which process is + * left. In addition, whenever a preempted call is about to be resumed, a "*" + * is printed instead of a space, so as to show that it is a continuation of a + * previous line. An example of all these cases: + * + * fork() = 3 + * 3| Tracing test (pid 3) + * 3| fork() = 0 + * 3| read(0, <..> + * 2| waitpid(-1, <..> + * INFO| This is an example info line. + * 3|*read(0, "", 1024) = 0 + * 3| exit(1) + * 3| Process exited normally with code 1 + * 2'*waitpid(-1, W_EXITED(1), 0) = 3 + * exit(0) + * Process exited normally with code 0 + */ +static void +put_prefix(struct trace_proc * proc, int resuming) +{ + char prefix[32]; + unsigned int count; + + assert(line_off == 0); + + count = proc_count(); + + /* TODO: add a command line option for always printing the pid. */ + if (print_pid || count > 1 || proc == NULL) { + /* + * TODO: we currently rely on the highest PID having at most + * five digits, but this will eventually change. There are + * several ways to deal with that, but none are great. + */ + if (proc == NULL) + snprintf(prefix, sizeof(prefix), "%5s| ", "INFO"); + else + snprintf(prefix, sizeof(prefix), "%5d%c%c", + proc->pid, (count > 1) ? '|' : '\'', + resuming ? '*' : ' '); + + prefix_off = line_off = output_write(prefix); + + last_pid = (proc != NULL ? proc->pid : 0); + } else { + assert(!resuming); + + prefix_off = 0; + } + + /* Remember whether the next line should get prefixed regardless. */ + print_pid = (count > 1 || proc == NULL); +} + +/* + * Add a string to the end of the text recording for the given process. + * This is used only to record the call-enter output of system calls. 
+ */
+static void
+record_add(struct trace_proc * proc, const char * text)
+{
+	size_t len;
+
+	assert(proc->recording);
+
+	/* If the recording buffer is already full, do not record more. */
+	if (proc->outlen == sizeof(proc->outbuf))
+		return;
+
+	len = strlen(text);
+
+	/* If nonempty, the recording buffer is always null terminated. */
+	if (len < sizeof(proc->outbuf) - proc->outlen - 1) {
+		strcpy(&proc->outbuf[proc->outlen], text);
+
+		proc->outlen += len;
+	} else
+		proc->outlen = sizeof(proc->outbuf); /* buffer exhausted */
+}
+
+/*
+ * Start recording text for the given process.  Since this marks the start of
+ * a call, remember to print a preemption marker when the call gets preempted.
+ */
+void
+record_start(struct trace_proc * proc)
+{
+
+	proc->recording = TRUE;
+
+	print_susp = TRUE;
+}
+
+/*
+ * Stop recording text for the given process.
+ */
+void
+record_stop(struct trace_proc * proc)
+{
+
+	proc->recording = FALSE;
+}
+
+/*
+ * Clear recorded text for the given process.  As this also marks the end of
+ * the call, no longer print a suspension marker before the next newline.
+ */
+void
+record_clear(struct trace_proc * proc)
+{
+
+	assert(!proc->recording);
+	proc->outlen = 0;
+
+	if (proc->pid == last_pid) /* only if the current line is this process's */
+		print_susp = FALSE;
+}
+
+/*
+ * Replay the record for the given process on a new line, if the current line
+ * does not already have output for this process.  If it does, do nothing.
+ * If the process has no recorded output, just start a new line.  Return TRUE
+ * iff the caller must print its own replay text due to a recording overflow.
+ */
+int
+record_replay(struct trace_proc * proc)
+{
+	int space;
+
+	assert(!proc->recording);
+
+	/*
+	 * If there is output on the current line, and it is for the current
+	 * process, we must assume that it is the original, recorded text, and
+	 * thus, we should do nothing.  If output on the current line is for
+	 * another process, we must force a new line before replaying.
+ */ + if (line_off > 0) { + if (proc->pid == last_pid) + return FALSE; + + put_newline(); + } + + /* + * If there is nothing to replay, do nothing further. This case may + * occur when printing signals, in which case the caller still expects + * a new line to be started. This line must not be prefixed with a + * "resuming" marker though--after all, nothing is being resumed here. + */ + if (proc->outlen == 0) + return FALSE; + + /* + * If there is text to replay, then this does mean we are in effect + * resuming the recorded call, even if it is just to print a signal. + * Thus, we must print a prefix that shows the call is being resumed. + * Similarly, unless the recording is cleared before a newline, we must + * suspend the line again, too. + */ + put_prefix(proc, TRUE /*resuming*/); + + print_susp = TRUE; + + /* + * If the recording buffer was exhausted during recording, the caller + * must generate the replay text instead. + */ + if (proc->outlen == sizeof(proc->outbuf)) + return TRUE; + + /* + * Replay the recording. If it ends with a space, turn it into a soft + * space, because the recording may be followed immediately by a + * newline; an example of this is the exit() exception. + */ + space = proc->outbuf[proc->outlen - 1] == ' '; + if (space) + proc->outbuf[proc->outlen - 1] = 0; + + put_text(proc, proc->outbuf); + + if (space) { + put_space(proc); + + /* Restore the space, in case another replay takes place. */ + proc->outbuf[proc->outlen - 1] = ' '; + } + + return FALSE; +} + +/* + * Start a new line, and adjust the local state accordingly. If nothing has + * been printed on the current line yet, this function is a no-op. Otherwise, + * the output so far may have to be marked as preempted with the "<..>" + * preemption marker. 
+ */ +void +put_newline(void) +{ + + if (line_off == 0) + return; + + if (print_susp) { + if (add_space) + (void)output_write(" "); + + (void)output_write("<..>"); + } + +#if DEBUG + (void)output_write("|"); +#endif + + (void)output_write("\n"); + output_flush(); + + line_off = 0; + add_space = FALSE; + print_susp = FALSE; + last_pid = 0; +} + +/* + * Print a string as part of the output associated with a process. If the + * current line contains output for another process, a newline will be printed + * first. If the current line contains output for the same process, then the + * text will simply continue on the same line. If the current line is empty, + * a process PID prefix may have to be printed first. Either way, after this + * operation, the current line will contain text for the given process. If + * requested, the text may also be recorded for the process, for later replay. + * As an exception, proc may be NULL when printing general information lines. + */ +void +put_text(struct trace_proc * proc, const char * text) +{ + + if (line_off > 0 && (proc == NULL || proc->pid != last_pid)) { + /* + * The current line has not been terminated with a newline yet. + * Start a new line. Note that this means that for lines not + * associated to a process, the whole line must be printed at + * once. This can be fixed but is currently not an issue. + */ + put_newline(); + } + + /* See if we must add a prefix at the start of the line. */ + if (line_off == 0) + put_prefix(proc, FALSE /*resuming*/); + + /* If needed, record the given text. */ + if (proc != NULL && proc->recording) + record_add(proc, text); + + /* + * If we delayed printing a space, print one now. This is never part + * of text that must be saved. In fact, we support these soft spaces + * for exactly one case; see put_space() for details. + */ + if (add_space) { + line_off += output_write(" "); + + add_space = FALSE; + } + + /* Finally, print the actual text. 
*/ + line_off += output_write(text); + + last_pid = (proc != NULL) ? proc->pid : 0; +} + +/* + * Add a space to the output for the given process, but only if and once more + * text is printed for the process afterwards. The aim is to ensure that no + * lines ever end with a space, to prevent needless line wrapping on terminals. + * The space may have to be remembered for the current line (for preemption, + * which does not have a process pointer to work with) as well as recorded for + * later replay, if recording is enabled. Consider the following example: + * + * [A] 3| execve(..) <..> + * 2| getpid(0) = 2 (ppid=1) + * [B] 3| execve(..) = -1 [ENOENT] + * [A] 3| exit(1) <..> + * 2| getpid(0) = 2 (ppid=1) + * 3| exit(1) + * 3| Process exited normally with code 1 + * + * On the [A] lines, the space between the call's closing parenthesis and the + * "<..>" preemption marker is the result of add_space being set to TRUE; on + * the [B] line, the space between the closing parenthesis and the equals sign + * is the result of the space being recorded. + */ +void +put_space(struct trace_proc * proc) +{ + + /* This call must only be used after output for the given process. */ + assert(last_pid == proc->pid); + + /* In case the call does not get preempted. */ + add_space = TRUE; + + /* In case the call does get preempted. */ + if (proc->recording) + record_add(proc, " "); +} + +/* + * Indent the remainders of the text on the line for this process, such that + * similar remainders are similarly aligned. In particular, the remainder is + * the equals sign of a call, and everything after it. Of course, alignment + * can only be used if the call has not already printed beyond the alignment + * position. Also, the prefix must not be counted toward the alignment, as it + * is possible that a line without prefix may be preempted and later continued + * with prefix. 
All things considered, the result would look like this:
+ *
+ *   getuid()                      = 1 (euid=1)
+ *   setuid(0)                     = -1 [EPERM]
+ *   write(2, "Permission denied\n", 18) = 18
+ *   fork()                        = 3
+ *       3| Tracing test (pid 3)
+ *       3| fork()                        = 0
+ *       3| exit(0)
+ *       3| Process exited normally with code 0
+ *       2' waitpid(-1, W_EXITED(0), 0)   = 3
+ *
+ */
+void put_align(struct trace_proc * __unused proc)
+{
+
+	/*
+	 * TODO: add actual support for this.  The following code works,
+	 * although not so efficiently.  The difficulty is the default
+	 * configuration and corresponding options.
+
+	while (line_off - prefix_off < 20)
+		put_text(proc, " ");
+
+	 */
+}
diff --git a/minix/usr.bin/trace/proc.c b/minix/usr.bin/trace/proc.c
new file mode 100644
index 000000000..665ea9f12
--- /dev/null
+++ b/minix/usr.bin/trace/proc.c
@@ -0,0 +1,97 @@
+
+#include "inc.h"
+
+static TAILQ_HEAD(, trace_proc) proc_root;
+static unsigned int nr_procs;
+
+/*
+ * Initialize the list of traced processes.
+ */
+void
+proc_init(void)
+{
+
+	TAILQ_INIT(&proc_root);
+	nr_procs = 0;
+}
+
+/*
+ * Add a new process to the list of traced processes, allocating memory for it
+ * first.  Return the new process structure with its PID assigned and the rest
+ * zeroed out, or NULL upon allocation failure (with errno set appropriately).
+ */
+struct trace_proc *
+proc_add(pid_t pid)
+{
+	struct trace_proc *proc;
+
+	/* calloc() returns zeroed memory, so no separate memset() is needed. */
+	proc = calloc(1, sizeof(*proc));
+
+	if (proc == NULL)
+		return NULL;
+
+	/* All fields other than the PID start out as zero. */
+	proc->pid = pid;
+
+	TAILQ_INSERT_TAIL(&proc_root, proc, next);
+	nr_procs++;
+
+	return proc;
+}
+
+/*
+ * Retrieve the data structure for a traced process based on its PID.  Return
+ * a pointer to the structure, or NULL if no structure exists for this process.
+ */
+struct trace_proc *
+proc_get(pid_t pid)
+{
+	struct trace_proc *proc;
+
+	/* Linear search for now; so we can easily add a hashtable later..
*/
+	TAILQ_FOREACH(proc, &proc_root, next) {
+		if (proc->pid == pid)
+			return proc;
+	}
+
+	return NULL;
+}
+
+/*
+ * Remove a process from the list of traced processes, and free its memory.
+ */
+void
+proc_del(struct trace_proc * proc)
+{
+
+	TAILQ_REMOVE(&proc_root, proc, next);
+	nr_procs--;
+
+	free(proc);
+}
+
+/*
+ * Iterator for the list of traced processes.  If a NULL pointer is given,
+ * return the first process in the list; otherwise, return the next process in
+ * the list.  Not stable with respect to list modifications.
+ */
+struct trace_proc *
+proc_next(struct trace_proc * proc)
+{
+
+	if (proc == NULL)
+		return TAILQ_FIRST(&proc_root);
+	else
+		return TAILQ_NEXT(proc, next);
+}
+
+/*
+ * Return the number of processes in the list of traced processes.
+ */
+unsigned int
+proc_count(void)
+{
+
+	return nr_procs;
+}
diff --git a/minix/usr.bin/trace/proc.h b/minix/usr.bin/trace/proc.h
new file mode 100644
index 000000000..509f523ee
--- /dev/null
+++ b/minix/usr.bin/trace/proc.h
@@ -0,0 +1,99 @@
+
+#include <sys/queue.h>
+
+/*
+ * The maximum nesting depth of parentheses/brackets.  The current maximum
+ * depth is something like six, for UDS control messages.  This constant can be
+ * increased as necessary without any problem.
+ */
+#define MAX_DEPTH	10
+
+/*
+ * The maximum size of text that may be recorded, including null terminator.
+ * Increasing this allows longer lines to be recorded and replayed without
+ * being cut short (see call_replay), but also increases memory usage.
+ */ +#define RECORD_BUFSZ 256 + +struct trace_proc { + /* identity (public) */ + pid_t pid; + + /* data structure management (proc.c) */ + TAILQ_ENTRY(trace_proc) next; + + /* general process state (trace.c) */ + char name[PROC_NAME_LEN]; + unsigned int trace_flags; + reg_t last_pc; + reg_t last_sp; + + /* call enter-to-leave state (call.c) */ + int call_type; + vir_bytes m_addr; + message m_out; + const char *call_name; + unsigned int call_flags; + const struct call_handler *call_handler; + int call_result; + + /* output state (output.c) */ + int recording; + char outbuf[RECORD_BUFSZ]; + size_t outlen; + + /* formatting state (format.c) */ + const char *next_sep; + int depth; + struct { + const char *sep; + int name; + } depths[MAX_DEPTH]; + + /* ioctl state (ioctl.c) */ + int ioctl_index; + unsigned int ioctl_flags; +}; + +/* Trace flags. */ +#define TF_INCALL 0x01 /* the process has entered a system call */ +#define TF_SKIP 0x02 /* the system call result is to be skipped */ +#define TF_CTX_SKIP 0x04 /* skip call result only if context changes */ +#define TF_STOPPING 0x08 /* the process is expecting a SIGSTOP */ +#define TF_ATTACH 0x10 /* we have not started this process */ +#define TF_DETACH 0x20 /* detach from the process as soon as we can */ +#define TF_EXEC 0x40 /* the process may be performing an execve() */ +#define TF_NOCALL 0x80 /* no system call seen yet (for info only) */ + +/* Trace classes, determining how the tracer engine should handle a call. */ +#define TC_NORMAL 0 /* normal call, no exceptions required */ +#define TC_EXEC 1 /* exec call, success on subsequent SIGSTOP */ +#define TC_SIGRET 2 /* sigreturn call, success on context change */ + +/* Call flags. 
*/ +#define CF_DONE 0x01 /* printing the call parameters is done */ +#define CF_NORETURN 0x02 /* the call does not return on success */ +#define CF_HIDE 0x04 /* do not print the current call */ +#define CF_IPC_ERR 0x08 /* a failure occurred at the IPC level */ +#define CF_REG_ERR 0x10 /* unable to retrieve the result register */ +#define CF_MSG_ERR 0x20 /* unable to copy in the reply message */ + +/* Call types, determining how much has been printed up to the call split. */ +#define CT_NOTDONE (0) /* not all parameters have been printed yet */ +#define CT_DONE (CF_DONE) /* all parameters have been printed */ +#define CT_NORETURN (CF_DONE | CF_NORETURN) /* the no-return call type */ + +/* Put flags. */ +#define PF_FAILED 0x01 /* call failed, results may be invalid */ +#define PF_LOCADDR 0x02 /* pointer is into local address space */ +/* Yes, PF_LOCAL would conflict with the packet family definition. Bah. */ +#define PF_ALT 0x04 /* alternative output (callee specific) */ +#define PF_STRING PF_ALT /* buffer is string (put_buf only) */ +#define PF_FULL 0x08 /* print full format (callee specific) */ +#define PF_PATH (PF_STRING | PF_FULL) /* flags for path names */ +#define PF_NONAME 0x10 /* default to no field names at this depth */ + +/* I/O control flags. 
*/ +#define IF_OUT 0x1 /* call to print outgoing (written) data */ +#define IF_IN 0x2 /* call to print incoming (read) data */ +#define IF_ALL 0x4 /* all fields printed (not really a bit) */ diff --git a/minix/usr.bin/trace/proto.h b/minix/usr.bin/trace/proto.h new file mode 100644 index 000000000..27a889421 --- /dev/null +++ b/minix/usr.bin/trace/proto.h @@ -0,0 +1,130 @@ + +/* call.c */ +void put_endpoint(struct trace_proc *proc, const char *name, endpoint_t endpt); +void put_equals(struct trace_proc *proc); +void put_result(struct trace_proc *proc); +int default_out(struct trace_proc *proc, const message *m_out); +void default_in(struct trace_proc *proc, const message *m_out, + const message *m_in, int failed); +int call_enter(struct trace_proc *proc, int show_stack); +void call_leave(struct trace_proc *proc, int skip); +void call_replay(struct trace_proc *proc); +const char *call_name(struct trace_proc *proc); + +/* error.c */ +const char *get_error_name(int err); + +/* escape.c */ +const char *get_escape(char c); + +/* format.c */ +void format_reset(struct trace_proc *proc); +void format_set_sep(struct trace_proc *proc, const char *sep); +void format_push_sep(struct trace_proc *proc); +void put_field(struct trace_proc *proc, const char *name, const char *text); +void put_open(struct trace_proc *proc, const char *name, int flags, + const char *string, const char *separator); +void put_close(struct trace_proc *proc, const char *string); +void put_fmt(struct trace_proc *proc, const char *fmt, ...) + __attribute__((__format__(__printf__, 2, 3))); +void put_value(struct trace_proc *proc, const char *name, const char *fmt, ...) 
+ __attribute__((__format__(__printf__, 3, 4))); +int put_open_struct(struct trace_proc *proc, const char *name, int flags, + vir_bytes addr, void *ptr, size_t size); +void put_close_struct(struct trace_proc *proc, int all); +void put_ptr(struct trace_proc *proc, const char *name, vir_bytes addr); +void put_buf(struct trace_proc *proc, const char *name, int flags, + vir_bytes addr, ssize_t size); +void put_flags(struct trace_proc *proc, const char *name, + const struct flags *fp, unsigned int num, const char *fmt, + unsigned int value); +void put_tail(struct trace_proc * proc, unsigned int count, + unsigned int printed); + +/* ioctl.c */ +void put_ioctl_req(struct trace_proc *proc, const char *name, + unsigned long req, int is_svrctl); +int put_ioctl_arg_out(struct trace_proc *proc, const char *name, + unsigned long req, vir_bytes addr, int is_svrctl); +void put_ioctl_arg_in(struct trace_proc *proc, const char *name, int failed, + unsigned long req, vir_bytes addr, int is_svrctl); + +/* kernel.c */ +int kernel_check(pid_t pid); +int kernel_get_name(pid_t pid, char *name, size_t size); +int kernel_is_service(pid_t pid); +int kernel_get_syscall(pid_t pid, reg_t reg[3]); +int kernel_get_retreg(pid_t pid, reg_t *retreg); +vir_bytes kernel_get_stacktop(void); +int kernel_get_context(pid_t pid, reg_t *pc, reg_t *sp, reg_t *fp); +void kernel_put_stacktrace(struct trace_proc * proc); + +/* mem.c */ +int mem_get_data(pid_t pid, vir_bytes addr, void *ptr, size_t len); +int mem_get_user(pid_t pid, vir_bytes addr, void *ptr, size_t len); + +/* pm.c */ +void put_struct_timeval(struct trace_proc *proc, const char *name, int flags, + vir_bytes addr); +void put_time(struct trace_proc *proc, const char *name, time_t time); +void put_groups(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, int count); + +/* output.c */ +int output_init(const char *file); +int output_error(void); +void output_flush(void); +void record_start(struct trace_proc *proc); +void 
record_stop(struct trace_proc *proc);
+void record_clear(struct trace_proc *proc);
+int record_replay(struct trace_proc *proc);
+void put_newline(void);
+void put_text(struct trace_proc *proc, const char *text);
+void put_space(struct trace_proc *proc);
+void put_align(struct trace_proc *proc);
+
+/* proc.c */
+void proc_init(void);
+struct trace_proc *proc_add(pid_t pid);
+struct trace_proc *proc_get(pid_t pid);
+void proc_del(struct trace_proc *proc);
+struct trace_proc *proc_next(struct trace_proc *last);
+unsigned int proc_count(void);
+
+/* signal.c */
+const char *get_signal_name(int sig);
+
+/* trace.c */
+extern int allnames;
+extern unsigned int verbose;
+extern unsigned int valuesonly;
+
+/* vfs.c */
+void put_fd(struct trace_proc *proc, const char *name, int fd);
+void put_dev(struct trace_proc *proc, const char *name, dev_t dev);
+
+/* service: call tables; extern so that each is defined only once */
+extern const struct calls pm_calls;
+extern const struct calls vfs_calls;
+extern const struct calls rs_calls;
+extern const struct calls vm_calls;
+extern const struct calls ipc_calls;
+
+/* ioctl/block.c */
+const char *block_ioctl_name(unsigned long req);
+int block_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr,
+	int dir);
+
+/* ioctl/char.c */
+const char *char_ioctl_name(unsigned long req);
+int char_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr,
+	int dir);
+
+/* ioctl/net.c */
+const char *net_ioctl_name(unsigned long req);
+int net_ioctl_arg(struct trace_proc *proc, unsigned long req, void *ptr,
+	int dir);
+
+/* ioctl/svrctl.c */
+const char *svrctl_name(unsigned long req);
+int svrctl_arg(struct trace_proc *proc, unsigned long req, void *ptr, int dir);
diff --git a/minix/usr.bin/trace/service/ipc.c b/minix/usr.bin/trace/service/ipc.c
new file mode 100644
index 000000000..21368d950
--- /dev/null
+++ b/minix/usr.bin/trace/service/ipc.c
@@ -0,0 +1,445 @@
+/* This file is concerned with the IPC server, not with kernel-level IPC.
*/ + +#include "inc.h" + +#include +#include +#include + +static void +put_key(struct trace_proc * proc, const char * name, key_t key) +{ + + if (!valuesonly && key == IPC_PRIVATE) + put_field(proc, name, "IPC_PRIVATE"); + else + put_value(proc, name, "%ld", key); +} + +static const struct flags ipcget_flags[] = { + FLAG(IPC_CREAT), + FLAG(IPC_EXCL), +}; + +static int +ipc_shmget_out(struct trace_proc * proc, const message * m_out) +{ + + put_key(proc, "key", m_out->m_lc_ipc_shmget.key); + put_value(proc, "size", "%zu", m_out->m_lc_ipc_shmget.size); + put_flags(proc, "shmflg", ipcget_flags, COUNT(ipcget_flags), "0%o", + m_out->m_lc_ipc_shmget.flag); + + return CT_DONE; +} + +static void +ipc_shmget_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_value(proc, NULL, "%d", m_in->m_lc_ipc_shmget.retid); + else + put_result(proc); +} + +static const struct flags shmat_flags[] = { + FLAG(SHM_RDONLY), + FLAG(SHM_RND), +}; + +static int +ipc_shmat_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "shmid", "%d", m_out->m_lc_ipc_shmat.id); + put_ptr(proc, "shmaddr", (vir_bytes)m_out->m_lc_ipc_shmat.addr); + put_flags(proc, "shmflg", shmat_flags, COUNT(shmat_flags), "0x%x", + m_out->m_lc_ipc_shmat.flag); + + return CT_DONE; +} + +static void +ipc_shmat_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_ptr(proc, NULL, (vir_bytes)m_in->m_lc_ipc_shmat.retaddr); + else + put_result(proc); +} + +static int +ipc_shmdt_out(struct trace_proc * proc, const message * m_out) +{ + + put_ptr(proc, "shmaddr", (vir_bytes)m_out->m_lc_ipc_shmdt.addr); + + return CT_DONE; +} + +static void +put_shmctl_cmd(struct trace_proc * proc, const char * name, int cmd) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (cmd) { + TEXT(IPC_RMID); + TEXT(IPC_SET); + TEXT(IPC_STAT); + TEXT(SHM_STAT); + TEXT(SHM_INFO); + 
TEXT(IPC_INFO); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", cmd); +} +/* Flag names that are looked for in the shm_perm.mode field. */ +static const struct flags shm_mode_flags[] = { + FLAG(SHM_DEST), + FLAG(SHM_LOCKED), +}; +/* + * Print a shmid_ds structure, as filled in by put_open_struct(). If PF_ALT + * is set in flags, only the uid, gid, and mode permission fields are printed. + * Otherwise, the set of printed fields depends on the verbosity level. + */ +static void +put_struct_shmid_ds(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct shmid_ds buf; + int set; + + if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf))) + return; + + /* Is this an IPC_SET call? Then print a small subset of fields.. */ + set = (flags & PF_ALT); + + put_open(proc, "shm_perm", 0, "{", ", "); + + put_value(proc, "uid", "%u", buf.shm_perm.uid); + put_value(proc, "gid", "%u", buf.shm_perm.gid); + if (!set && verbose > 0) { + put_value(proc, "cuid", "%u", buf.shm_perm.cuid); + put_value(proc, "cgid", "%u", buf.shm_perm.cgid); + } + put_flags(proc, "mode", shm_mode_flags, COUNT(shm_mode_flags), + "0%03o", buf.shm_perm.mode); + + put_close(proc, "}"); + + if (!set) { + put_value(proc, "shm_segsz", "%zu", buf.shm_segsz); + if (verbose > 0) { + put_value(proc, "shm_lpid", "%d", buf.shm_lpid); + put_value(proc, "shm_cpid", "%d", buf.shm_cpid); + put_time(proc, "shm_atime", buf.shm_atime); + put_time(proc, "shm_dtime", buf.shm_dtime); + put_time(proc, "shm_ctime", buf.shm_ctime); + } + put_value(proc, "shm_nattch", "%u", buf.shm_nattch); + } + + put_close_struct(proc, set || verbose > 0); +} +/* Print the shmctl call arguments: ID, command, and the buffer argument. */ +static int +ipc_shmctl_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "shmid", "%d", m_out->m_lc_ipc_shmctl.id); + put_shmctl_cmd(proc, "cmd", m_out->m_lc_ipc_shmctl.cmd); + + /* TODO: add support for the IPC_INFO and SHM_INFO structures..
*/ + switch (m_out->m_lc_ipc_shmctl.cmd) { + case IPC_STAT: + case SHM_STAT: + return CT_NOTDONE; + + case IPC_SET: + put_struct_shmid_ds(proc, "buf", PF_ALT, + (vir_bytes)m_out->m_lc_ipc_shmctl.buf); + + return CT_DONE; + + default: + put_ptr(proc, "buf", (vir_bytes)m_out->m_lc_ipc_shmctl.buf); + + return CT_DONE; + } +} +/* + * Print the rest of a shmctl call: the buffer for IPC_STAT and SHM_STAT, and + * then the result. For SHM_INFO, SHM_STAT, and IPC_INFO, the value returned + * in the reply message is printed if the call did not fail. + */ +static void +ipc_shmctl_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + switch (m_out->m_lc_ipc_shmctl.cmd) { + case IPC_STAT: + case SHM_STAT: + put_struct_shmid_ds(proc, "buf", failed, + (vir_bytes)m_out->m_lc_ipc_shmctl.buf); + put_equals(proc); + + break; + } + + if (!failed) { + switch (m_out->m_lc_ipc_shmctl.cmd) { + case SHM_INFO: + case SHM_STAT: + case IPC_INFO: + put_value(proc, NULL, "%d", m_in->m_lc_ipc_shmctl.ret); + + return; + } + } + + put_result(proc); +} +/* Print the semget call arguments: key, number of semaphores, and flags. */ +static int +ipc_semget_out(struct trace_proc * proc, const message * m_out) +{ + + put_key(proc, "key", m_out->m_lc_ipc_semget.key); + put_value(proc, "nsems", "%d", m_out->m_lc_ipc_semget.nr); + put_flags(proc, "semflg", ipcget_flags, COUNT(ipcget_flags), "0%o", + m_out->m_lc_ipc_semget.flag); + + return CT_DONE; +} +/* Print the semget result: the returned ID, unless the call failed. */ +static void +ipc_semget_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_value(proc, NULL, "%d", m_in->m_lc_ipc_semget.retid); + else + put_result(proc); +} +/* Print a semctl command by name, or as a number if no name was selected. */ +static void +put_semctl_cmd(struct trace_proc * proc, const char * name, int cmd) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (cmd) { + TEXT(IPC_RMID); + TEXT(IPC_SET); + TEXT(IPC_STAT); + TEXT(GETNCNT); + TEXT(GETPID); + TEXT(GETVAL); + TEXT(GETALL); + TEXT(GETZCNT); + TEXT(SETVAL); + TEXT(SETALL); + TEXT(SEM_STAT); + TEXT(SEM_INFO); + TEXT(IPC_INFO); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", cmd); +} +/* + * Print a semid_ds structure, as filled in by put_open_struct(). If PF_ALT + * is set in flags, only the uid, gid, and mode permission fields are printed. + */ +static void +put_struct_semid_ds(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr)
+{ + struct semid_ds buf; + int set; + + if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf))) + return; + + /* Is this an IPC_SET call? Then print a small subset of fields.. */ + set = (flags & PF_ALT); + + put_open(proc, "sem_perm", 0, "{", ", "); + + put_value(proc, "uid", "%u", buf.sem_perm.uid); + put_value(proc, "gid", "%u", buf.sem_perm.gid); + if (!set && verbose > 0) { + put_value(proc, "cuid", "%u", buf.sem_perm.cuid); + put_value(proc, "cgid", "%u", buf.sem_perm.cgid); + } + put_value(proc, "mode", "0%03o", buf.sem_perm.mode); + + put_close(proc, "}"); + + if (!set) { + if (verbose > 0) { + put_time(proc, "sem_otime", buf.sem_otime); + put_time(proc, "sem_ctime", buf.sem_ctime); + } + put_value(proc, "sem_nsems", "%u", buf.sem_nsems); + } + + put_close_struct(proc, set || verbose > 0); +} + +/* Print the semctl call arguments; the last one depends on the command. */ +static int +ipc_semctl_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "semid", "%d", m_out->m_lc_ipc_semctl.id); + put_value(proc, "semnum", "%d", m_out->m_lc_ipc_semctl.num); + put_semctl_cmd(proc, "cmd", m_out->m_lc_ipc_semctl.cmd); + + /* TODO: add support for the IPC_INFO and SEM_INFO structures..
*/ + switch (m_out->m_lc_ipc_semctl.cmd) { + case IPC_STAT: + case SEM_STAT: + return CT_NOTDONE; + + case IPC_SET: + put_struct_semid_ds(proc, "buf", PF_ALT, + (vir_bytes)m_out->m_lc_ipc_semctl.opt); + + return CT_DONE; + + case IPC_INFO: + case SEM_INFO: + put_ptr(proc, "buf", (vir_bytes)m_out->m_lc_ipc_semctl.opt); + + return CT_DONE; + + case GETALL: + case SETALL: + put_ptr(proc, "array", (vir_bytes)m_out->m_lc_ipc_semctl.opt); + + return CT_DONE; + + case SETVAL: + /* NOTE(review): %d assumes that opt has the width of an int; confirm. */ + put_value(proc, "val", "%d", m_out->m_lc_ipc_semctl.opt); + + return CT_DONE; + + default: + return CT_DONE; + } +} +/* + * Print the rest of a semctl call: the buffer for IPC_STAT and SEM_STAT, and + * then the result. For the commands listed in the second switch, the value + * returned in the reply message is printed if the call did not fail. + */ +static void +ipc_semctl_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + switch (m_out->m_lc_ipc_semctl.cmd) { + case IPC_STAT: + case SEM_STAT: + put_struct_semid_ds(proc, "buf", failed, + (vir_bytes)m_out->m_lc_ipc_semctl.opt); + put_equals(proc); + + break; + } + + if (!failed) { + switch (m_out->m_lc_ipc_semctl.cmd) { + case GETNCNT: + case GETPID: + case GETVAL: + case GETZCNT: + case SEM_INFO: + case SEM_STAT: + case IPC_INFO: + put_value(proc, NULL, "%d", m_in->m_lc_ipc_semctl.ret); + return; + } + } + put_result(proc); +} +/* Flag names for the sem_flg field of the sembuf structure. */ +static const struct flags sem_flags[] = { + FLAG(IPC_NOWAIT), + FLAG(SEM_UNDO), +}; +/* + * Print a sembuf structure. The sem_flg field is printed only if verbose is + * nonzero or if any flag other than SEM_UNDO is set in it. + */ +static void +put_struct_sembuf(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct sembuf buf; + int all; + + if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf))) + return; + + all = FALSE; + put_value(proc, "sem_num", "%u", buf.sem_num); + put_value(proc, "sem_op", "%d", buf.sem_op); + if (verbose > 0 || (buf.sem_flg & ~SEM_UNDO) != 0) { + put_flags(proc, "sem_flg", sem_flags, COUNT(sem_flags), "0x%x", + buf.sem_flg); + all = TRUE; + } + + put_close_struct(proc, all); +} +/* + * Print an array of 'count' sembuf structures located at 'addr'. Only the + * pointer is printed if valuesonly exceeds one, if the count exceeds SEMOPM, + * or if mem_get_data() returns a nonzero value. + */ +static void +put_sembuf_array(struct trace_proc * proc, const char * name, vir_bytes addr, + size_t count) +{ + struct sembuf buf[SEMOPM]; /* about 600 bytes, so OK for the stack */ + size_t i; + + if
(valuesonly > 1 || count > SEMOPM || + mem_get_data(proc->pid, addr, &buf, count * sizeof(buf[0])) != 0) { + put_ptr(proc, name, addr); + + return; + } + + put_open(proc, name, PF_NONAME, "[", ", "); + for (i = 0; i < count; i++) + put_struct_sembuf(proc, NULL, PF_LOCADDR, (vir_bytes)&buf[i]); + put_close(proc, "]"); +} +/* Print the semop call arguments: ID, operations array, and its size. */ +static int +ipc_semop_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "semid", "%d", m_out->m_lc_ipc_semop.id); + put_sembuf_array(proc, "sops", (vir_bytes)m_out->m_lc_ipc_semop.ops, + m_out->m_lc_ipc_semop.size); + put_value(proc, "nsops", "%zu", m_out->m_lc_ipc_semop.size); + + return CT_DONE; +} +/* Array designator for an IPC call: its call number relative to IPC_BASE. */ +#define IPC_CALL(c) [((IPC_ ## c) - IPC_BASE)] +/* The handlers for the IPC calls, indexed by call number minus IPC_BASE. */ +static const struct call_handler ipc_map[] = { + IPC_CALL(SHMGET) = HANDLER("shmget", ipc_shmget_out, ipc_shmget_in), + IPC_CALL(SHMAT) = HANDLER("shmat", ipc_shmat_out, ipc_shmat_in), + IPC_CALL(SHMDT) = HANDLER("shmdt", ipc_shmdt_out, default_in), + IPC_CALL(SHMCTL) = HANDLER("shmctl", ipc_shmctl_out, ipc_shmctl_in), + IPC_CALL(SEMGET) = HANDLER("semget", ipc_semget_out, ipc_semget_in), + IPC_CALL(SEMCTL) = HANDLER("semctl", ipc_semctl_out, ipc_semctl_in), + IPC_CALL(SEMOP) = HANDLER("semop", ipc_semop_out, default_in), +}; +/* The call table for the IPC calls: any endpoint, numbers from IPC_BASE. */ +const struct calls ipc_calls = { + .endpt = ANY, + .base = IPC_BASE, + .map = ipc_map, + .count = COUNT(ipc_map) +}; diff --git a/minix/usr.bin/trace/service/pm.c b/minix/usr.bin/trace/service/pm.c new file mode 100644 index 000000000..15611235e --- /dev/null +++ b/minix/usr.bin/trace/service/pm.c @@ -0,0 +1,1396 @@ + +#include "inc.h" + +#include +#include +#include +#include +#include +#include +#include +/* Print the exit call argument: the status. Returns CT_NORETURN. */ +static int +pm_exit_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "status", "%d", m_out->m_lc_pm_exit.status); + + return CT_NORETURN; +} +/* Flag names for the options argument of waitpid. */ +static const struct flags waitpid_options[] = { + FLAG(WNOHANG), + FLAG(WUNTRACED), + FLAG(WALTSIG), + FLAG(WALLSIG), + FLAG(WNOWAIT), + FLAG(WNOZOMBIE),
FLAG(WOPTSCHECKED), +}; +/* + * Print a wait status. The status is printed in symbolic form only if it can + * be reconstructed exactly from its decoded parts; otherwise, or if valuesonly + * is set, it is printed as a hexadecimal number. + */ +static void +put_waitpid_status(struct trace_proc * proc, const char * name, int status) +{ + const char *signame; + int sig; + + /* + * There is no suitable set of macros to be used here, so we're going + * to invent our own: W_EXITED, W_SIGNALED, and W_STOPPED. Hopefully + * they are sufficiently clear even though they don't actually exist. + * The code below is downright messy, but it also ensures that no bits + * are set unexpectedly in the status. + */ + if (!valuesonly && WIFEXITED(status) && + status == W_EXITCODE(WEXITSTATUS(status), 0)) { + put_value(proc, name, "W_EXITED(%d)", + WEXITSTATUS(status)); + + return; + } + + /* WCOREDUMP() actually returns WCOREFLAG or 0, but better safe.. */ + if (!valuesonly && WIFSIGNALED(status) && status == (W_EXITCODE(0, + WTERMSIG(status)) | (WCOREDUMP(status) ? WCOREFLAG : 0))) { + sig = WTERMSIG(status); + + if ((signame = get_signal_name(sig)) != NULL) + put_value(proc, name, "W_SIGNALED(%s)", signame); + else + put_value(proc, name, "W_SIGNALED(%u)", sig); + + if (WCOREDUMP(status)) + put_text(proc, "|WCOREDUMP"); + + return; + } + + if (!valuesonly && WIFSTOPPED(status) && + status == W_STOPCODE(WSTOPSIG(status))) { + sig = WSTOPSIG(status); + + if ((signame = get_signal_name(sig)) != NULL) + put_value(proc, name, "W_STOPPED(%s)", signame); + else + put_value(proc, name, "W_STOPPED(%u)", sig); + + return; + } + + /* + * If we get here, either valuesonly is enabled or the resulting status + * is not one we recognize, for example because extra bits are set. + */ + put_value(proc, name, "0x%04x", status); +} +/* Print the waitpid PID argument; pm_waitpid_in prints the other arguments. */ +static int +pm_waitpid_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "pid", "%d", m_out->m_lc_pm_waitpid.pid); + + return CT_NOTDONE; +} +/* Print the waitpid status, the options argument, and the call result. */ +static void +pm_waitpid_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + /* + * If the result is zero, there is no status to show.
Also, since the + * status is returned in the result message, we cannot print the user- + * given pointer. Instead, upon failure we show "&.." to indicate an + * unknown pointer. + */ + if (!failed && m_in->m_type > 0) + put_waitpid_status(proc, "status", + m_in->m_pm_lc_waitpid.status); + else + put_field(proc, "status", "&.."); + put_flags(proc, "options", waitpid_options, COUNT(waitpid_options), + "0x%x", m_out->m_lc_pm_waitpid.options); + put_equals(proc); + put_result(proc); +} +/* Print the getpid result and, on success, the parent PID in parentheses. */ +static void +pm_getpid_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + put_result(proc); + if (!failed) { + put_open(proc, NULL, 0, "(", ", "); + put_value(proc, "ppid", "%d", m_in->m_pm_lc_getpid.parent_pid); + put_close(proc, ")"); + } +} + +/* This function is shared between setuid and seteuid. */ +static int +pm_setuid_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "uid", "%u", m_out->m_lc_pm_setuid.uid); + + return CT_DONE; +} +/* Print the getuid result and, on success, the euid in parentheses. */ +static void +pm_getuid_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + put_result(proc); + if (!failed) { + put_open(proc, NULL, 0, "(", ", "); + put_value(proc, "euid", "%u", m_in->m_pm_lc_getuid.euid); + put_close(proc, ")"); + } +} +/* Print the stime call argument: the time in seconds. */ +static int +pm_stime_out(struct trace_proc * proc, const message * m_out) +{ + + put_time(proc, "time", m_out->m_lc_pm_time.sec); + + return CT_DONE; +} +/* + * Print a signal number, by name if get_signal_name() returns a name for it + * and valuesonly is not set. + */ +static void +put_signal(struct trace_proc * proc, const char * name, int sig) +{ + const char *signame; + + if (!valuesonly && (signame = get_signal_name(sig)) != NULL) + put_field(proc, name, signame); + else + put_value(proc, name, "%d", sig); +} +/* Print a ptrace request by name, or as a number if no name was selected. */ +static void +put_ptrace_req(struct trace_proc * proc, const char * name, int req) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (req) { + TEXT(T_STOP); + TEXT(T_OK); + TEXT(T_ATTACH); + TEXT(T_DETACH); + TEXT(T_RESUME); + TEXT(T_STEP); + TEXT(T_SYSCALL); +
TEXT(T_EXIT); + TEXT(T_GETINS); + TEXT(T_GETDATA); + TEXT(T_GETUSER); + TEXT(T_SETINS); + TEXT(T_SETDATA); + TEXT(T_SETUSER); + TEXT(T_SETOPT); + TEXT(T_GETRANGE); + TEXT(T_SETRANGE); + TEXT(T_READB_INS); + TEXT(T_WRITEB_INS); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", req); +} +/* + * Print a ptrace_range structure. The pr_space field is printed as TS_INS or + * TS_DATA where it matches and valuesonly is not set, and as a number + * otherwise. All fields are always printed. + */ +static void +put_struct_ptrace_range(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct ptrace_range pr; + + if (!put_open_struct(proc, name, flags, addr, &pr, sizeof(pr))) + return; + + if (!valuesonly && pr.pr_space == TS_INS) + put_field(proc, "pr_space", "TS_INS"); + else if (!valuesonly && pr.pr_space == TS_DATA) + put_field(proc, "pr_space", "TS_DATA"); + else + put_value(proc, "pr_space", "%d", pr.pr_space); + put_value(proc, "pr_addr", "0x%lx", pr.pr_addr); + put_ptr(proc, "pr_ptr", (vir_bytes)pr.pr_ptr); + put_value(proc, "pr_size", "%zu", pr.pr_size); + + put_close_struct(proc, TRUE /*all*/); +} +/* + * Print the ptrace call arguments. How the addr and data arguments are + * printed depends on the request. + */ +static int +pm_ptrace_out(struct trace_proc * proc, const message * m_out) +{ + + put_ptrace_req(proc, "req", m_out->m_lc_pm_ptrace.req); + put_value(proc, "pid", "%d", m_out->m_lc_pm_ptrace.pid); + + switch (m_out->m_lc_pm_ptrace.req) { + case T_GETINS: + case T_GETDATA: + case T_GETUSER: + case T_READB_INS: + put_value(proc, "addr", "0x%lx", m_out->m_lc_pm_ptrace.addr); + put_value(proc, "data", "%ld", m_out->m_lc_pm_ptrace.data); + break; + case T_SETINS: + case T_SETDATA: + case T_SETUSER: + case T_WRITEB_INS: + put_value(proc, "addr", "0x%lx", m_out->m_lc_pm_ptrace.addr); + put_value(proc, "data", "0x%lx", m_out->m_lc_pm_ptrace.data); + break; + case T_RESUME: + case T_STEP: + case T_SYSCALL: + put_value(proc, "addr", "%ld", m_out->m_lc_pm_ptrace.addr); + put_signal(proc, "data", m_out->m_lc_pm_ptrace.data); + break; + case T_GETRANGE: + case T_SETRANGE: + put_struct_ptrace_range(proc, "addr", 0, + m_out->m_lc_pm_ptrace.addr); + put_value(proc, "data", "%ld", m_out->m_lc_pm_ptrace.data); +
break; + default: + put_value(proc, "addr", "%ld", m_out->m_lc_pm_ptrace.addr); + put_value(proc, "data", "%ld", m_out->m_lc_pm_ptrace.data); + break; + } + + return CT_DONE; +} +/* + * Print the ptrace result. For the four requests that read a value, the data + * field of the reply is printed in hexadecimal if the call did not fail. + */ +static void +pm_ptrace_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + if (!failed) { + switch (m_out->m_lc_pm_ptrace.req) { + case T_GETINS: + case T_GETDATA: + case T_GETUSER: + case T_READB_INS: + put_value(proc, NULL, "0x%lx", + m_in->m_pm_lc_ptrace.data); + return; + } + } + + put_result(proc); +} +/* + * Print an array of 'count' group IDs located at 'addr'. If PF_FAILED is set + * in flags, valuesonly is set, the count is outside 0..NGROUPS_MAX, or the + * copy fails, print the pointer instead, or "&.." if PF_LOCADDR is set. + * NOTE(review): this tests mem_get_data() < 0, whereas put_exec_frame() in + * this file tests for != 0; confirm that the two are equivalent. + */ +void +put_groups(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, int count) +{ + gid_t groups[NGROUPS_MAX]; + int i; + + if ((flags & PF_FAILED) || valuesonly || count < 0 || + count > NGROUPS_MAX || (count > 0 && mem_get_data(proc->pid, addr, + groups, count * sizeof(groups[0])) < 0)) { + if (flags & PF_LOCADDR) + put_field(proc, name, "&.."); + else + put_ptr(proc, name, addr); + + return; + } + + put_open(proc, name, PF_NONAME, "[", ", "); + for (i = 0; i < count; i++) + put_value(proc, NULL, "%u", groups[i]); + put_close(proc, "]"); +} +/* Print the setgroups call arguments: the group count and the group list. */ +static int +pm_setgroups_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "ngroups", "%d", m_out->m_lc_pm_groups.num); + put_groups(proc, "grouplist", 0, m_out->m_lc_pm_groups.ptr, + m_out->m_lc_pm_groups.num); + + return CT_DONE; +} +/* Print the getgroups count; pm_getgroups_in prints the group list. */ +static int +pm_getgroups_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "ngroups", "%d", m_out->m_lc_pm_groups.num); + + return CT_NOTDONE; +} +/* + * Print the group list returned by getgroups, using the reply message type as + * the number of groups, followed by the call result. + */ +static void +pm_getgroups_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + put_groups(proc, "grouplist", failed, m_out->m_lc_pm_groups.ptr, + m_in->m_type); + put_equals(proc); + put_result(proc); +} +/* Print the kill call arguments: the PID and the signal. */ +static int +pm_kill_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "pid", "%d", m_out->m_lc_pm_sig.pid); + put_signal(proc, "sig", m_out->m_lc_pm_sig.nr); + + return
CT_DONE; +} + +/* This function is shared between setgid and setegid. */ +static int +pm_setgid_out(struct trace_proc * proc, const message * m_out) +{ + + put_value(proc, "gid", "%u", m_out->m_lc_pm_setgid.gid); + + return CT_DONE; +} +/* Print the getgid result and, on success, the egid in parentheses. */ +static void +pm_getgid_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + put_result(proc); + if (!failed) { + put_open(proc, NULL, 0, "(", ", "); + put_value(proc, "egid", "%u", m_in->m_pm_lc_getgid.egid); + put_close(proc, ")"); + } +} +/* + * Print one string of the exec frame of 'len' bytes located at 'frame', given + * the string address 'addr' as stored in the frame. Return FALSE without + * printing anything if the address does not translate to a location within + * the frame, and TRUE otherwise. + */ +static int +put_frame_string(struct trace_proc * proc, vir_bytes frame, size_t len, + vir_bytes addr) +{ + vir_bytes stacktop, offset; + + /* + * The addresses in the frame assume that the process has already been + * changed, and the top of the frame is now located at the new process + * stack top, which is a hardcoded system-global value. In order to + * print the strings, we must convert back each address to its location + * within the given frame. + */ + stacktop = kernel_get_stacktop(); + + if (addr >= stacktop) + return FALSE; + offset = stacktop - addr; + if (offset >= len) + return FALSE; + addr = frame + len - offset; + + /* + * TODO: while using put_buf() is highly convenient, it does require at + * least one copy operation per printed string. The strings are very + * likely to be consecutive in memory, so copying in larger chunks at + * once would be preferable. Also, if copying from the frame fails, + * put_buf() will print the string address as we corrected it above, + * rather than the address as found in the frame. A copy failure would + * always be a case of malice on the traced process's behalf, though. + */ + put_buf(proc, NULL, PF_STRING, addr, len - offset); + + return TRUE; +} + +/* + * Print the contents of the exec frame, which includes both pointers and + * actual string data for the arguments and environment variables to be used.
+ * Even though we know that the entire frame is not going to exceed ARG_MAX + * bytes, this is too large a size for a static buffer, and we'd like to avoid + * allocating large dynamic buffers as well. The situation is complicated by + * the fact that any string in the frame may run up to the end of the frame. + * + * If valuesonly is set, only the frame address and length are printed. The + * number of strings printed per array is limited based on the verbosity + * level; with verbose == 0, no environment strings are printed at all. + */ +static void +put_exec_frame(struct trace_proc * proc, vir_bytes addr, size_t len) +{ + void *argv[64]; + size_t off, chunk; + unsigned int i, count, max, argv_max, envp_max; + int first, ok, nulls; + + if (valuesonly) { + put_ptr(proc, "frame", addr); + put_value(proc, "framelen", "%zu", len); + + return; + } + + if (verbose == 0) { + argv_max = 16; + envp_max = 0; + } else if (verbose == 1) + argv_max = envp_max = 64; + else + argv_max = envp_max = INT_MAX; + + off = sizeof(int); /* skip 'argc' at the start of the frame */ + first = TRUE; + ok = TRUE; + nulls = 0; + count = 0; + max = argv_max; + + do { + /* + * Stop at the end of the frame. Without this check, the + * unsigned 'len - off' below wraps around for a frame that is + * too short to hold 'argc', and a frame without two NULL + * pointers could keep the loop going with empty chunks if + * zero-length copies succeed. + */ + if (off >= len) + break; + + chunk = sizeof(argv); + if (chunk > len - off) + chunk = len - off; + + if (mem_get_data(proc->pid, addr + off, argv, chunk) != 0) + break; + + if (first) { + put_open(proc, "argv", PF_NONAME, "[", ", "); + + first = FALSE; + } + + for (i = 0; i < chunk / sizeof(void *) && ok; i++) { + if (argv[i] == NULL) { + if (count > max) + put_tail(proc, count, max); + put_close(proc, "]"); + if (nulls++ == 0) { + put_open(proc, "envp", PF_NONAME, "[", + ", "); + count = 0; + max = envp_max; + } else + break; /* two NULL pointers: done! */ + } else if (count++ < max) + ok = put_frame_string(proc, addr, len, + (vir_bytes)argv[i]); + } + + off += chunk; + } while (nulls < 2 && ok); + + /* + * Handle failure cases, implied by not reaching the second NULL + * in the array. Successful completion is handled in the loop above. + * Note that 'ok' is not always cleared on failure, as it is used only + * to break out of the outer loop.
+ */ + if (first) { + put_ptr(proc, "argv", addr + off); + put_field(proc, "envp", "&.."); + } else if (nulls < 2) { + put_tail(proc, 0, 0); + put_close(proc, "]"); + if (nulls < 1) { + put_open(proc, "envp", PF_NONAME, "[", ", "); + put_tail(proc, 0, 0); + put_close(proc, "]"); + } + } +} +/* Print the exec call arguments: the path, and the exec frame contents. */ +static int +pm_exec_out(struct trace_proc * proc, const message * m_out) +{ + + put_buf(proc, "path", PF_PATH, m_out->m_lc_pm_exec.name, + m_out->m_lc_pm_exec.namelen); + put_exec_frame(proc, m_out->m_lc_pm_exec.frame, + m_out->m_lc_pm_exec.framelen); + + return CT_NORETURN; +} + +/* The idea is that this function may one day print a human-readable time. */ +void +put_time(struct trace_proc * proc, const char * name, time_t time) +{ + + put_value(proc, name, "%"PRId64, time); +} +/* + * Print a timeval structure, with PF_NONAME added to the given flags. If + * PF_ALT is set in flags, the seconds are printed through put_time(). + */ +void +put_struct_timeval(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct timeval tv; + + /* No field names; they just make things harder to read. */ + if (!put_open_struct(proc, name, flags | PF_NONAME, addr, &tv, + sizeof(tv))) + return; + + if (flags & PF_ALT) + put_time(proc, "tv_sec", tv.tv_sec); + else + put_value(proc, "tv_sec", "%"PRId64, tv.tv_sec); + put_value(proc, "tv_usec", "%d", tv.tv_usec); + + put_close_struct(proc, TRUE /*all*/); +} +/* Print an itimerval structure: its it_interval and it_value timevals. */ +static void +put_struct_itimerval(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct itimerval it; + + /* + * This used to pass PF_NONAME, but the layout may not be clear enough + * without names. It does turn simple alarm(1) calls into rather + * lengthy output, though.
+ */ + if (!put_open_struct(proc, name, flags, addr, &it, sizeof(it))) + return; + + put_struct_timeval(proc, "it_interval", PF_LOCADDR, + (vir_bytes)&it.it_interval); + put_struct_timeval(proc, "it_value", PF_LOCADDR, + (vir_bytes)&it.it_value); + + put_close_struct(proc, TRUE /*all*/); +} +/* Print an interval timer type by name, or as a number if none was selected. */ +static void +put_itimer_which(struct trace_proc * proc, const char * name, int which) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (which) { + TEXT(ITIMER_REAL); + TEXT(ITIMER_VIRTUAL); + TEXT(ITIMER_PROF); + TEXT(ITIMER_MONOTONIC); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", which); +} +/* Return the call name: setitimer if a new value is given, else getitimer. */ +static const char * +pm_itimer_name(const message * m_out) +{ + + return (m_out->m_lc_pm_itimer.value != 0) ? "setitimer" : "getitimer"; +} +/* + * Print the timer type and, if given, the new timer value. The call is left + * for pm_itimer_in to complete unless a new value is given without a place to + * store the old value. + */ +static int +pm_itimer_out(struct trace_proc * proc, const message * m_out) +{ + + put_itimer_which(proc, "which", m_out->m_lc_pm_itimer.which); + if (m_out->m_lc_pm_itimer.value != 0) { + put_struct_itimerval(proc, "value", 0, + m_out->m_lc_pm_itimer.value); + + /* + * If there will be no old values to print, finish the call + * now. For setitimer only; getitimer may not pass NULL. + */ + if (m_out->m_lc_pm_itimer.ovalue == 0) { + put_ptr(proc, "ovalue", 0); + + return CT_DONE; + } + } + + return CT_NOTDONE; +} +/* + * Print the structure located at the ovalue address, named "ovalue" if a new + * value was given and "value" otherwise, followed by the call result. + */ +static void +pm_itimer_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + + if (m_out->m_lc_pm_itimer.value == 0 || + m_out->m_lc_pm_itimer.ovalue != 0) { + put_struct_itimerval(proc, + (m_out->m_lc_pm_itimer.value != 0) ? "ovalue" : "value", + failed, m_out->m_lc_pm_itimer.ovalue); + put_equals(proc); + } + put_result(proc); +} +/* Print a mcontext_t structure; currently, none of its fields are printed. */ +static void +put_struct_mcontext(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + mcontext_t ctx; + + if (!put_open_struct(proc, name, flags, addr, &ctx, sizeof(ctx))) + return; + + /* + * TODO: print actual fields.
Then again, the ones that are saved and + * restored (FPU state) are hardly interesting enough to print.. + */ + + put_close_struct(proc, FALSE /*all*/); +} +/* Nothing to print before the call returns; pm_getmcontext_in does it all. */ +static int +pm_getmcontext_out(struct trace_proc * __unused proc, + const message * __unused m_out) +{ + + return CT_NOTDONE; +} +/* Print the context structure of a getmcontext call, and the call result. */ +static void +pm_getmcontext_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + + put_struct_mcontext(proc, "mcp", failed, m_out->m_lc_pm_mcontext.ctx); + put_equals(proc); + put_result(proc); +} +/* Print the setmcontext call argument: the context structure. */ +static int +pm_setmcontext_out(struct trace_proc * proc, const message * m_out) +{ + + put_struct_mcontext(proc, "mcp", 0, m_out->m_lc_pm_mcontext.ctx); + + return CT_DONE; +} +/* + * Print a signal set as a space-separated list of signals, or as an inverted + * list with a "~[" prefix. Signal names are printed without a "SIG" prefix; + * signals for which get_signal_name() has no name are printed as numbers. + */ +static void +put_sigset(struct trace_proc * proc, const char * name, sigset_t set) +{ + const char *signame; + unsigned int count, unknown; + int sig, invert; + + /* + * First decide whether we should print a normal or an inverted mask. + * Unfortunately, depending on the place, a filled set may or may not + * have bits outside the 1..NSIG range set. Therefore, we ignore the + * bits outside this range entirely, and use simple heuristics to + * decide whether to show an inverted set. If we know all the signal + * names for either set and not the other, show that one; otherwise, + * show an inverted mask if at least 3/4th of the bits are set. + */ + count = 0; + unknown = 0; + for (sig = 1; sig < NSIG; sig++) { + if (sigismember(&set, sig)) + count++; + if (get_signal_name(sig) == NULL) + unknown |= 1 << !!sigismember(&set, sig); + } + if (unknown == 1 /*for unset bit*/ || unknown == 2 /*for set bit*/) + invert = unknown - 1; + else + invert = (count >= (NSIG - 1) * 3 / 4); + + put_open(proc, name, PF_NONAME, invert ? "~[" : "[", " "); + + for (sig = 1; sig < NSIG; sig++) { + /* Note that sigismember() may not strictly return 0 or 1.. */ + if (!sigismember(&set, sig) != invert) + continue; + + if ((signame = get_signal_name(sig)) != NULL) { + /* Skip the "SIG" prefix for brevity.
*/ + if (!strncmp(signame, "SIG", 3)) + put_field(proc, NULL, &signame[3]); + else + put_field(proc, NULL, signame); + } else + put_value(proc, NULL, "%d", sig); + } + + put_close(proc, "]"); +} +/* Flag names for the sa_flags field of the sigaction structure. */ +static const struct flags sa_flags[] = { + FLAG(SA_ONSTACK), + FLAG(SA_RESTART), + FLAG(SA_RESETHAND), + FLAG(SA_NODEFER), + FLAG(SA_NOCLDSTOP), + FLAG(SA_NOCLDWAIT), +#ifdef SA_SIGINFO + FLAG(SA_SIGINFO), +#endif + FLAG(SA_NOKERNINFO) +}; +/* + * Print a signal handler: as SIG_DFL, SIG_IGN, or SIG_HOLD where it matches + * and valuesonly is not set, and as a pointer otherwise. + */ +static void +put_sa_handler(struct trace_proc * proc, const char * name, vir_bytes handler) +{ + const char *text = NULL; + + if (!valuesonly) { + switch ((int)handler) { + case (int)SIG_DFL: text = "SIG_DFL"; break; + case (int)SIG_IGN: text = "SIG_IGN"; break; + case (int)SIG_HOLD: text = "SIG_HOLD"; break; + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_ptr(proc, name, handler); +} +/* + * Print a sigaction structure. The handler is always printed, the mask only + * with verbose > 1, and the flags only under the condition given below. + */ +static void +put_struct_sigaction(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct sigaction sa; + + if (!put_open_struct(proc, name, flags, addr, &sa, sizeof(sa))) + return; + + put_sa_handler(proc, "sa_handler", (vir_bytes)sa.sa_handler); + + if (verbose > 1) + put_sigset(proc, "sa_mask", sa.sa_mask); + + /* A somewhat lame attempt to reduce noise a bit. */ + if ((sa.sa_flags & ~(SA_ONSTACK | SA_RESTART | SA_RESETHAND | + SA_NODEFER)) != 0 || sa.sa_handler != SIG_DFL || verbose > 0) + put_flags(proc, "sa_flags", sa_flags, COUNT(sa_flags), "0x%x", + sa.sa_flags); + + put_close_struct(proc, verbose > 1); +} +/* Print the sigaction signal number and the new action structure. */ +static int +pm_sigaction_out(struct trace_proc * proc, const message * m_out) +{ + + put_signal(proc, "signal", m_out->m_lc_pm_sig.nr); + put_struct_sigaction(proc, "act", 0, m_out->m_lc_pm_sig.act); + + /* If there will be no old values to print, finish the call now.
*/ + if (m_out->m_lc_pm_sig.oact == 0) { + put_ptr(proc, "oact", 0); + return CT_DONE; + } else + return CT_NOTDONE; +} +/* Print the old action structure, if one was requested, and the result. */ +static void +pm_sigaction_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + + if (m_out->m_lc_pm_sig.oact != 0) { + put_struct_sigaction(proc, "oact", failed, + m_out->m_lc_pm_sig.oact); + put_equals(proc); + } + put_result(proc); +} +/* Print the sigsuspend call argument: the signal set from the message. */ +static int +pm_sigsuspend_out(struct trace_proc * proc, const message * m_out) +{ + + put_sigset(proc, "set", m_out->m_lc_pm_sigset.set); + + return CT_DONE; +} +/* Nothing to print before the call returns; pm_sigpending_in does it all. */ +static int +pm_sigpending_out(struct trace_proc * __unused proc, + const message * __unused m_out) +{ + + return CT_NOTDONE; +} +/* + * Print the signal set from the sigpending reply message, or "&.." if the + * call failed, followed by the call result. + */ +static void +pm_sigpending_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_sigset(proc, "set", m_in->m_pm_lc_sigset.set); + else + put_field(proc, "set", "&.."); + put_equals(proc); + put_result(proc); +} +/* + * Print the 'how' argument of sigprocmask by name, or as a number if no name + * was selected. NOTE(review): the SIG_INQUIRE label appears to share the + * SIG_BLOCK entry, assuming that TEXT() expands to a case label; confirm. + */ +static void +put_sigprocmask_how(struct trace_proc * proc, const char * name, int how) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (how) { + case SIG_INQUIRE: /* pseudocode, print something else */ + TEXT(SIG_BLOCK); + TEXT(SIG_UNBLOCK); + TEXT(SIG_SETMASK); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", how); +} +/* + * Print the sigprocmask 'how' argument and the new signal set. For + * SIG_INQUIRE, a null pointer is printed in place of the set. + */ +static int +pm_sigprocmask_out(struct trace_proc * proc, const message * m_out) +{ + + put_sigprocmask_how(proc, "how", m_out->m_lc_pm_sigset.how); + if (m_out->m_lc_pm_sigset.how == SIG_INQUIRE) + put_ptr(proc, "set", 0); + else + put_sigset(proc, "set", m_out->m_lc_pm_sigset.set); + + return CT_NOTDONE; +} +/* + * Print the old signal set from the sigprocmask reply message, or "&.." if + * the call failed, followed by the call result. + */ +static void +pm_sigprocmask_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_sigset(proc, "oset", m_in->m_pm_lc_sigset.set); + else + put_field(proc, "oset", "&.."); + put_equals(proc); + put_result(proc); +} + +static int
+pm_sigreturn_out(struct trace_proc * proc, const message * m_out) +{ + struct sigcontext scp; + + /* + * Print the sigreturn context structure: the program counter and + * stack pointer at any nonzero verbosity level, and the signal mask + * always. The call returns CT_NORETURN. + */ + if (put_open_struct(proc, "scp", 0, m_out->m_lc_pm_sigset.ctx, &scp, + sizeof(scp))) { + /* Higher verbosity levels must not print less than level 1. */ + if (verbose > 0) { +#if defined(__i386__) + put_ptr(proc, "sc_eip", scp.sc_eip); + put_ptr(proc, "sc_esp", scp.sc_esp); +#elif defined(__arm__) + put_ptr(proc, "sc_pc", scp.sc_pc); + put_ptr(proc, "sc_usr_sp", scp.sc_usr_sp); +#endif + } + + /* + * We deliberately print the signal set from the message rather + * than from the structure, since in theory they may be + * different and PM uses the one from the message only. + */ + put_sigset(proc, "sc_mask", m_out->m_lc_pm_sigset.set); + + /* + * TODO: print some other fields, although it is probably not + * useful to print all registers even with verbose > 1? + */ + put_close_struct(proc, FALSE /*all*/); + } + + return CT_NORETURN; +} +/* Print the sigreturn result, but only if the call failed. */ +static void +pm_sigreturn_in(struct trace_proc * proc, const message * __unused m_out, + const message * __unused m_in, int failed) +{ + + if (failed) { + put_equals(proc); + put_result(proc); + } +} +/* Print a sysuname field by name, or as a number if no name was selected. */ +static void +put_sysuname_field(struct trace_proc * proc, const char * name, int field) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (field) { + TEXT(_UTS_ARCH); + TEXT(_UTS_KERNEL); + TEXT(_UTS_MACHINE); + TEXT(_UTS_HOSTNAME); + TEXT(_UTS_NODENAME); + TEXT(_UTS_RELEASE); + TEXT(_UTS_VERSION); + TEXT(_UTS_SYSNAME); + TEXT(_UTS_BUS); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", field); +} +/* + * Print the sysuname request and field. For requests other than _UTS_GET, + * the value and length are printed as well, completing the call. + */ +static int +pm_sysuname_out(struct trace_proc * proc, const message * m_out) +{ + + if (!valuesonly && m_out->m_lc_pm_sysuname.req == _UTS_GET) + put_field(proc, "req", "_UTS_GET"); + else if (!valuesonly && m_out->m_lc_pm_sysuname.req == _UTS_SET) + put_field(proc, "req", "_UTS_SET"); + else + put_value(proc, "req", "%d", m_out->m_lc_pm_sysuname.req); + put_sysuname_field(proc, "field", m_out->m_lc_pm_sysuname.field); + + if
(m_out->m_lc_pm_sysuname.req == _UTS_GET) + return CT_NOTDONE; + + put_buf(proc, "value", PF_STRING, m_out->m_lc_pm_sysuname.value, + m_out->m_lc_pm_sysuname.len); + put_value(proc, "len", "%d", m_out->m_lc_pm_sysuname.len); + return CT_DONE; +} +/* + * For _UTS_GET requests, print the value buffer, using the reply message type + * as its size, and the requested length. Then print the call result. + */ +static void +pm_sysuname_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + if (m_out->m_lc_pm_sysuname.req == _UTS_GET) { + put_buf(proc, "value", failed | PF_STRING, + m_out->m_lc_pm_sysuname.value, m_in->m_type); + put_value(proc, "len", "%d", m_out->m_lc_pm_sysuname.len); + put_equals(proc); + } + put_result(proc); +} +/* Print a priority 'which' value by name, or as a number if none matched. */ +static void +put_priority_which(struct trace_proc * proc, const char * name, int which) +{ + const char *text = NULL; + + if (!valuesonly) { + switch (which) { + TEXT(PRIO_PROCESS); + TEXT(PRIO_PGRP); + TEXT(PRIO_USER); + } + } + + if (text != NULL) + put_field(proc, name, text); + else + put_value(proc, name, "%d", which); +} +/* Print the getpriority call arguments: 'which' and 'who'. */ +static int +pm_getpriority_out(struct trace_proc * proc, const message * m_out) +{ + + put_priority_which(proc, "which", m_out->m_lc_pm_priority.which); + put_value(proc, "who", "%d", m_out->m_lc_pm_priority.who); + + return CT_DONE; +} +/* + * Print the getpriority result. On success, this is the reply message type + * with PRIO_MIN added to it. + */ +static void +pm_getpriority_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_value(proc, NULL, "%d", m_in->m_type + PRIO_MIN); + else + put_result(proc); +} +/* Print the setpriority call arguments: 'which', 'who', and the priority. */ +static int +pm_setpriority_out(struct trace_proc * proc, const message * m_out) +{ + + put_priority_which(proc, "which", m_out->m_lc_pm_priority.which); + put_value(proc, "who", "%d", m_out->m_lc_pm_priority.who); + put_value(proc, "prio", "%d", m_out->m_lc_pm_priority.prio); + + return CT_DONE; +} +/* Nothing to print before the call returns; pm_gettimeofday_in does it all. */ +static int +pm_gettimeofday_out(struct trace_proc * __unused proc, + const message * __unused m_out) +{ + + return CT_NOTDONE; +} +/* + * Print the given seconds and nanoseconds as though they were the contents of + * a timeval structure, dividing the nanoseconds by 1000 for the tv_usec field. + */ +static void +put_timespec_as_timeval(struct trace_proc * proc, const char * name, + time_t sec, long nsec) +{ + + /* No field names
within the structure. */
+	put_open(proc, name, PF_NONAME, "{", ", ");
+
+	put_time(proc, "tv_sec", sec);
+	put_value(proc, "tv_usec", "%ld", nsec / 1000);
+
+	put_close(proc, "}");
+}
+
+/*
+ * Reply side of gettimeofday: print the returned time as a timeval (or "&.."
+ * if the call failed), a null "tzp" pointer, and the call result.
+ */
+static void
+pm_gettimeofday_in(struct trace_proc * proc, const message * __unused m_out,
+    const message * m_in, int failed)
+{
+
+	if (!failed) {
+		/*
+		 * The system call returns values which do not match the call
+		 * being made, so just like libc, we have to correct..
+		 */
+		put_timespec_as_timeval(proc, "tp", m_in->m_pm_lc_time.sec,
+		    m_in->m_pm_lc_time.nsec);
+	} else
+		put_field(proc, "tp", "&..");
+	put_ptr(proc, "tzp", 0);	/* not part of the system call (yet) */
+
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Request side of getsid: print the "pid" argument. */
+static int
+pm_getsid_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_value(proc, "pid", "%d", m_out->m_lc_pm_getsid.pid);
+
+	return CT_DONE;
+}
+
+/*
+ * Print a clock identifier, by CLOCK_ name where possible (and valuesonly is
+ * not set), or as a decimal number otherwise.  CLOCK_VIRTUAL and CLOCK_PROF
+ * are only considered if the headers define them.
+ */
+static void
+put_clockid(struct trace_proc * proc, const char * name, clockid_t clock_id)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (clock_id) {
+		TEXT(CLOCK_REALTIME);
+#ifdef CLOCK_VIRTUAL
+		TEXT(CLOCK_VIRTUAL);
+#endif
+#ifdef CLOCK_PROF
+		TEXT(CLOCK_PROF);
+#endif
+		TEXT(CLOCK_MONOTONIC);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", clock_id);
+}
+
+/*
+ * Print a seconds/nanoseconds pair as a struct timespec.  If PF_FAILED is set
+ * in flags, only the placeholder "&.." is printed.  If PF_ALT is set, the
+ * seconds are printed through put_time(); otherwise as a plain number.
+ */
+static void
+put_clock_timespec(struct trace_proc * proc, const char * name, int flags,
+    time_t sec, long nsec)
+{
+
+	if (flags & PF_FAILED) {
+		put_field(proc, name, "&..");
+
+		return;
+	}
+
+	/* No field names within the structure. */
+	put_open(proc, name, PF_NONAME, "{", ", ");
+
+	if (flags & PF_ALT)
+		put_time(proc, "tv_sec", sec);
+	else
+		put_value(proc, "tv_sec", "%"PRId64, sec);
+	put_value(proc, "tv_nsec", "%ld", nsec);
+
+	put_close(proc, "}");
+}
+
+/* This function is shared between clock_getres and clock_gettime.
*/
+static int
+pm_clock_get_out(struct trace_proc * proc, const message * m_out)
+{
+
+	/* Only the clock ID is known now; the time is printed on reply. */
+	put_clockid(proc, "clock_id", m_out->m_lc_pm_time.clk_id);
+
+	return CT_NOTDONE;
+}
+
+/*
+ * Reply side of clock_getres: print the returned resolution as "res" (or the
+ * failure placeholder), followed by the call result.
+ */
+static void
+pm_clock_getres_in(struct trace_proc * proc, const message * __unused m_out,
+    const message * m_in, int failed)
+{
+
+	put_clock_timespec(proc, "res", failed, m_in->m_pm_lc_time.sec,
+	    m_in->m_pm_lc_time.nsec);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Same as pm_clock_getres_in, but different field name and the option to print
+ * at least some results as time strings (in the future).
+ */
+static void
+pm_clock_gettime_in(struct trace_proc * proc, const message * m_out,
+    const message * m_in, int failed)
+{
+	int flags;
+
+	/* Start from the failure flag and add PF_ALT for the realtime clock. */
+	flags = failed;
+	if (m_out->m_lc_pm_time.clk_id == CLOCK_REALTIME)
+		flags |= PF_ALT; /* TODO: make this print a time string. */
+
+	put_clock_timespec(proc, "tp", flags, m_in->m_pm_lc_time.sec,
+	    m_in->m_pm_lc_time.nsec);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Return the name under which this call is printed: "adjtime" if the "now"
+ * field of the request is zero, and "clock_settime" otherwise.
+ */
+static const char *
+pm_clock_settime_name(const message * m_out)
+{
+
+	if (m_out->m_lc_pm_time.now == 0)
+		return "adjtime";
+	else
+		return "clock_settime";
+}
+
+/*
+ * Request side of the combined adjtime/clock_settime call; which of the two
+ * argument lists is printed depends on the "now" field of the request.
+ */
+static int
+pm_clock_settime_out(struct trace_proc * proc, const message * m_out)
+{
+	int flags;
+
+	/* These two calls just look completely different..
*/
+	if (m_out->m_lc_pm_time.now == 0) {
+		/* adjtime(delta, odelta) */
+		put_timespec_as_timeval(proc, "delta", m_out->m_lc_pm_time.sec,
+		    m_out->m_lc_pm_time.nsec);
+		put_ptr(proc, "odelta", 0);	/* not supported on MINIX3 */
+	} else {
+		/* clock_settime(clock_id, tp) */
+		flags = 0;
+		if (m_out->m_lc_pm_time.clk_id == CLOCK_REALTIME)
+			flags |= PF_ALT;
+		put_clockid(proc, "clock_id", m_out->m_lc_pm_time.clk_id);
+		put_clock_timespec(proc, "tp", flags, m_out->m_lc_pm_time.sec,
+		    m_out->m_lc_pm_time.nsec);
+	}
+
+	return CT_DONE;
+}
+
+/*
+ * Request side of the PM getrusage call: print the "who" argument, by name
+ * for RUSAGE_SELF and RUSAGE_CHILDREN unless valuesonly is set.  The rusage
+ * structure itself is printed by pm_getrusage_in.
+ */
+static int
+pm_getrusage_out(struct trace_proc * proc, const message * m_out)
+{
+
+	if (!valuesonly && m_out->m_lc_pm_rusage.who == RUSAGE_SELF)
+		put_field(proc, "who", "RUSAGE_SELF");
+	else if (!valuesonly && m_out->m_lc_pm_rusage.who == RUSAGE_CHILDREN)
+		put_field(proc, "who", "RUSAGE_CHILDREN");
+	else
+		put_value(proc, "who", "%d", m_out->m_lc_pm_rusage.who);
+
+	return CT_NOTDONE;
+}
+
+/*
+ * Reply side of the PM getrusage call: print selected fields of the rusage
+ * structure at the address given in the request, then the call result.
+ */
+static void
+pm_getrusage_in(struct trace_proc * proc, const message * m_out,
+    const message * __unused m_in, int failed)
+{
+	struct rusage buf;
+
+	/* Inline; we will certainly not be reusing this anywhere else.
*/
+	if (put_open_struct(proc, "rusage", failed, m_out->m_lc_pm_rusage.addr,
+	    &buf, sizeof(buf))) {
+		put_struct_timeval(proc, "ru_utime", PF_LOCADDR,
+		    (vir_bytes)&buf.ru_utime);
+		put_struct_timeval(proc, "ru_stime", PF_LOCADDR,
+		    (vir_bytes)&buf.ru_stime);
+
+		/* ru_nsignals is shown only at verbosity level 1 and up. */
+		if (verbose > 0)
+			put_value(proc, "ru_nsignals", "%ld", buf.ru_nsignals);
+		put_close_struct(proc, verbose > 0);
+	}
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Flag table for the "how" argument of reboot, used by pm_reboot_out. */
+static const struct flags reboot_flags[] = {
+	FLAG_ZERO(RB_AUTOBOOT),
+	FLAG(RB_ASKNAME),
+	FLAG(RB_DUMP),
+	FLAG_MASK(RB_POWERDOWN, RB_HALT),
+	FLAG(RB_POWERDOWN),
+	FLAG(RB_INITNAME),
+	FLAG(RB_KDB),
+	FLAG(RB_NOSYNC),
+	FLAG(RB_RDONLY),
+	FLAG(RB_SINGLE),
+	FLAG(RB_STRING),
+	FLAG(RB_USERCONF),
+};
+
+/*
+ * Request side of reboot: print the "how" flags and a null "bootstr" pointer.
+ */
+static int
+pm_reboot_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_flags(proc, "how", reboot_flags, COUNT(reboot_flags), "0x%x",
+	    m_out->m_lc_pm_reboot.how);
+	put_ptr(proc, "bootstr", 0);	/* not supported on MINIX3 */
+
+	return CT_DONE;
+}
+
+/*
+ * Request side of the PM svrctl call: print the request code and hand the
+ * argument to the ioctl argument printer, with is_svrctl set to TRUE.  The
+ * return value is whatever put_ioctl_arg_out() returns.
+ */
+static int
+pm_svrctl_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_ioctl_req(proc, "request", m_out->m_lc_svrctl.request,
+	    TRUE /*is_svrctl*/);
+	return put_ioctl_arg_out(proc, "arg", m_out->m_lc_svrctl.request,
+	    m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/);
+}
+
+/*
+ * Reply side of the PM svrctl call: hand the argument to the reply-side ioctl
+ * argument printer, with is_svrctl set to TRUE.
+ */
+static void
+pm_svrctl_in(struct trace_proc * proc, const message * m_out,
+    const message * __unused m_in, int failed)
+{
+
+	put_ioctl_arg_in(proc, "arg", failed, m_out->m_lc_svrctl.request,
+	    m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/);
+}
+
+/*
+ * Request side of sprofile: print the action, sample memory size, frequency,
+ * interrupt type, and the control and memory pointers.
+ */
+static int
+pm_sprof_out(struct trace_proc * proc, const message * m_out)
+{
+	int freq;
+
+	if (!valuesonly && m_out->m_lc_pm_sprof.action == PROF_START)
+		put_field(proc, "action", "PROF_START");
+	else if (!valuesonly && m_out->m_lc_pm_sprof.action == PROF_STOP)
+		put_field(proc, "action", "PROF_STOP");
+	else
+		put_value(proc, "action", "%d", m_out->m_lc_pm_sprof.action);
+
+	put_value(proc, "size", "%zu", m_out->m_lc_pm_sprof.mem_size);
+
+	freq =
m_out->m_lc_pm_sprof.freq;
+	/*
+	 * For values 3 through 15, also show 2^(16-freq) as a frequency in Hz
+	 * inside a comment in the output.
+	 */
+	if (!valuesonly && freq >= 3 && freq <= 15) /* no constants.. */
+		put_value(proc, "freq", "%u /*%uHz*/", freq, 1 << (16 - freq));
+	else
+		put_value(proc, "freq", "%u", freq);
+
+	if (!valuesonly && m_out->m_lc_pm_sprof.intr_type == PROF_RTC)
+		put_field(proc, "type", "PROF_RTC");
+	else if (!valuesonly && m_out->m_lc_pm_sprof.intr_type == PROF_NMI)
+		put_field(proc, "type", "PROF_NMI");
+	else
+		put_value(proc, "type", "%d", m_out->m_lc_pm_sprof.intr_type);
+
+	put_ptr(proc, "ctl_ptr", m_out->m_lc_pm_sprof.ctl_ptr);
+	put_ptr(proc, "mem_ptr", m_out->m_lc_pm_sprof.mem_ptr);
+
+	return CT_DONE;
+}
+
+/* Designated-initializer index of PM call "c", relative to PM_BASE. */
+#define PM_CALL(c) [((PM_ ## c) - PM_BASE)]
+
+/*
+ * Table of PM calls, indexed by call number minus PM_BASE.  Each entry gives
+ * the name to print (or a function returning it, for HANDLER_NAME) and the
+ * functions that print the request and the reply.
+ */
+static const struct call_handler pm_map[] = {
+	PM_CALL(EXIT) = HANDLER("exit", pm_exit_out, default_in),
+	PM_CALL(FORK) = HANDLER("fork", default_out, default_in),
+	PM_CALL(WAITPID) = HANDLER("waitpid", pm_waitpid_out, pm_waitpid_in),
+	PM_CALL(GETPID) = HANDLER("getpid", default_out, pm_getpid_in),
+	PM_CALL(SETUID) = HANDLER("setuid", pm_setuid_out, default_in),
+	PM_CALL(GETUID) = HANDLER("getuid", default_out, pm_getuid_in),
+	PM_CALL(STIME) = HANDLER("stime", pm_stime_out, default_in),
+	PM_CALL(PTRACE) = HANDLER("ptrace", pm_ptrace_out, pm_ptrace_in),
+	PM_CALL(SETGROUPS) = HANDLER("setgroups", pm_setgroups_out,
+	    default_in),
+	PM_CALL(GETGROUPS) = HANDLER("getgroups", pm_getgroups_out,
+	    pm_getgroups_in),
+	PM_CALL(KILL) = HANDLER("kill", pm_kill_out, default_in),
+	PM_CALL(SETGID) = HANDLER("setgid", pm_setgid_out, default_in),
+	PM_CALL(GETGID) = HANDLER("getgid", default_out, pm_getgid_in),
+	PM_CALL(EXEC) = HANDLER("execve", pm_exec_out, default_in),
+	PM_CALL(SETSID) = HANDLER("setsid", default_out, default_in),
+	PM_CALL(GETPGRP) = HANDLER("getpgrp", default_out, default_in),
+	PM_CALL(ITIMER) = HANDLER_NAME(pm_itimer_name, pm_itimer_out,
+	    pm_itimer_in),
+	PM_CALL(GETMCONTEXT) = HANDLER("getmcontext", pm_getmcontext_out,
+	    pm_getmcontext_in),
+	PM_CALL(SETMCONTEXT) =
HANDLER("setmcontext", pm_setmcontext_out,
+	    default_in),
+	PM_CALL(SIGACTION) = HANDLER("sigaction", pm_sigaction_out,
+	    pm_sigaction_in),
+	PM_CALL(SIGSUSPEND) = HANDLER("sigsuspend", pm_sigsuspend_out,
+	    default_in),
+	PM_CALL(SIGPENDING) = HANDLER("sigpending", pm_sigpending_out,
+	    pm_sigpending_in),
+	PM_CALL(SIGPROCMASK) = HANDLER("sigprocmask", pm_sigprocmask_out,
+	    pm_sigprocmask_in),
+	PM_CALL(SIGRETURN) = HANDLER("sigreturn", pm_sigreturn_out,
+	    pm_sigreturn_in),
+	PM_CALL(SYSUNAME) = HANDLER("sysuname", pm_sysuname_out,
+	    pm_sysuname_in),
+	PM_CALL(GETPRIORITY) = HANDLER("getpriority", pm_getpriority_out,
+	    pm_getpriority_in),
+	PM_CALL(SETPRIORITY) = HANDLER("setpriority", pm_setpriority_out,
+	    default_in),
+	PM_CALL(GETTIMEOFDAY) = HANDLER("gettimeofday", pm_gettimeofday_out,
+	    pm_gettimeofday_in),
+	PM_CALL(SETEUID) = HANDLER("seteuid", pm_setuid_out, default_in),
+	PM_CALL(SETEGID) = HANDLER("setegid", pm_setgid_out, default_in),
+	PM_CALL(ISSETUGID) = HANDLER("issetugid", default_out, default_in),
+	PM_CALL(GETSID) = HANDLER("getsid", pm_getsid_out, default_in),
+	PM_CALL(CLOCK_GETRES) = HANDLER("clock_getres", pm_clock_get_out,
+	    pm_clock_getres_in),
+	PM_CALL(CLOCK_GETTIME) = HANDLER("clock_gettime", pm_clock_get_out,
+	    pm_clock_gettime_in),
+	PM_CALL(CLOCK_SETTIME) = HANDLER_NAME(pm_clock_settime_name,
+	    pm_clock_settime_out, default_in),
+	PM_CALL(GETRUSAGE) = HANDLER("pm_getrusage", pm_getrusage_out,
+	    pm_getrusage_in),
+	PM_CALL(REBOOT) = HANDLER("reboot", pm_reboot_out, default_in),
+	PM_CALL(SVRCTL) = HANDLER("pm_svrctl", pm_svrctl_out, pm_svrctl_in),
+	PM_CALL(SPROF) = HANDLER("sprofile", pm_sprof_out, default_in),
+};
+
+/*
+ * Exported descriptor for the PM call table: the endpoint the calls are sent
+ * to, the base call number, and the handler map with its entry count.
+ */
+const struct calls pm_calls = {
+	.endpt = PM_PROC_NR,
+	.base = PM_BASE,
+	.map = pm_map,
+	.count = COUNT(pm_map)
+};
diff --git a/minix/usr.bin/trace/service/rs.c b/minix/usr.bin/trace/service/rs.c
new file mode 100644
index 000000000..514a4836c
--- /dev/null
+++ b/minix/usr.bin/trace/service/rs.c
@@ -0,0 +1,140 @@
+
+#include "inc.h"
+
+#include
+
+/* Flag table for the rss_flags field of struct rs_start. */
+static const struct flags rss_flags[] = {
+	FLAG(RSS_COPY),
+	FLAG(RSS_REUSE),
+	FLAG(RSS_NOBLOCK),
+	FLAG(RSS_REPLICA),
+	FLAG(RSS_SELF_LU),
+	FLAG(RSS_SYS_BASIC_CALLS),
+	FLAG(RSS_VM_BASIC_CALLS),
+	FLAG(RSS_NO_BIN_EXP),
+};
+
+/*
+ * Print the rs_start structure located at the given address in the traced
+ * process.  The command, label, and user ID are always printed; the other
+ * fields depend on the verbosity level.  The structure is always closed as
+ * incomplete, since not all of its fields are printed yet.
+ */
+static void
+put_struct_rs_start(struct trace_proc * proc, const char * name,
+    vir_bytes addr)
+{
+	struct rs_start buf;
+
+	if (!put_open_struct(proc, name, 0, addr, &buf, sizeof(buf)))
+		return;
+
+	if (verbose > 0)
+		put_flags(proc, "rss_flags", rss_flags, COUNT(rss_flags),
+		    "0x%x", buf.rss_flags);
+	put_buf(proc, "rss_cmd", 0, (vir_bytes)buf.rss_cmd, buf.rss_cmdlen);
+	put_buf(proc, "rss_label", 0, (vir_bytes)buf.rss_label.l_addr,
+	    buf.rss_label.l_len);
+	/* These two are shown at level 0 only if they are nonzero. */
+	if (verbose > 0 || buf.rss_major != 0)
+		put_value(proc, "rss_major", "%d", buf.rss_major);
+	if (verbose > 0 || buf.devman_id != 0)
+		put_value(proc, "devman_id", "%d", buf.devman_id);
+	put_value(proc, "rss_uid", "%u", buf.rss_uid);
+	if (verbose > 0) {
+		put_endpoint(proc, "rss_sigmgr", buf.rss_sigmgr);
+		/* The scheduler has its own field; do not reuse rss_sigmgr. */
+		put_endpoint(proc, "rss_scheduler", buf.rss_scheduler);
+	}
+	if (verbose > 1) {
+		put_value(proc, "rss_priority", "%d", buf.rss_priority);
+		put_value(proc, "rss_quantum", "%d", buf.rss_quantum);
+	}
+	if (verbose > 0) {
+		put_value(proc, "rss_period", "%ld", buf.rss_period);
+		put_buf(proc, "rss_script", 0, (vir_bytes)buf.rss_script,
+		    buf.rss_scriptlen);
+	}
+
+	put_close_struct(proc, FALSE /*all*/); /* TODO: the remaining fields */
+}
+
+/* This function is shared between rs_up and rs_edit. */
+static int
+rs_up_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_struct_rs_start(proc, "addr", (vir_bytes)m_out->m_rs_req.addr);
+
+	return CT_DONE;
+}
+
+/*
+ * This function is shared between rs_down, rs_refresh, rs_restart, and
+ * rs_clone.
+ */
+static int
+rs_label_out(struct trace_proc * proc, const message * m_out)
+{
+
+	/*
+	 * We are not using PF_STRING here, because unlike in most places
+	 * (including rs_lookup), the string length does not include the
+	 * terminating NULL character.
+	 */
+	put_buf(proc, "label", 0, (vir_bytes)m_out->m_rs_req.addr,
+	    m_out->m_rs_req.len);
+
+	return CT_DONE;
+}
+
+/*
+ * Request side of rs_update: print the rs_start structure, followed by the
+ * raw state and maximum preparation time values.
+ */
+static int
+rs_update_out(struct trace_proc * proc, const message * m_out)
+{
+
+	/*
+	 * FIXME: this is a value from the wrong message union, and that is
+	 * actually a minix bug.
+	 */
+	put_struct_rs_start(proc, "addr", (vir_bytes)m_out->m_rs_req.addr);
+
+	/* TODO: interpret these fields */
+	put_value(proc, "state", "%d", m_out->m_rs_update.state);
+	put_value(proc, "maxtime", "%d", m_out->m_rs_update.prepare_maxtime);
+
+	return CT_DONE;
+}
+
+/* Request side of rs_lookup: print the label being looked up as a string. */
+static int
+rs_lookup_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "label", PF_STRING, (vir_bytes)m_out->m_rs_req.name,
+	    m_out->m_rs_req.name_len);
+
+	return CT_DONE;
+}
+
+/*
+ * Reply side of rs_lookup: on success, print the endpoint from the reply as
+ * the call's result; on failure, print the regular call result.
+ */
+static void
+rs_lookup_in(struct trace_proc * proc, const message * __unused m_out,
+    const message * m_in, int failed)
+{
+
+	if (!failed)
+		put_endpoint(proc, NULL, m_in->m_rs_req.endpoint);
+	else
+		put_result(proc);
+}
+
+/* Designated-initializer index of RS call "c", relative to RS_RQ_BASE. */
+#define RS_CALL(c) [((RS_ ## c) - RS_RQ_BASE)]
+
+/*
+ * Table of RS calls, indexed by call number minus RS_RQ_BASE.  Each entry
+ * gives the name to print and the request and reply print functions.
+ */
+static const struct call_handler rs_map[] = {
+	RS_CALL(UP) = HANDLER("rs_up", rs_up_out, default_in),
+	RS_CALL(DOWN) = HANDLER("rs_down", rs_label_out, default_in),
+	RS_CALL(REFRESH) = HANDLER("rs_refresh", rs_label_out, default_in),
+	RS_CALL(RESTART) = HANDLER("rs_restart", rs_label_out, default_in),
+	RS_CALL(SHUTDOWN) = HANDLER("rs_shutdown", default_out, default_in),
+	RS_CALL(CLONE) = HANDLER("rs_clone", rs_label_out, default_in),
+	RS_CALL(UPDATE) = HANDLER("rs_update", rs_update_out, default_in),
+	RS_CALL(EDIT) = HANDLER("rs_edit", rs_up_out, default_in),
+	RS_CALL(LOOKUP) = HANDLER("rs_lookup", rs_lookup_out, rs_lookup_in),
+};
+
+/* Exported descriptor for the RS call table. */
+const struct calls rs_calls = {
+	.endpt =
RS_PROC_NR,
+	.base = RS_RQ_BASE,
+	.map = rs_map,
+	.count = COUNT(rs_map)
+};
diff --git a/minix/usr.bin/trace/service/vfs.c b/minix/usr.bin/trace/service/vfs.c
new file mode 100644
index 000000000..71006c1cb
--- /dev/null
+++ b/minix/usr.bin/trace/service/vfs.c
@@ -0,0 +1,1457 @@
+
+#include "inc.h"
+
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * This function should always be used when printing a file descriptor.  It
+ * currently offers no benefit, but will in the future allow for features such
+ * as color highlighting and tracking of specific open files (TODO).
+ */
+void
+put_fd(struct trace_proc * proc, const char * name, int fd)
+{
+
+	put_value(proc, name, "%d", fd);
+}
+
+/*
+ * Request side of read: print the file descriptor only.  The buffer and
+ * length are printed by vfs_read_in.
+ */
+static int
+vfs_read_out(struct trace_proc * proc, const message *m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd);
+
+	return CT_NOTDONE;
+}
+
+/*
+ * Reply side of read: print the buffer, using the reply's m_type as the
+ * number of bytes to show, then the requested length and the call result.
+ */
+static void
+vfs_read_in(struct trace_proc * proc, const message *m_out,
+    const message *m_in, int failed)
+{
+
+	put_buf(proc, "buf", failed, m_out->m_lc_vfs_readwrite.buf,
+	    m_in->m_type);
+	put_value(proc, "len", "%zu", m_out->m_lc_vfs_readwrite.len);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Request side of write: print the file descriptor, buffer, and length. */
+static int
+vfs_write_out(struct trace_proc * proc, const message *m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd);
+	put_buf(proc, "buf", 0, m_out->m_lc_vfs_readwrite.buf,
+	    m_out->m_lc_vfs_readwrite.len);
+	put_value(proc, "len", "%zu", m_out->m_lc_vfs_readwrite.len);
+
+	return CT_DONE;
+}
+
+/*
+ * Print a "whence" value, by SEEK_ name where possible (and valuesonly is not
+ * set), or as a decimal number otherwise.
+ */
+static void
+put_lseek_whence(struct trace_proc * proc, const char * name, int whence)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (whence) {
+		TEXT(SEEK_SET);
+		TEXT(SEEK_CUR);
+		TEXT(SEEK_END);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", whence);
+}
+
+/* Request side of lseek: print the file descriptor, offset, and whence. */
+static int
+vfs_lseek_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_lseek.fd);
+	put_value(proc, "offset", "%"PRId64,
m_out->m_lc_vfs_lseek.offset);
+	put_lseek_whence(proc, "whence", m_out->m_lc_vfs_lseek.whence);
+
+	return CT_DONE;
+}
+
+/*
+ * Reply side of lseek: on success, print the 64-bit offset from the reply as
+ * the call's result; on failure, print the regular call result.
+ */
+static void
+vfs_lseek_in(struct trace_proc * proc, const message * __unused m_out,
+    const message * m_in, int failed)
+{
+
+	if (!failed)
+		put_value(proc, NULL, "%"PRId64, m_in->m_vfs_lc_lseek.offset);
+	else
+		put_result(proc);
+}
+
+/*
+ * Flag table for open(2)-style flags.  The access mode entries must remain
+ * first; put_open_flags skips them by count when asked to.
+ */
+static const struct flags open_flags[] = {
+	FLAG_MASK(O_ACCMODE, O_RDONLY),
+	FLAG_MASK(O_ACCMODE, O_WRONLY),
+	FLAG_MASK(O_ACCMODE, O_RDWR),
+#define ACCMODE_ENTRIES	3	/* the first N entries are for O_ACCMODE */
+	FLAG(O_NONBLOCK),
+	FLAG(O_APPEND),
+	FLAG(O_SHLOCK),
+	FLAG(O_EXLOCK),
+	FLAG(O_ASYNC),
+	FLAG(O_SYNC),
+	FLAG(O_NOFOLLOW),
+	FLAG(O_CREAT),
+	FLAG(O_TRUNC),
+	FLAG(O_EXCL),
+	FLAG(O_NOCTTY),
+	FLAG(O_DSYNC),
+	FLAG(O_RSYNC),
+	FLAG(O_ALT_IO),
+	FLAG(O_DIRECT),
+	FLAG(O_DIRECTORY),
+	FLAG(O_CLOEXEC),
+	FLAG(O_SEARCH),
+	FLAG(O_NOSIGPIPE),
+};
+
+/*
+ * Print a set of open flags.  If "full" is nonzero, the access mode is
+ * printed as well; if it is zero, the access mode entries are skipped.
+ */
+static void
+put_open_flags(struct trace_proc * proc, const char * name, int value,
+    int full)
+{
+	const struct flags *fp;
+	unsigned int num;
+
+	fp = open_flags;
+	num = COUNT(open_flags);
+
+	/*
+	 * If we're not printing a full open()-style set of flags, but instead
+	 * just a loose set of flags, then skip the access mode altogether,
+	 * otherwise we'd be printing O_RDONLY when no access mode is given.
+	 */
+	if (!full) {
+		fp += ACCMODE_ENTRIES;
+		num -= ACCMODE_ENTRIES;
+	}
+
+	put_flags(proc, name, fp, num, "0x%x", value);
+}
+
+/* Flag table for file modes: the file type, followed by special mode bits. */
+static const struct flags mode_flags[] = {
+	FLAG_MASK(S_IFMT, S_IFIFO),
+	FLAG_MASK(S_IFMT, S_IFCHR),
+	FLAG_MASK(S_IFMT, S_IFDIR),
+	FLAG_MASK(S_IFMT, S_IFBLK),
+	FLAG_MASK(S_IFMT, S_IFREG),
+	FLAG_MASK(S_IFMT, S_IFLNK),
+	FLAG_MASK(S_IFMT, S_IFSOCK),
+	FLAG_MASK(S_IFMT, S_IFWHT),
+	FLAG(S_ARCH1),
+	FLAG(S_ARCH2),
+	FLAG(S_ISUID),
+	FLAG(S_ISGID),
+	FLAG(S_ISTXT),
+};
+
+/* Do not use %04o instead of 0%03o; it is octal even if greater than 0777.
*/
+#define put_mode(p, n, v) \
+	put_flags(p, n, mode_flags, COUNT(mode_flags), "0%03o", v)
+
+/*
+ * Print the path of a "path"-type VFS request.  If the length is at most
+ * M_PATH_STRING_MAX, the path is taken from the buffer inside the message
+ * itself (a local address, hence PF_LOCADDR); otherwise it is fetched through
+ * the pointer given in the message.
+ */
+static void
+put_path(struct trace_proc * proc, const message * m_out)
+{
+	size_t len;
+
+	if ((len = m_out->m_lc_vfs_path.len) <= M_PATH_STRING_MAX)
+		put_buf(proc, "path", PF_LOCADDR | PF_PATH,
+		    (vir_bytes)m_out->m_lc_vfs_path.buf, len);
+	else
+		put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_path.name, len);
+}
+
+/* Request side of open: print the path and the full set of open flags. */
+static int
+vfs_open_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_path(proc, m_out);
+	put_open_flags(proc, "flags", m_out->m_lc_vfs_path.flags,
+	    TRUE /*full*/);
+
+	return CT_DONE;
+}
+
+/* This function is shared between creat and open. */
+static void
+vfs_open_in(struct trace_proc * proc, const message * __unused m_out,
+    const message * m_in, int failed)
+{
+
+	/* On success, the reply's m_type is printed as a file descriptor. */
+	if (!failed)
+		put_fd(proc, NULL, m_in->m_type);
+	else
+		put_result(proc);
+}
+
+/* Request side of creat: print the path, the open flags, and the mode. */
+static int
+vfs_creat_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_creat.name,
+	    m_out->m_lc_vfs_creat.len);
+	put_open_flags(proc, "flags", m_out->m_lc_vfs_creat.flags,
+	    TRUE /*full*/);
+	put_mode(proc, "mode", m_out->m_lc_vfs_creat.mode);
+
+	return CT_DONE;
+}
+
+/* Request side of close: print the file descriptor. */
+static int
+vfs_close_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_close.fd);
+
+	return CT_DONE;
+}
+
+/* This function is used for link, rename, and symlink.
*/
+static int
+vfs_link_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path1", PF_PATH, m_out->m_lc_vfs_link.name1,
+	    m_out->m_lc_vfs_link.len1);
+	put_buf(proc, "path2", PF_PATH, m_out->m_lc_vfs_link.name2,
+	    m_out->m_lc_vfs_link.len2);
+
+	return CT_DONE;
+}
+
+/* Request handler for calls that take a path as their only argument. */
+static int
+vfs_path_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_path(proc, m_out);
+
+	return CT_DONE;
+}
+
+/* Request handler for calls that take a path and a file mode. */
+static int
+vfs_path_mode_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_path(proc, m_out);
+	put_mode(proc, "mode", m_out->m_lc_vfs_path.mode);
+
+	return CT_DONE;
+}
+
+/*
+ * Print a device number.  It is printed as a <major,minor> pair if it is
+ * nonzero, recombining the pair yields the same device number, and valuesonly
+ * is not set.  In all other cases it is printed as a single number.
+ */
+void
+put_dev(struct trace_proc * proc, const char * name, dev_t dev)
+{
+	devmajor_t major;
+	devminor_t minor;
+
+	major = major(dev);
+	minor = minor(dev);
+
+	/* The value 0 ("no device") should print as "0". */
+	if (dev != 0 && makedev(major, minor) == dev && !valuesonly)
+		put_value(proc, name, "<%d,%d>", major, minor);
+	else
+		put_value(proc, name, "%"PRIu64, dev);
+}
+
+/* Request side of mknod: print the path, mode, and device number. */
+static int
+vfs_mknod_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_mknod.name,
+	    m_out->m_lc_vfs_mknod.len);
+	put_mode(proc, "mode", m_out->m_lc_vfs_mknod.mode);
+	put_dev(proc, "dev", m_out->m_lc_vfs_mknod.device);
+
+	return CT_DONE;
+}
+
+/* Request side of chown: print the path, owner, and group. */
+static int
+vfs_chown_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_chown.name,
+	    m_out->m_lc_vfs_chown.len);
+	/* -1 means "keep the current value" so print as signed */
+	put_value(proc, "owner", "%d", m_out->m_lc_vfs_chown.owner);
+	put_value(proc, "group", "%d", m_out->m_lc_vfs_chown.group);
+
+	return CT_DONE;
+}
+
+/* TODO: expand this to the full ST_ set.
*/
+static const struct flags mount_flags[] = {
+	FLAG(MNT_RDONLY),
+};
+
+/*
+ * Request side of mount: print the special file, mount path, flags, file
+ * system type, and service label.
+ */
+static int
+vfs_mount_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "special", PF_PATH, m_out->m_lc_vfs_mount.dev,
+	    m_out->m_lc_vfs_mount.devlen);
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_mount.path,
+	    m_out->m_lc_vfs_mount.pathlen);
+	put_flags(proc, "flags", mount_flags, COUNT(mount_flags), "0x%x",
+	    m_out->m_lc_vfs_mount.flags);
+	put_buf(proc, "type", PF_STRING, m_out->m_lc_vfs_mount.type,
+	    m_out->m_lc_vfs_mount.typelen);
+	put_buf(proc, "label", PF_STRING, m_out->m_lc_vfs_mount.label,
+	    m_out->m_lc_vfs_mount.labellen);
+
+	return CT_DONE;
+}
+
+/* Request side of umount: print the path. */
+static int
+vfs_umount_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_umount.name,
+	    m_out->m_lc_vfs_umount.namelen);
+
+	return CT_DONE;
+}
+
+/*
+ * Reply side of umount: print the call result and, on success, the label
+ * buffer from the request as a string between parentheses after the result.
+ */
+static void
+vfs_umount_in(struct trace_proc * proc, const message * m_out,
+    const message * __unused m_in, int failed)
+{
+
+	put_result(proc);
+
+	if (!failed) {
+		put_open(proc, NULL, 0, "(", ", ");
+		put_buf(proc, "label", PF_STRING, m_out->m_lc_vfs_umount.label,
+		    m_out->m_lc_vfs_umount.labellen);
+
+		put_close(proc, ")");
+	}
+}
+
+
+/* Flag table for the mode argument of access. */
+static const struct flags access_flags[] = {
+	FLAG_ZERO(F_OK),
+	FLAG(R_OK),
+	FLAG(W_OK),
+	FLAG(X_OK),
+};
+
+/* Request side of access: print the path and the access mode flags. */
+static int
+vfs_access_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_path(proc, m_out);
+	put_flags(proc, "mode", access_flags, COUNT(access_flags), "0x%x",
+	    m_out->m_lc_vfs_path.mode);
+
+	return CT_DONE;
+}
+
+/*
+ * Request side of readlink: print the path only.  The buffer and its size
+ * are printed by vfs_readlink_in.
+ */
+static int
+vfs_readlink_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_readlink.name,
+	    m_out->m_lc_vfs_readlink.namelen);
+
+	return CT_NOTDONE;
+}
+
+/*
+ * Reply side of readlink: print the result buffer, using the reply's m_type
+ * as its length, then the buffer size argument and the call result.
+ */
+static void
+vfs_readlink_in(struct trace_proc * proc, const message * m_out,
+    const message * m_in, int failed)
+{
+
+	/* The call does not return a string, so do not use PF_STRING here.
*/
+	put_buf(proc, "buf", failed, m_out->m_lc_vfs_readlink.buf,
+	    m_in->m_type);
+	put_value(proc, "bufsize", "%zd", m_out->m_lc_vfs_readlink.bufsize);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Print the stat structure located at the given address.  The flags are
+ * passed on to put_open_struct(), which decides whether the structure's
+ * contents can be printed at all.
+ */
+static void
+put_struct_stat(struct trace_proc * proc, const char * name, int flags,
+    vir_bytes addr)
+{
+	struct stat buf;
+	int is_special;
+
+	if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf)))
+		return;
+
+	/*
+	 * The combination of struct stat's frequent usage and large number of
+	 * fields makes this structure a pain to print.  For now, the idea is
+	 * that for verbosity level 0, we print the mode, and the target device
+	 * for block/char special files or the file size for all other files.
+	 * For higher verbosity levels, largely maintain the structure's own
+	 * order of fields.  Violate this general structure printing rule for
+	 * some fields though, because the actual field order in struct stat is
+	 * downright ridiculous.  Like elsewhere, for verbosity level 1 print
+	 * all fields with meaningful values, and for verbosity level 2 just
+	 * print everything, including fields that are known to be not yet
+	 * supported and fields that contain known values.
+	 */
+	is_special = (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode));
+
+	if (verbose > 0) {
+		put_dev(proc, "st_dev", buf.st_dev);
+		put_value(proc, "st_ino", "%"PRId64, buf.st_ino);
+	}
+	put_mode(proc, "st_mode", buf.st_mode);
+	if (verbose > 0) {
+		put_value(proc, "st_nlink", "%u", buf.st_nlink);
+		put_value(proc, "st_uid", "%u", buf.st_uid);
+		put_value(proc, "st_gid", "%u", buf.st_gid);
+	}
+	if (is_special || verbose > 1)
+		put_dev(proc, "st_rdev", buf.st_rdev);
+	if (verbose > 0) {
+		/*
+		 * TODO: print the nanosecond part, but possibly only if we are
+		 * not actually interpreting the time as a date (another TODO),
+		 * and/or possibly only with verbose > 1 (largely unsupported).
+		 */
+		put_time(proc, "st_atime", buf.st_atime);
+		put_time(proc, "st_mtime", buf.st_mtime);
+		put_time(proc, "st_ctime", buf.st_ctime);
+	}
+	if (verbose > 1) /* not yet supported on MINIX3 */
+		put_time(proc, "st_birthtime", buf.st_birthtime);
+	if (!is_special || verbose > 1)
+		put_value(proc, "st_size", "%"PRId64, buf.st_size);
+	if (verbose > 0) {
+		put_value(proc, "st_blocks", "%"PRId64, buf.st_blocks);
+		put_value(proc, "st_blksize", "%"PRId32, buf.st_blksize);
+	}
+	if (verbose > 1) {
+		put_value(proc, "st_flags", "%"PRIu32, buf.st_flags);
+		put_value(proc, "st_gen", "%"PRIu32, buf.st_gen);
+	}
+
+	put_close_struct(proc, verbose > 1);
+}
+
+/*
+ * Request side of stat: print the path only.  The stat structure is printed
+ * by vfs_stat_in.
+ */
+static int
+vfs_stat_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_stat.name,
+	    m_out->m_lc_vfs_stat.len);
+
+	return CT_NOTDONE;
+}
+
+/* Reply side of stat: print the stat structure and the call result. */
+static void
+vfs_stat_in(struct trace_proc * proc, const message * m_out,
+    const message * __unused m_in, int failed)
+{
+
+	put_struct_stat(proc, "buf", failed, m_out->m_lc_vfs_stat.buf);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Request side of fstat: print the file descriptor only.  The stat structure
+ * is printed by vfs_fstat_in.
+ */
+static int
+vfs_fstat_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_fstat.fd);
+
+	return CT_NOTDONE;
+}
+
+/* Reply side of fstat: print the stat structure and the call result. */
+static void
+vfs_fstat_in(struct trace_proc * proc, const message * m_out,
+    const message * __unused m_in, int failed)
+{
+
+	put_struct_stat(proc, "buf", failed, m_out->m_lc_vfs_fstat.buf);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/*
+ * Request side of ioctl: print the file descriptor and request code, and hand
+ * the argument to the ioctl argument printer with is_svrctl set to FALSE.
+ * The return value is whatever put_ioctl_arg_out() returns.
+ */
+static int
+vfs_ioctl_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_ioctl.fd);
+	put_ioctl_req(proc, "req", m_out->m_lc_vfs_ioctl.req,
+	    FALSE /*is_svrctl*/);
+	return put_ioctl_arg_out(proc, "arg", m_out->m_lc_vfs_ioctl.req,
+	    (vir_bytes)m_out->m_lc_vfs_ioctl.arg, FALSE /*is_svrctl*/);
+}
+
+/*
+ * Reply side of ioctl: hand the argument to the reply-side ioctl argument
+ * printer, with is_svrctl set to FALSE.
+ */
+static void
+vfs_ioctl_in(struct trace_proc * proc, const message * m_out,
+    const message * __unused m_in, int failed)
+{
+
+	put_ioctl_arg_in(proc, "arg", failed,
m_out->m_lc_vfs_ioctl.req,
+	    (vir_bytes)m_out->m_lc_vfs_ioctl.arg, FALSE /*is_svrctl*/);
+}
+
+/*
+ * Print a fcntl command, by F_ name where possible (and valuesonly is not
+ * set), or as a decimal number otherwise.
+ */
+static void
+put_fcntl_cmd(struct trace_proc * proc, const char * name, int cmd)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (cmd) {
+		TEXT(F_DUPFD);
+		TEXT(F_GETFD);
+		TEXT(F_SETFD);
+		TEXT(F_GETFL);
+		TEXT(F_SETFL);
+		TEXT(F_GETOWN);
+		TEXT(F_SETOWN);
+		TEXT(F_GETLK);
+		TEXT(F_SETLK);
+		TEXT(F_SETLKW);
+		TEXT(F_CLOSEM);
+		TEXT(F_MAXFD);
+		TEXT(F_DUPFD_CLOEXEC);
+		TEXT(F_GETNOSIGPIPE);
+		TEXT(F_SETNOSIGPIPE);
+		TEXT(F_FREESP);
+		TEXT(F_FLUSH_FS_CACHE);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", cmd);
+}
+
+/* Flag table for file descriptor flags (F_GETFD/F_SETFD). */
+static const struct flags fd_flags[] = {
+	FLAG(FD_CLOEXEC),
+};
+
+/* Print a set of file descriptor flags, in hexadecimal where unnamed. */
+#define put_fd_flags(p, n, v) \
+	put_flags(p, n, fd_flags, COUNT(fd_flags), "0x%x", v)
+
+/*
+ * Print a lock type, by name for F_RDLCK, F_UNLCK, and F_WRLCK (unless
+ * valuesonly is set), or as a decimal number otherwise.
+ */
+static void
+put_flock_type(struct trace_proc * proc, const char * name, int type)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (type) {
+		TEXT(F_RDLCK);
+		TEXT(F_UNLCK);
+		TEXT(F_WRLCK);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%d", type);
+}
+
+/*
+ * With PF_FULL, also print l_pid, unless l_type is F_UNLCK in which case
+ * only that type is printed.  With PF_ALT, print only l_whence/l_start/l_len.
+ */
+static void
+put_struct_flock(struct trace_proc * proc, const char * name, int flags,
+    vir_bytes addr)
+{
+	struct flock flock;
+	int limited;
+
+	if (!put_open_struct(proc, name, flags, addr, &flock, sizeof(flock)))
+		return;
+
+	/* After a lock query that found no lock, only the type is useful. */
+	limited = ((flags & PF_FULL) && flock.l_type == F_UNLCK);
+
+	if (!(flags & PF_ALT))
+		put_flock_type(proc, "l_type", flock.l_type);
+	if (!limited) {
+		put_lseek_whence(proc, "l_whence", flock.l_whence);
+		put_value(proc, "l_start", "%"PRId64, flock.l_start);
+		put_value(proc, "l_len", "%"PRId64, flock.l_len);
+		if (flags & PF_FULL)
+			put_value(proc, "l_pid", "%d", flock.l_pid);
+	}
+
+	put_close_struct(proc, TRUE /*all*/);
+}
+
+/*
+ * Request side of fcntl: print the file descriptor, the command, and the
+ * command-specific argument for those commands that take one as input.  For
+ * F_GETLK, the lock structure is printed on the reply side instead, so only
+ * in that case is the call not done yet.
+ */
+static int
+vfs_fcntl_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_fd(proc, "fd", m_out->m_lc_vfs_fcntl.fd);
+	put_fcntl_cmd(proc, "cmd", m_out->m_lc_vfs_fcntl.cmd);
+
+	switch (m_out->m_lc_vfs_fcntl.cmd) {
+	case F_DUPFD:
+	case F_DUPFD_CLOEXEC:
+		/* Both take the lowest acceptable new descriptor number. */
+		put_fd(proc, "fd2", m_out->m_lc_vfs_fcntl.arg_int);
+		break;
+	case F_SETFD:
+		put_fd_flags(proc, "flags", m_out->m_lc_vfs_fcntl.arg_int);
+		break;
+	case F_SETFL:
+		/*
+		 * One of those difficult cases: the access mode is ignored, so
+		 * we don't want to print O_RDONLY if it is not given.  On the
+		 * other hand, fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_..) is
+		 * a fairly common construction, in which case we don't want to
+		 * print eg O_..|0x2 if the access mode is O_RDWR.  Thus, we
+		 * compromise: show the access mode if any of its bits are set.
+		 */
+		put_open_flags(proc, "flags", m_out->m_lc_vfs_fcntl.arg_int,
+		    m_out->m_lc_vfs_fcntl.arg_int & O_ACCMODE /*full*/);
+		break;
+	case F_SETLK:
+	case F_SETLKW:
+		put_struct_flock(proc, "lkp", 0,
+		    m_out->m_lc_vfs_fcntl.arg_ptr);
+		break;
+	case F_FREESP:
+		put_struct_flock(proc, "lkp", PF_ALT,
+		    m_out->m_lc_vfs_fcntl.arg_ptr);
+		break;
+	case F_SETNOSIGPIPE:
+		put_value(proc, "arg", "%d", m_out->m_lc_vfs_fcntl.arg_int);
+		break;
+	}
+
+	return (m_out->m_lc_vfs_fcntl.cmd != F_GETLK) ?
CT_DONE : CT_NOTDONE;
+}
+
+/*
+ * Reply side of fcntl.  On success, F_GETFD and F_GETFL print the reply's
+ * m_type as a set of flags instead of a plain result.  F_GETLK prints the
+ * lock structure before the result.  Everything else, including failed
+ * F_GETFD and F_GETFL calls, prints the regular call result.
+ */
+static void
+vfs_fcntl_in(struct trace_proc * proc, const message * m_out,
+    const message * m_in, int failed)
+{
+
+	switch (m_out->m_lc_vfs_fcntl.cmd) {
+	case F_GETFD:
+		if (failed)
+			break;
+		put_fd_flags(proc, NULL, m_in->m_type);
+		return;
+	case F_GETFL:
+		if (failed)
+			break;
+		put_open_flags(proc, NULL, m_in->m_type, TRUE /*full*/);
+		return;
+	case F_GETLK:
+		put_struct_flock(proc, "lkp", failed | PF_FULL,
+		    m_out->m_lc_vfs_fcntl.arg_ptr);
+		put_equals(proc);
+		break;
+	}
+
+	put_result(proc);
+}
+
+/*
+ * Request side of pipe2: nothing is printed here; all arguments are printed
+ * by vfs_pipe2_in.
+ */
+static int
+vfs_pipe2_out(struct trace_proc * __unused proc,
+    const message * __unused m_out)
+{
+
+	return CT_NOTDONE;
+}
+
+/*
+ * Reply side of pipe2: print the two resulting file descriptors as an array
+ * (or "&.." on failure), the flags from the request without access mode, and
+ * the call result.
+ */
+static void
+vfs_pipe2_in(struct trace_proc * proc, const message * m_out,
+    const message * m_in, int failed)
+{
+
+	if (!failed) {
+		put_open(proc, "fd", PF_NONAME, "[", ", ");
+		put_fd(proc, "rfd", m_in->m_lc_vfs_pipe2.fd0);
+		put_fd(proc, "wfd", m_in->m_lc_vfs_pipe2.fd1);
+		put_close(proc, "]");
+	} else
+		put_field(proc, "fd", "&..");
+	put_open_flags(proc, "flags", m_out->m_lc_vfs_pipe2.flags,
+	    FALSE /*full*/);
+	put_equals(proc);
+	put_result(proc);
+}
+
+/* Request side of umask: print the new mask as an unnamed mode. */
+static int
+vfs_umask_out(struct trace_proc * proc, const message * m_out)
+{
+
+	put_mode(proc, NULL, m_out->m_lc_vfs_umask.mask);
+
+	return CT_DONE;
+}
+
+/*
+ * Reply side of umask: on success, print the reply's m_type as a mode; on
+ * failure, print the regular call result.
+ */
+static void
+vfs_umask_in(struct trace_proc * proc, const message * __unused m_out,
+    const message * m_in, int failed)
+{
+
+	if (!failed)
+		put_mode(proc, NULL, m_in->m_type);
+	else
+		put_result(proc);
+
+}
+
+/*
+ * Print a directory entry type, by DT_ name where possible (and valuesonly is
+ * not set), or as an unsigned decimal number otherwise.
+ */
+static void
+put_dirent_type(struct trace_proc * proc, const char * name, unsigned int type)
+{
+	const char *text = NULL;
+
+	if (!valuesonly) {
+		switch (type) {
+		TEXT(DT_UNKNOWN);
+		TEXT(DT_FIFO);
+		TEXT(DT_CHR);
+		TEXT(DT_DIR);
+		TEXT(DT_BLK);
+		TEXT(DT_REG);
+		TEXT(DT_LNK);
+		TEXT(DT_SOCK);
+		TEXT(DT_WHT);
+		}
+	}
+
+	if (text != NULL)
+		put_field(proc, name, text);
+	else
+		put_value(proc, name, "%u", type);
+}
+
+static void
+put_struct_dirent(struct trace_proc * proc, const
char *name, int flags, + vir_bytes addr) +{ + struct dirent dirent; + + if (!put_open_struct(proc, name, flags, addr, &dirent, sizeof(dirent))) + return; + + if (verbose > 0) + put_value(proc, "d_fileno", "%"PRIu64, dirent.d_fileno); + if (verbose > 1) { + put_value(proc, "d_reclen", "%u", dirent.d_reclen); + put_value(proc, "d_namlen", "%u", dirent.d_namlen); + } + if (verbose >= 1 + (dirent.d_type == DT_UNKNOWN)) + put_dirent_type(proc, "d_type", dirent.d_type); + put_buf(proc, "d_name", PF_LOCADDR, (vir_bytes)dirent.d_name, + MIN(dirent.d_namlen, sizeof(dirent.d_name))); + + put_close_struct(proc, verbose > 1); +} + +static void +put_dirent_array(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, ssize_t size) +{ + struct dirent dirent; + unsigned count, max; + ssize_t off, chunk; + + if ((flags & PF_FAILED) || valuesonly > 1 || size < 0) { + put_ptr(proc, name, addr); + + return; + } + + if (size == 0) { + put_field(proc, name, "[]"); + + return; + } + + if (verbose == 0) + max = 0; /* TODO: should we set this to 1 instead? */ + else if (verbose == 1) + max = 3; /* low; just to give an indication where we are */ + else + max = INT_MAX; + + /* + * TODO: as is, this is highly inefficient, as we are typically copying + * in the same pieces of memory in repeatedly.. 
+ */ + count = 0; + for (off = 0; off < size; off += chunk) { + chunk = size - off; + if (chunk > sizeof(dirent)) + chunk = sizeof(dirent); + if (chunk < _DIRENT_MINSIZE(&dirent)) + break; + + if (mem_get_data(proc->pid, addr + off, &dirent, chunk) < 0) { + if (off == 0) { + put_ptr(proc, name, addr); + + return; + } + + break; + } + + if (off == 0) + put_open(proc, name, PF_NONAME, "[", ", "); + + if (count < max) + put_struct_dirent(proc, NULL, PF_LOCADDR, + (vir_bytes)&dirent); + + if (chunk > dirent.d_reclen) + chunk = dirent.d_reclen; + count++; + } + + if (off < size) + put_tail(proc, 0, 0); + else if (count > max) + put_tail(proc, count, max); + put_close(proc, "]"); +} + +static int +vfs_getdents_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_readwrite.fd); + + return CT_NOTDONE; +} + +static void +vfs_getdents_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + put_dirent_array(proc, "buf", failed, m_out->m_lc_vfs_readwrite.buf, + m_in->m_type); + put_value(proc, "len", "%zu", m_out->m_lc_vfs_readwrite.len); + put_equals(proc); + put_result(proc); +} + +static void +put_fd_set(struct trace_proc * proc, const char * name, vir_bytes addr, + int nfds) +{ + fd_set set; + size_t off; + unsigned int i, j, words, count, max; + + if (addr == 0 || nfds < 0) { + put_ptr(proc, name, addr); + + return; + } + + /* + * Each process may define its own FD_SETSIZE, so our fd_set may be of + * a different size than theirs. Thus, we copy at a granularity known + * to be valid in any case: a single word of bits. We make the + * assumption that fd_set consists purely of bits, so that we can use + * the second (and so on) bit word as an fd_set by itself. + */ + words = (nfds + NFDBITS - 1) / NFDBITS; + + count = 0; + + if (verbose == 0) + max = 16; + else if (verbose == 1) + max = FD_SETSIZE; + else + max = INT_MAX; + + /* TODO: copy in more at once, but stick to fd_mask boundaries. 
*/ + for (off = 0, i = 0; i < words; i++, off += sizeof(fd_mask)) { + if (mem_get_data(proc->pid, addr + off, &set, + sizeof(fd_mask)) != 0) { + if (count == 0) { + put_ptr(proc, name, addr); + + return; + } + + break; + } + + for (j = 0; j < NFDBITS; j++) { + if (FD_ISSET(j, &set)) { + if (count == 0) + put_open(proc, name, PF_NONAME, "[", + " "); + + if (count < max) + put_fd(proc, NULL, i * NFDBITS + j); + + count++; + } + } + } + + /* + * The empty set should print as "[]". If copying any part failed, it + * should print as "[x, ..(?)]" where x is the set printed so far, if + * any. If copying never failed, and we did not print all fds in the + * set, print the remaining count n as "[x, ..(+n)]" at the end. + */ + if (count == 0) + put_open(proc, name, PF_NONAME, "[", " "); + + if (i < words) + put_tail(proc, 0, 0); + else if (count > max) + put_tail(proc, count, max); + + put_close(proc, "]"); +} + +static int +vfs_select_out(struct trace_proc * proc, const message * m_out) +{ + int nfds; + + nfds = m_out->m_lc_vfs_select.nfds; + + put_fd(proc, "nfds", nfds); /* not really a file descriptor.. 
*/ + put_fd_set(proc, "readfds", + (vir_bytes)m_out->m_lc_vfs_select.readfds, nfds); + put_fd_set(proc, "writefds", + (vir_bytes)m_out->m_lc_vfs_select.writefds, nfds); + put_fd_set(proc, "errorfds", + (vir_bytes)m_out->m_lc_vfs_select.errorfds, nfds); + put_struct_timeval(proc, "timeout", 0, m_out->m_lc_vfs_select.timeout); + + return CT_DONE; +} + +static void +vfs_select_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + vir_bytes readfds, writefds, errorfds; + int nfds; + + put_result(proc); + if (failed) + return; + + nfds = m_out->m_lc_vfs_select.nfds; + + readfds = (vir_bytes)m_out->m_lc_vfs_select.readfds; + writefds = (vir_bytes)m_out->m_lc_vfs_select.writefds; + errorfds = (vir_bytes)m_out->m_lc_vfs_select.errorfds; + + if (readfds == 0 && writefds == 0 && errorfds == 0) + return; + + /* Omit names, because it looks weird. */ + put_open(proc, NULL, PF_NONAME, "(", ", "); + if (readfds != 0) + put_fd_set(proc, "readfds", readfds, nfds); + if (writefds != 0) + put_fd_set(proc, "writefds", writefds, nfds); + if (errorfds != 0) + put_fd_set(proc, "errorfds", errorfds, nfds); + put_close(proc, ")"); +} + +static int +vfs_fchdir_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_fchdir.fd); + + return CT_DONE; +} + +static int +vfs_fsync_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_fsync.fd); + + return CT_DONE; +} + +static int +vfs_truncate_out(struct trace_proc * proc, const message * m_out) +{ + + put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_truncate.name, + m_out->m_lc_vfs_truncate.len); + put_value(proc, "length", "%"PRId64, m_out->m_lc_vfs_truncate.offset); + + return CT_DONE; +} + +static int +vfs_ftruncate_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_truncate.fd); + put_value(proc, "length", "%"PRId64, m_out->m_lc_vfs_truncate.offset); + + return CT_DONE; +} 
+ +static int +vfs_fchmod_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_fchmod.fd); + put_mode(proc, "mode", m_out->m_lc_vfs_fchmod.mode); + + return CT_DONE; +} + +static int +vfs_fchown_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_chown.fd); + /* -1 means "keep the current value" so print as signed */ + put_value(proc, "owner", "%d", m_out->m_lc_vfs_chown.owner); + put_value(proc, "group", "%d", m_out->m_lc_vfs_chown.group); + + return CT_DONE; +} + +static const char * +vfs_utimens_name(const message * m_out) +{ + int has_path, has_flags; + + has_path = (m_out->m_vfs_utimens.name != NULL); + has_flags = (m_out->m_vfs_utimens.flags != 0); + + if (has_path && m_out->m_vfs_utimens.flags == AT_SYMLINK_NOFOLLOW) + return "lutimens"; + if (has_path && !has_flags) + return "utimens"; + else if (!has_path && !has_flags) + return "futimens"; + else + return "utimensat"; +} + +static const struct flags at_flags[] = { + FLAG(AT_EACCESS), + FLAG(AT_SYMLINK_NOFOLLOW), + FLAG(AT_SYMLINK_FOLLOW), + FLAG(AT_REMOVEDIR), +}; + +static void +put_utimens_timespec(struct trace_proc * proc, const char * name, + time_t sec, long nsec) +{ + + /* No field names. */ + put_open(proc, name, PF_NONAME, "{", ", "); + + put_time(proc, "tv_sec", sec); + + if (!valuesonly && nsec == UTIME_NOW) + put_field(proc, "tv_nsec", "UTIME_NOW"); + else if (!valuesonly && nsec == UTIME_OMIT) + put_field(proc, "tv_nsec", "UTIME_OMIT"); + else + put_value(proc, "tv_nsec", "%ld", nsec); + + put_close(proc, "}"); +} + +static int +vfs_utimens_out(struct trace_proc * proc, const message * m_out) +{ + int has_path, has_flags; + + /* Here we do not care about the utimens/lutimens distinction. 
*/ + has_path = (m_out->m_vfs_utimens.name != NULL); + has_flags = !!(m_out->m_vfs_utimens.flags & ~AT_SYMLINK_NOFOLLOW); + + if (has_path && has_flags) + put_field(proc, "fd", "AT_CWD"); /* utimensat */ + else if (!has_path) + put_fd(proc, "fd", m_out->m_vfs_utimens.fd); /* futimes */ + if (has_path || has_flags) /* lutimes, utimes, utimensat */ + put_buf(proc, "path", PF_PATH, + (vir_bytes)m_out->m_vfs_utimens.name, + m_out->m_vfs_utimens.len); + + put_open(proc, "times", 0, "[", ", "); + put_utimens_timespec(proc, "atime", m_out->m_vfs_utimens.atime, + m_out->m_vfs_utimens.ansec); + put_utimens_timespec(proc, "mtime", m_out->m_vfs_utimens.mtime, + m_out->m_vfs_utimens.mnsec); + put_close(proc, "]"); + + if (has_flags) + put_flags(proc, "flag", at_flags, COUNT(at_flags), "0x%x", + m_out->m_vfs_utimens.flags); + + return CT_DONE; +} + +static const struct flags statvfs_flags[] = { + FLAG(ST_WAIT), + FLAG(ST_NOWAIT), +}; + +static const struct flags st_flags[] = { + FLAG(ST_RDONLY), + FLAG(ST_SYNCHRONOUS), + FLAG(ST_NOEXEC), + FLAG(ST_NOSUID), + FLAG(ST_NODEV), + FLAG(ST_UNION), + FLAG(ST_ASYNC), + FLAG(ST_NOCOREDUMP), + FLAG(ST_RELATIME), + FLAG(ST_IGNORE), + FLAG(ST_NOATIME), + FLAG(ST_SYMPERM), + FLAG(ST_NODEVMTIME), + FLAG(ST_SOFTDEP), + FLAG(ST_LOG), + FLAG(ST_EXTATTR), + FLAG(ST_EXRDONLY), + FLAG(ST_EXPORTED), + FLAG(ST_DEFEXPORTED), + FLAG(ST_EXPORTANON), + FLAG(ST_EXKERB), + FLAG(ST_EXNORESPORT), + FLAG(ST_EXPUBLIC), + FLAG(ST_LOCAL), + FLAG(ST_QUOTA), + FLAG(ST_ROOTFS), + FLAG(ST_NOTRUNC), +}; + +static void +put_struct_statvfs(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr) +{ + struct statvfs buf; + + if (!put_open_struct(proc, name, flags, addr, &buf, sizeof(buf))) + return; + + put_flags(proc, "f_flag", st_flags, COUNT(st_flags), "0x%x", + buf.f_flag); + put_value(proc, "f_bsize", "%lu", buf.f_bsize); + if (verbose > 0 || buf.f_bsize != buf.f_frsize) + put_value(proc, "f_frsize", "%lu", buf.f_frsize); + if (verbose > 1) + 
put_value(proc, "f_iosize", "%lu", buf.f_iosize); + + put_value(proc, "f_blocks", "%"PRIu64, buf.f_blocks); + put_value(proc, "f_bfree", "%"PRIu64, buf.f_bfree); + if (verbose > 1) { + put_value(proc, "f_bavail", "%"PRIu64, buf.f_bavail); + put_value(proc, "f_bresvd", "%"PRIu64, buf.f_bresvd); + } + + if (verbose > 0) { + put_value(proc, "f_files", "%"PRIu64, buf.f_files); + put_value(proc, "f_ffree", "%"PRIu64, buf.f_ffree); + } + if (verbose > 1) { + put_value(proc, "f_favail", "%"PRIu64, buf.f_favail); + put_value(proc, "f_fresvd", "%"PRIu64, buf.f_fresvd); + } + + if (verbose > 1) { + put_value(proc, "f_syncreads", "%"PRIu64, buf.f_syncreads); + put_value(proc, "f_syncwrites", "%"PRIu64, buf.f_syncwrites); + put_value(proc, "f_asyncreads", "%"PRIu64, buf.f_asyncreads); + put_value(proc, "f_asyncwrites", "%"PRIu64, buf.f_asyncwrites); + + put_value(proc, "f_fsidx", "<%"PRId32",%"PRId32">", + buf.f_fsidx.__fsid_val[0], buf.f_fsidx.__fsid_val[1]); + } + put_dev(proc, "f_fsid", buf.f_fsid); /* MINIX3 interpretation! 
*/ + + if (verbose > 0) + put_value(proc, "f_namemax", "%lu", buf.f_namemax); + if (verbose > 1) + put_value(proc, "f_owner", "%u", buf.f_owner); + + put_buf(proc, "f_fstypename", PF_STRING | PF_LOCADDR, + (vir_bytes)&buf.f_fstypename, sizeof(buf.f_fstypename)); + if (verbose > 0) + put_buf(proc, "f_mntfromname", PF_STRING | PF_LOCADDR, + (vir_bytes)&buf.f_mntfromname, sizeof(buf.f_mntfromname)); + put_buf(proc, "f_mntonname", PF_STRING | PF_LOCADDR, + (vir_bytes)&buf.f_mntonname, sizeof(buf.f_mntonname)); + + put_close_struct(proc, verbose > 1); +} + +static void +put_statvfs_array(struct trace_proc * proc, const char * name, int flags, + vir_bytes addr, int count) +{ + struct statvfs buf; + int i, max; + + if ((flags & PF_FAILED) || valuesonly > 1 || count < 0) { + put_ptr(proc, name, addr); + + return; + } + + if (count == 0) { + put_field(proc, name, "[]"); + + return; + } + + if (verbose == 0) + max = 0; + else if (verbose == 1) + max = 1; /* TODO: is this reasonable? */ + else + max = INT_MAX; + + if (max > count) + max = count; + + for (i = 0; i < max; i++) { + if (mem_get_data(proc->pid, addr + i * sizeof(buf), &buf, + sizeof(buf)) < 0) { + if (i == 0) { + put_ptr(proc, name, addr); + + return; + } + + break; + } + + if (i == 0) + put_open(proc, name, PF_NONAME, "[", ", "); + + put_struct_statvfs(proc, NULL, PF_LOCADDR, (vir_bytes)&buf); + } + + if (i == 0) + put_open(proc, name, PF_NONAME, "[", ", "); + if (i < max) + put_tail(proc, 0, 0); + else if (count > i) + put_tail(proc, count, i); + put_close(proc, "]"); +} + +static int +vfs_getvfsstat_out(struct trace_proc * proc, const message * m_out) +{ + + if (m_out->m_lc_vfs_getvfsstat.buf == 0) { + put_ptr(proc, "buf", m_out->m_lc_vfs_getvfsstat.buf); + put_value(proc, "bufsize", "%zu", + m_out->m_lc_vfs_getvfsstat.len); + put_flags(proc, "flags", statvfs_flags, COUNT(statvfs_flags), + "%d", m_out->m_lc_vfs_getvfsstat.flags); + return CT_DONE; + } else + return CT_NOTDONE; +} + +static void
+vfs_getvfsstat_in(struct trace_proc * proc, const message * m_out, + const message * m_in, int failed) +{ + + if (m_out->m_lc_vfs_getvfsstat.buf != 0) { + put_statvfs_array(proc, "buf", failed, + m_out->m_lc_vfs_getvfsstat.buf, m_in->m_type); + put_value(proc, "bufsize", "%zu", + m_out->m_lc_vfs_getvfsstat.len); + put_flags(proc, "flags", statvfs_flags, COUNT(statvfs_flags), + "%d", m_out->m_lc_vfs_getvfsstat.flags); + put_equals(proc); + } + put_result(proc); +} + +static int +vfs_statvfs1_out(struct trace_proc * proc, const message * m_out) +{ + + put_buf(proc, "path", PF_PATH, m_out->m_lc_vfs_statvfs1.name, + m_out->m_lc_vfs_statvfs1.len); + + return CT_NOTDONE; +} + +static void +vfs_statvfs1_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + + put_struct_statvfs(proc, "buf", failed, m_out->m_lc_vfs_statvfs1.buf); + put_flags(proc, "flags", statvfs_flags, COUNT(statvfs_flags), "%d", + m_out->m_lc_vfs_statvfs1.flags); + put_equals(proc); + put_result(proc); +} + +/* This function is shared between statvfs1 and fstatvfs1. */ +static int +vfs_fstatvfs1_out(struct trace_proc * proc, const message * m_out) +{ + + put_fd(proc, "fd", m_out->m_lc_vfs_statvfs1.fd); + + return CT_NOTDONE; +} + +static int +vfs_getrusage_out(struct trace_proc * __unused proc, + const message * __unused m_out) +{ + + return CT_NOTDONE; +} + +static void +vfs_getrusage_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + struct rusage buf; + + /* Inline; we will certainly not be reusing this anywhere else. */ + if (put_open_struct(proc, "rusage", failed, + m_out->m_lc_vfs_rusage.addr, &buf, sizeof(buf))) { + /* Reason for hiding these two better: they're always zero. 
*/ + if (verbose > 1) { + put_value(proc, "ru_inblock", "%ld", buf.ru_inblock); + put_value(proc, "ru_oublock", "%ld", buf.ru_oublock); + } + if (verbose > 0) { + put_value(proc, "ru_ixrss", "%ld", buf.ru_ixrss); + put_value(proc, "ru_idrss", "%ld", buf.ru_idrss); + put_value(proc, "ru_isrss", "%ld", buf.ru_isrss); + } + + put_close_struct(proc, verbose > 1); + } + put_equals(proc); + put_result(proc); +} + +static int +vfs_svrctl_out(struct trace_proc * proc, const message * m_out) +{ + + put_ioctl_req(proc, "request", m_out->m_lc_svrctl.request, + TRUE /*is_svrctl*/); + return put_ioctl_arg_out(proc, "arg", m_out->m_lc_svrctl.request, + m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/); +} + +static void +vfs_svrctl_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + + put_ioctl_arg_in(proc, "arg", failed, m_out->m_lc_svrctl.request, + m_out->m_lc_svrctl.arg, TRUE /*is_svrctl*/); +} + +static int +vfs_gcov_flush_out(struct trace_proc * proc, const message * m_out) +{ + + put_ptr(proc, "buff", m_out->m_lc_vfs_gcov.buff_p); + put_value(proc, "buff_sz", "%zu", m_out->m_lc_vfs_gcov.buff_sz); + put_value(proc, "server_pid", "%d", m_out->m_lc_vfs_gcov.pid); + + return CT_DONE; +} + +#define VFS_CALL(c) [((VFS_ ## c) - VFS_BASE)] + +static const struct call_handler vfs_map[] = { + VFS_CALL(READ) = HANDLER("read", vfs_read_out, vfs_read_in), + VFS_CALL(WRITE) = HANDLER("write", vfs_write_out, default_in), + VFS_CALL(LSEEK) = HANDLER("lseek", vfs_lseek_out, vfs_lseek_in), + VFS_CALL(OPEN) = HANDLER("open", vfs_open_out, vfs_open_in), + VFS_CALL(CREAT) = HANDLER("open", vfs_creat_out, vfs_open_in), + VFS_CALL(CLOSE) = HANDLER("close", vfs_close_out, default_in), + VFS_CALL(LINK) = HANDLER("link", vfs_link_out, default_in), + VFS_CALL(UNLINK) = HANDLER("unlink", vfs_path_out, default_in), + VFS_CALL(CHDIR) = HANDLER("chdir", vfs_path_out, default_in), + VFS_CALL(MKDIR) = HANDLER("mkdir", vfs_path_mode_out, default_in), + 
VFS_CALL(MKNOD) = HANDLER("mknod", vfs_mknod_out, default_in), + VFS_CALL(CHMOD) = HANDLER("chmod", vfs_path_mode_out, default_in), + VFS_CALL(CHOWN) = HANDLER("chown", vfs_chown_out, default_in), + VFS_CALL(MOUNT) = HANDLER("mount", vfs_mount_out, default_in), + VFS_CALL(UMOUNT) = HANDLER("umount", vfs_umount_out, vfs_umount_in), + VFS_CALL(ACCESS) = HANDLER("access", vfs_access_out, default_in), + VFS_CALL(SYNC) = HANDLER("sync", default_out, default_in), + VFS_CALL(RENAME) = HANDLER("rename", vfs_link_out, default_in), + VFS_CALL(RMDIR) = HANDLER("rmdir", vfs_path_out, default_in), + VFS_CALL(SYMLINK) = HANDLER("symlink", vfs_link_out, default_in), + VFS_CALL(READLINK) = HANDLER("readlink", vfs_readlink_out, + vfs_readlink_in), + VFS_CALL(STAT) = HANDLER("stat", vfs_stat_out, vfs_stat_in), + VFS_CALL(FSTAT) = HANDLER("fstat", vfs_fstat_out, vfs_fstat_in), + VFS_CALL(LSTAT) = HANDLER("lstat", vfs_stat_out, vfs_stat_in), + VFS_CALL(IOCTL) = HANDLER("ioctl", vfs_ioctl_out, vfs_ioctl_in), + VFS_CALL(FCNTL) = HANDLER("fcntl", vfs_fcntl_out, vfs_fcntl_in), + VFS_CALL(PIPE2) = HANDLER("pipe2", vfs_pipe2_out, vfs_pipe2_in), + VFS_CALL(UMASK) = HANDLER("umask", vfs_umask_out, vfs_umask_in), + VFS_CALL(CHROOT) = HANDLER("chroot", vfs_path_out, default_in), + VFS_CALL(GETDENTS) = HANDLER("getdents", vfs_getdents_out, + vfs_getdents_in), + VFS_CALL(SELECT) = HANDLER("select", vfs_select_out, vfs_select_in), + VFS_CALL(FCHDIR) = HANDLER("fchdir", vfs_fchdir_out, default_in), + VFS_CALL(FSYNC) = HANDLER("fsync", vfs_fsync_out, default_in), + VFS_CALL(TRUNCATE) = HANDLER("truncate", vfs_truncate_out, default_in), + VFS_CALL(FTRUNCATE) = HANDLER("ftruncate", vfs_ftruncate_out, + default_in), + VFS_CALL(FCHMOD) = HANDLER("fchmod", vfs_fchmod_out, default_in), + VFS_CALL(FCHOWN) = HANDLER("fchown", vfs_fchown_out, default_in), + VFS_CALL(UTIMENS) = HANDLER_NAME(vfs_utimens_name, vfs_utimens_out, + default_in), + VFS_CALL(GETVFSSTAT) = HANDLER("getvfsstat", vfs_getvfsstat_out, + 
vfs_getvfsstat_in), + VFS_CALL(STATVFS1) = HANDLER("statvfs1", vfs_statvfs1_out, + vfs_statvfs1_in), + VFS_CALL(FSTATVFS1) = HANDLER("fstatvfs1", vfs_fstatvfs1_out, + vfs_statvfs1_in), + VFS_CALL(GETRUSAGE) = HANDLER("vfs_getrusage", vfs_getrusage_out, + vfs_getrusage_in), + VFS_CALL(SVRCTL) = HANDLER("vfs_svrctl", vfs_svrctl_out, + vfs_svrctl_in), + VFS_CALL(GCOV_FLUSH) = HANDLER("gcov_flush", vfs_gcov_flush_out, + default_in), +}; + +const struct calls vfs_calls = { + .endpt = VFS_PROC_NR, + .base = VFS_BASE, + .map = vfs_map, + .count = COUNT(vfs_map) +}; diff --git a/minix/usr.bin/trace/service/vm.c b/minix/usr.bin/trace/service/vm.c new file mode 100644 index 000000000..43d54239c --- /dev/null +++ b/minix/usr.bin/trace/service/vm.c @@ -0,0 +1,135 @@ + +#include "inc.h" + +#include +#include + +static int +vm_brk_out(struct trace_proc * proc, const message * m_out) +{ + + put_ptr(proc, "addr", (vir_bytes)m_out->m_lc_vm_brk.addr); + + return CT_DONE; +} + +static const struct flags mmap_prot[] = { + FLAG_ZERO(PROT_NONE), + FLAG(PROT_READ), + FLAG(PROT_WRITE), + FLAG(PROT_EXEC), +}; + +static const struct flags mmap_flags[] = { + FLAG(MAP_SHARED), + FLAG(MAP_PRIVATE), + FLAG(MAP_FIXED), + FLAG(MAP_RENAME), + FLAG(MAP_NORESERVE), + FLAG(MAP_INHERIT), + FLAG(MAP_HASSEMAPHORE), + FLAG(MAP_TRYFIXED), + FLAG(MAP_WIRED), + FLAG_MASK(MAP_ANON | MAP_STACK, MAP_FILE), + FLAG(MAP_ANON), + FLAG(MAP_STACK), + FLAG(MAP_UNINITIALIZED), + FLAG(MAP_PREALLOC), + FLAG(MAP_CONTIG), + FLAG(MAP_LOWER16M), + FLAG(MAP_LOWER1M), + FLAG(MAP_THIRDPARTY), + /* TODO: interpret alignments for which there is no constant */ + FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_64KB), + FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_16MB), + FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_4GB), + FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_1TB), + FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_256TB), + FLAG_MASK(MAP_ALIGNMENT_MASK, MAP_ALIGNMENT_64PB), +}; + +static int +vm_mmap_out(struct trace_proc * proc, 
const message * m_out) +{ + + if (m_out->m_mmap.flags & MAP_THIRDPARTY) + put_endpoint(proc, "forwhom", m_out->m_mmap.forwhom); + put_ptr(proc, "addr", (vir_bytes)m_out->m_mmap.addr); + put_value(proc, "len", "%zu", m_out->m_mmap.len); + put_flags(proc, "prot", mmap_prot, COUNT(mmap_prot), "0x%x", + m_out->m_mmap.prot); + put_flags(proc, "flags", mmap_flags, COUNT(mmap_flags), "0x%x", + m_out->m_mmap.flags); + put_fd(proc, "fd", m_out->m_mmap.fd); + put_value(proc, "offset", "%"PRId64, m_out->m_mmap.offset); + + return CT_DONE; +} + +static void +vm_mmap_in(struct trace_proc * proc, const message * __unused m_out, + const message * m_in, int failed) +{ + + if (!failed) + put_ptr(proc, NULL, (vir_bytes)m_in->m_mmap.retaddr); + else + /* TODO: consider printing MAP_FAILED in the right cases */ + put_result(proc); +} + +static int +vm_munmap_out(struct trace_proc * proc, const message * m_out) +{ + + put_ptr(proc, "addr", (vir_bytes)m_out->m_mmap.addr); + put_value(proc, "len", "%zu", m_out->m_mmap.len); + + return CT_DONE; +} + +static int +vm_getrusage_out(struct trace_proc * __unused proc, + const message * __unused m_out) +{ + + return CT_NOTDONE; +} + +static void +vm_getrusage_in(struct trace_proc * proc, const message * m_out, + const message * __unused m_in, int failed) +{ + struct rusage buf; + + /* Inline; we will certainly not be reusing this anywhere else. 
*/ + if (put_open_struct(proc, "rusage", failed, + m_out->m_lc_vm_rusage.addr, &buf, sizeof(buf))) { + if (verbose > 0) { + put_value(proc, "ru_maxrss", "%ld", buf.ru_maxrss); + put_value(proc, "ru_minflt", "%ld", buf.ru_minflt); + put_value(proc, "ru_majflt", "%ld", buf.ru_majflt); + } + + put_close_struct(proc, verbose > 0); + } + put_equals(proc); + put_result(proc); +} + +#define VM_CALL(c) [((VM_ ## c) - VM_RQ_BASE)] + +static const struct call_handler vm_map[] = { + VM_CALL(BRK) = HANDLER("brk", vm_brk_out, default_in), + VM_CALL(MMAP) = HANDLER("mmap", vm_mmap_out, vm_mmap_in), + VM_CALL(MUNMAP) = HANDLER("munmap", vm_munmap_out, default_in), + VM_CALL(GETRUSAGE) = HANDLER("vm_getrusage", vm_getrusage_out, + vm_getrusage_in), +}; + +const struct calls vm_calls = { + .endpt = VM_PROC_NR, + .base = VM_RQ_BASE, + .map = vm_map, + .count = COUNT(vm_map) +}; diff --git a/minix/usr.bin/trace/signal.awk b/minix/usr.bin/trace/signal.awk new file mode 100644 index 000000000..84c54b80c --- /dev/null +++ b/minix/usr.bin/trace/signal.awk @@ -0,0 +1,32 @@ +# This one is a bit trickier than error.awk, because sys/signal.h is not as +# easy to parse. We currently assume that all (userland) signals are listed +# before the first reference to "_KERNEL", and anything else that looks like a +# signal definition (but isn't) is after that first reference. 
+ +BEGIN { + printf("/* This file is automatically generated by signal.awk */\n\n"); + printf("#include \"inc.h\"\n\n"); + printf("static const char *const signals[] = {\n"); +} +/^#define/ { + name = $2; + if (!match(name, "SIG[^_]")) + next; + number = $3; + if (number < 0 || number == "SIGABRT") + next; + printf("\t[%s] = \"%s\",\n", name, name); +} +/_KERNEL/ { + exit; +} +END { + printf("};\n\n"); + printf("const char *\nget_signal_name(int sig)\n{\n\n"); + printf("\tif (sig >= 0 && sig < sizeof(signals) / sizeof(signals[0]) &&\n"); + printf("\t signals[sig] != NULL)\n"); + printf("\t\treturn signals[sig];\n"); + printf("\telse\n"); + printf("\t\treturn NULL;\n"); + printf("}\n"); +} diff --git a/minix/usr.bin/trace/trace.1 b/minix/usr.bin/trace/trace.1 new file mode 100644 index 000000000..239fa9710 --- /dev/null +++ b/minix/usr.bin/trace/trace.1 @@ -0,0 +1,334 @@ +.Dd November 2, 2014 +.Dt TRACE 1 +.Os +.Sh NAME +.Nm trace +.Nd print process system calls and signals +.Sh SYNOPSIS +.Nm +.Op Fl fgNsVv +.Op Fl o Ar file +.Op Fl p Ar pid +.Op Ar command +.Sh DESCRIPTION +The +.Nm +utility allows one or more processes to be traced. +For each traced process, +.Nm +prints the system calls the process makes and the signals +it receives. +The user can let +.Nm +start a +.Ar command +to be traced, and/or attach to one or more existing processes. +.Pp +The utility will run until no processes are left to trace, or until the user +presses the interrupt key (typically Ctrl-C). +Pressing this key once will cause all attached processes to be detached, with +the hope that the command that was started will also terminate cleanly from the +interruption. +Pressing the interrupt key once more kills the command that was started. +.Pp +The following options are available: +.Bl -tag -width XoXfileXX +.It Fl f +Follow forks. +Attach automatically to forked child processes.
+Child processes of the started command will be treated as attached processes, +in that upon Ctrl-C presses they will be detached rather than killed. +.It Fl g +Enable call grouping. +With this option, the tracing engine tries to reduce noise from call preemption +by first polling the process that was active last. +This should result in cleaner output, but may also cause a single process to be +scheduled repeatedly and thus cause starvation. +.It Fl N +Print all names. +By default, most structure fields are printed with their names. +This option enables printing of all available names, which also includes +system call parameter names. +This flag may be useful to figure out the meaning of a parameter, and for +automatic processing of the output. +.It Fl s +Print stack traces. +Each system call, and each signal arriving outside a system call, will be +preceded by a line showing the process's current stack trace. +For signals blocked by the target process, the stack trace may not be +meaningful. +Stack traces may not be supported on all platforms. +.It Fl V +Print values only. +If this flag is given once, numerical values will be printed instead of +string constants. +In addition, if it is given twice, the addresses of structures will be printed +instead of their contents. +.It Fl v +Increase verbosity. +By default, the output will be terse, in that not all structure fields are +shown, and strings and arrays are not always printed in full. +If this flag is provided once, more and longer output will be printed. +If it is provided twice, the tracer will print as much as possible. +.It Fl o Ar file +Redirect output. +By default, the output is sent to standard error. +With this option, the output is written to the given +.Ar file +instead. +.It Fl p Ar pid +Attach to a process. +This option makes +.Nm +attach to an existing process with process ID +.Ar pid . +This option may be used multiple times.
+When attaching to one or more processes this way, starting a command becomes +optional. +.El +.Pp +If the user presses the information key (typically Ctrl-T), the list of traced +processes along with their current status will be printed. +.Sh OUTPUT FORMAT +System calls are printed with the following general output format: +.Bd -literal -offset indent +.Sy name Ns ( Ns Sy parameters Ns ) = Sy result +.Ed +.Pp +Other informational lines may be printed about the status of the process. +These lines typically start with an uppercase letter, while system calls +always start with a lowercase letter or an underscore. +The following example shows the tracer output for a program that prints its +own user ID: +.Bd -literal -offset indent +Tracing printuid (pid 12685) +minix_getinfo() = 0 +getuid() = 0 (euid=1) +write(1, "My uid: 0\en", 10) = 10 +exit(0) +Process exited normally with code 0 +.Ed +.Pp +The first and last lines of the output provide status information about the +traced process. +Some calls return multiple results; extended results are printed in parentheses +after the primary call result, typically in +.Va name Ns = Ns Va value +format for clarity. +System calls that do not return on success, such as +.Fn exit , +are printed without the equals sign and result, unless they fail. +System call failure is printed according to POSIX conventions; that is, the +call is assumed to return -1 with the value of +.Va errno +printed in square brackets after it: +.Bd -literal -offset indent +setuid(0) = -1 [EPERM] +.Ed +.Pp +If a system call ends up in an IPC-level failure, the -1 value will be preceded +by an +.Dq Li <ipc> +string. +However, this string will be omitted if the system call itself is printed at +the IPC level (that is, as an +.Fn ipc_sendrec +call), generally because +.Nm +has no handler to print the actual system call. +.Pp +Signals are printed as they arrive at the traced process, using two asterisks +on both sides of the signal name.
+Signals may arrive both during and outside the execution of a system call: +.Bd -literal -offset indent +read(3, ** SIGUSR1 ** &0xeffff867, 4096) = -1 [EINTR] +** SIGUSR2 ** +getpid() = 5278 (ppid=5277) +kill(5278, SIGTERM) = ** SIGTERM ** <..> +Process terminated from signal SIGTERM +.Ed +.Pp +Multiple signals may be printed consecutively. +The above example illustrates a few other important aspects of output +formatting. +Some call parameters may be printed only after the system call returns, in +order to show their actual value. +For the +.Fn read +call, this would be the bytes that were read. +Upon failure, no bytes were read, so the buffer pointer is printed instead. +Finally, if a call that is expected to return (here, +.Fn kill ) +does not return before the process terminates, the line ends with a +.Dq Li <..> +marker. +This is an instance of call preemption; more about that later. +.Pp +Pointers are printed with a +.Sq Li & +prefix, except for NULL, which is printed using its own name. +In general, named constants are used instead of numerical constants wherever +that makes sense. +For pointers of which the address is not available, typically because its +contents are passed by value, +.Dq Li &.. +is shown instead. +.Pp +Data buffers are printed as double-quoted strings, using C-style character +escaping for nontextual bytes. +If either the verbosity level or a copy error prevents the whole data buffer +from being printed, two dots will be printed after the closing quote. +The same is done when printing a string buffer which does not have a null +termination byte within its range. +Path names are shown in full regardless of the verbosity level. +.Pp +Structures are printed as a set of structure fields enclosed in curly brackets. +The +.Va name Ns = Ns Va value +format is used, unless printing names for that structure type would introduce +too much noise and the +.Dq print all names +option is not given. 
+For many structures, by default only a subset of their fields are printed. +In this case, a +.Dq Li .. +entry is added at the end. +In some cases, an attempt is made to print only the most useful fields: +.Bd -literal -offset indent +stat("/etc/motd", {st_mode=S_IFREG|0755, st_size=747, ..}) = 0 +stat("/dev/tty", {st_mode=S_IFCHR|0666, st_rdev=<5,0>, ..}) = 0 +.Ed +.Pp +As shown in the above example, flag fields are printed as a combination of +named constants, separated by a +.Sq Li | +pipe symbol. +Any leftover numerical bits are printed at the end. +The example also shows the format in which major/minor pairs are printed for +device numbers. +This is a custom format; there are a few other custom formats throughout the +.Nm +output which are supposed to be sufficiently self-explanatory (and rare). +.Pp +Arrays are printed using square brackets. +.Bd -literal -offset indent +pipe2([3, 4], 0) = 0 +getdents(3, [..(45)], 4096) = 1824 +getdents(3, [{d_name="."}, ..(+44)], 4096) = 1824 +getdents(3, [], 4096) = 0 +.Ed +.Pp +If the array contents are not printed as per the settings for the verbosity +level, a single pseudo-element shows how many actual elements were in the array +(the second line in the example). +If the number of printed elements is limited, a final pseudo-element shows how +many additional elements were not printed (the third line in the example). +If a copy error occurs while part of the array has been printed already, a +last +.Dq Li ..(?) +pseudo-element is printed; for immediate failure, the array's pointer is shown. +Empty arrays will be printed as +.Dq Li [] . +.Pp +Bit sets are printed as arrays except with just a space and no comma as +bit separator, closely following the output format of +.Nm Ns 's +original inspiration +.Sy strace . 
+For signal sets in particular, an inverted bit set may be shown, thus printing +only the bits which are not set; such sets are prefixed with a +.Sq Li ~ +to the opening bracket: +.Bd -literal -offset indent +sigprocmask(SIG_SETMASK, ~[USR1 USR2], []) = 0 +.Ed +.Pp +Note how the +.Dq Li SIG +prefixes are omitted for brevity in this case. +.Pp +When multiple processes are traced at once, each line will have a prefix that +shows the PID of the corresponding process. +When the number of processes drops to one again, one more line is prefixed with +the PID of the remaining process, but using a +.Sq Li ' +instead of a +.Sq Li | +symbol: +.Bd -literal -offset indent +fork() = 25813 +25813| Tracing test*F (pid 25813) +25813| fork() = 0 +25812| waitpid(-1, &.., WNOHANG) = 0 +25813| exit(1) +25813| Process exited normally with code 1 +25812' waitpid(-1, W_EXITED(1), WNOHANG) = 25813 +exit(0) +Process exited normally with code 0 +.Ed +.Pp +If a process is preempted while making a system call, the system call will +be shown as suspended with the +.Dq Li <..> +suffix. +Later, when the system call is resumed, the output so far will be repeated, +either in full or (due to memory limitations) with +.Dq Li <..> +in its body, before the remaining part of the system call is printed. +This time, the line will have a +.Sq Li * +asterisk in its prefix, to indicate that this is not a new system call: +.Bd -literal -offset indent +25812| write(1, "test\en", 5) = <..> +25813| setuid(0) = 0 +25812|*write(1, "test\en", 5) = 5 +.Ed +.Pp +Finally, +.Nm +prints three dashes on their own line whenever the process context (program +counter and/or stack pointer) is changed during a system call. +This feature intends to help identify blocks of code run from signal handlers. +The following example shows a SIGALRM signal handler being invoked. 
+.Bd -literal -offset indent +sigsuspend([]) = ** SIGALRM ** -1 [EINTR] +--- +sigprocmask(SIG_SETMASK, ~[], [ALRM]) = 0 +sigreturn({sc_mask=[], ..}) +--- +exit(0) +.Ed +.Pp +However, the three dashes are not printed when a signal handler is invoked +while the program is not in a system call, because the tracer does not see such +invocations. +They are however also printed for successful +.Fn execve +calls. +.Sh DIAGNOSTICS +.Ex +.Sh SEE ALSO +.Xr ptrace 2 +.Sh AUTHORS +The +.Nm +utility was written by +.An David van Moolenbroek +.Aq david@minix3.org . +.Sh BUGS +While the utility aims to provide output for all system calls that can possibly +be made by user programs, output printers for a small number of rarely-used +structures and IOCTLs are still missing. In such cases, plain pointers will be +printed instead of actual contents. +.Pp +A signal arrives at the tracing process when sent to the target process, even +when the target process is blocking the signal and will thus receive it later. +This is a limitation of the ptrace infrastructure, although it does ensure that +a target process is not able to block signals generated for tracing purposes. +The result is that signals are not always shown at the time that they are +taken in by the target process, and that stack traces for signals may be off. +.Pp +Attaching to system services is currently not supported, due to limitations of +the ptrace infrastructure. The +.Nm +utility will detect and safely detach from system services, though. diff --git a/minix/usr.bin/trace/trace.c b/minix/usr.bin/trace/trace.c new file mode 100644 index 000000000..32ae37343 --- /dev/null +++ b/minix/usr.bin/trace/trace.c @@ -0,0 +1,817 @@ +/* trace(1) - the MINIX3 system call tracer - by D.C. van Moolenbroek */ + +#include "inc.h" + +#include +#include +#include +#include + +/* Global variables, used only for a subset of the command line options.
*/ +int allnames; /* FALSE = structure field names, TRUE = all names */ +unsigned int valuesonly; /* 0 = normal, 1 = no symbols, 2 = no structures */ +unsigned int verbose; /* 0 = essentials, 1 = elaborate, 2 = everything */ + +/* Flags set by the signal handlers below and polled by the main loop. */ +static volatile sig_atomic_t got_signal, got_info; + +/* + * Signal handler for signals that are supposed to make us terminate. Let the + * main loop do the actual work, since it might be in the middle of processing + * a process status change right now. + */ +static void +sig_handler(int __unused sig) +{ + + got_signal = TRUE; + +} + +/* + * Signal handler for the SIGINFO signal. Let the main loop report on all + * processes currently being traced. Since SIGINFO is sent to the current + * process group, traced children may get the signal as well. This is both + * intentional and impossible to prevent. + */ +static void +info_handler(int __unused sig) +{ + + got_info = TRUE; +} + +/* + * Print a list of traced processes and their call status. We must not + * interfere with actual process output, so perform out-of-band printing + * (with info lines rather than lines prefixed by each process's PID). + */ +static void +list_info(void) +{ + struct trace_proc *proc; + int no_call, in_call; + + put_newline(); + + for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) { + /* + * When attaching to an existing process, there is no way to + * find out whether the process is in a system call or not. + */ + no_call = (proc->trace_flags & TF_NOCALL); + in_call = (proc->trace_flags & TF_INCALL); + assert(!in_call || !no_call); + + put_fmt(NULL, "Tracing %s (pid %d), %s%s%s", proc->name, + proc->pid, no_call ? "call status unknown" : + (in_call ? "in a " : "not in a call"), + in_call ? call_name(proc) : "", + in_call ? " call" : ""); + put_newline(); + } +} + +/* + * Either we have just started or attached to the given process, or the process + * has performed a successful execve() call.
Obtain the new process name, and + * print a banner for it. + */ +static void +new_exec(struct trace_proc * proc) +{ + + /* Failure to obtain the process name is worrisome, but not fatal.. */ + if (kernel_get_name(proc->pid, proc->name, sizeof(proc->name)) < 0) + strlcpy(proc->name, "", sizeof(proc->name)); + + put_newline(); + put_fmt(proc, "Tracing %s (pid %d)", proc->name, proc->pid); + put_newline(); +} + +/* + * We have started or attached to a process. Set the appropriate flags, and + * print a banner showing that we are now tracing it. + */ +static void +new_proc(struct trace_proc * proc, int follow_fork) +{ + int fl; + + /* Set the desired tracing options. */ + fl = TO_ALTEXEC; + if (follow_fork) fl |= TO_TRACEFORK; + + (void)ptrace(T_SETOPT, proc->pid, 0, fl); + + /* + * When attaching to an arbitrary process, this process might be in the + * middle of an execve(). Now that we have enabled TO_ALTEXEC, we may + * now get a SIGSTOP signal next. Guard against this by marking the + * first system call as a possible execve(). + */ + if ((proc->trace_flags & (TF_ATTACH | TF_STOPPING)) == TF_ATTACH) + proc->trace_flags |= TF_EXEC; + + new_exec(proc); +} + +/* + * A process has terminated or is being detached. Print the resulting status. + */ +static void +discard_proc(struct trace_proc * proc, int status) +{ + const char *signame; + + /* + * The exit() calls are of type no-return, meaning they are expected + * not to return. However, calls of this type may in fact return an + * error, in which case the error must be printed. Thus, such calls + * are not actually finished until the end of the call-leave phase. + * For exit() calls, a successful call will never get to the call-leave + * phase. The result is that such calls will end up being shown as + * suspended, which is unintuitive. To counter this, we pretend that a + * clean process exit is in fact preceded by a call-leave event, thus + * allowing the call to be printed without suspension. 
An example: + * + * 3| exit(0) <..> + * 2| setsid() = 2 + * [A] 3| exit(0) + * 3| Process exited normally with code 0 + * + * The [A] line is the result of the following code. + */ + if (WIFEXITED(status) && (proc->trace_flags & TF_INCALL)) + call_leave(proc, TRUE /*skip*/); + + put_newline(); + if (WIFEXITED(status)) { + put_fmt(proc, "Process exited normally with code %d", + WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + if ((signame = get_signal_name(WTERMSIG(status))) != NULL) + put_fmt(proc, "Process terminated from signal %s", + signame); + else + put_fmt(proc, "Process terminated from signal %d", + WTERMSIG(status)); + } else if (WIFSTOPPED(status)) + put_text(proc, "Process detached"); + else + put_fmt(proc, "Bogus wait result (%04x)", status); + put_newline(); + + proc_del(proc); +} + +/* + * The given process has been stopped on a system call, either entering or + * leaving that call. + */ +static void +handle_call(struct trace_proc * proc, int show_stack) +{ + reg_t pc, sp; + int class, skip, new_ctx; + + proc->trace_flags &= ~TF_NOCALL; + + if (proc->trace_flags & TF_SKIP) { + /* Skip the call leave phase after a successful execve(). */ + proc->trace_flags &= ~(TF_INCALL | TF_SKIP); + } else if (!(proc->trace_flags & TF_INCALL)) { + /* + * The call_enter call returns the class of the call: + * TC_NORMAL, TC_EXEC, or TC_SIGRET. TC_EXEC means that an + * execve() call is being performed. This means that if a + * SIGSTOP follows for the current process, the process has + * successfully started a different executable. TC_SIGRET + * means that if successful, the call will have a bogus return + * value. TC_NORMAL means that the call requires no exception. 
+ */ + class = call_enter(proc, show_stack); + + switch (class) { + case TC_NORMAL: + break; + case TC_EXEC: + proc->trace_flags |= TF_EXEC; + break; + case TC_SIGRET: + proc->trace_flags |= TF_CTX_SKIP; + break; + default: + assert(0); + } + + /* Save the current program counter and stack pointer. */ + if (!kernel_get_context(proc->pid, &pc, &sp, NULL /*fp*/)) { + proc->last_pc = pc; + proc->last_sp = sp; + } else + proc->last_pc = proc->last_sp = 0; + + proc->trace_flags |= TF_INCALL; + } else { + /* + * Check if the program counter or stack pointer have changed + * during the system call. If so, this is a strong indication + * that a sigreturn call has succeeded, and thus its result + * must be skipped, since the result register will not contain + * the result of the call. + */ + new_ctx = (proc->last_pc != 0 && + !kernel_get_context(proc->pid, &pc, &sp, NULL /*fp*/) && + (pc != proc->last_pc || sp != proc->last_sp)); + + skip = ((proc->trace_flags & TF_CTX_SKIP) && new_ctx); + + call_leave(proc, skip); + + /* + * On such context changes, also print a short dashed line. + * This helps in identifying signal handler invocations, + * although it is not reliable for that purpose: no dashed line + * will be printed if a signal handler is invoked while the + * process is not making a system call. + */ + if (new_ctx) { + put_text(proc, "---"); + put_newline(); + } + + proc->trace_flags &= ~(TF_INCALL | TF_CTX_SKIP | TF_EXEC); + } +} + +/* + * The given process has received the given signal. Report the receipt. Due + * to the way that signal handling with traced processes works, the signal may + * in fact be delivered to the process much later, or never--a problem inherent + * to the way signals are handled in PM right now (namely, deferring signal + * delivery would let the traced process block signals meant for the tracer). 
+ */ +static void +report_signal(struct trace_proc * proc, int sig, int show_stack) +{ + const char *signame; + + /* + * Print a stack trace only if we are not in a call; otherwise, we + * would simply get the same stack trace twice and mess up the output + * in the process, because call suspension is not expected if we are + * tracing a single process only. + * FIXME: the check should be for whether we actually print the call.. + */ + if (show_stack && !(proc->trace_flags & TF_INCALL)) + kernel_put_stacktrace(proc); + + /* + * If this process is in the middle of a call, the signal will be + * printed within the call. This will always happen on the call split, + * that is, between the call's entering (out) and leaving (in) phases. + * This also means that the recording of the call-enter phase may be + * replayed more than once, and the call may be suspended more than + * once--after all, a signal is not necessarily followed immediately + * by the call result. If the process is not in the middle of a call, + * the signal will end up on a separate line. In both cases, multiple + * consecutive signals may be printed right after one another. The + * following scenario shows a number of possible combinations: + * + * 2| foo(<..> + * 3| ** SIGHUP ** ** SIGUSR1 ** + * 3| bar() = <..> + * 2|*foo(** SIGUSR1 ** ** SIGUSR2 ** <..> + * 3|*bar() = ** SIGCHLD ** 0 + * 2|*foo(** SIGINT ** &0xef852000) = -1 [EINTR] + * 3| kill(3, SIGTERM) = ** SIGTERM ** <..> + * 3| Process terminated from signal SIGTERM + */ + + call_replay(proc); + + if (!valuesonly && (signame = get_signal_name(sig)) != NULL) + put_fmt(proc, "** %s **", signame); + else + put_fmt(proc, "** SIGNAL %d **", sig); + + put_space(proc); + + output_flush(); +} + +/* + * Wait for the given process ID to stop on the given signal. Upon success, + * the function will return zero. 
Upon failure, it will return -1, and errno + * will be either set to an error code, or to zero in order to indicate that + * the process exited instead. + */ +static int +wait_sig(pid_t pid, int sig) +{ + int status; + + for (;;) { + if (waitpid(pid, &status, 0) == -1) { + if (errno == EINTR) continue; + + return -1; + } + + if (!WIFSTOPPED(status)) { + /* The process terminated just now. */ + errno = 0; + + return -1; + } + + if (WSTOPSIG(status) == sig) + break; + + (void)ptrace(T_RESUME, pid, 0, WSTOPSIG(status)); + } + + return 0; +} + +/* + * Attach to the given process, and wait for the resulting SIGSTOP signal. + * Other signals may arrive first; we pass these on to the process without + * reporting them, thus logically modelling them as having arrived before we + * attached to the process. The process might also exit in the meantime, + * typically as a result of a lethal signal; following the same logical model, + * we pretend the process did not exist in the first place. Since the SIGSTOP + * signal will be pending right after attaching to the process, this procedure + * will never block. + */ +static int +attach(pid_t pid) +{ + + if (ptrace(T_ATTACH, pid, 0, 0) != 0) { + warn("Unable to attach to pid %d", pid); + + return -1; + } + + if (wait_sig(pid, SIGSTOP) != 0) { + /* If the process terminated, report it as not found. */ + if (errno == 0) + errno = ESRCH; + + warn("Unable to attach to pid %d", pid); + + return -1; + } + + /* Verify that we can read values from the kernel at all. */ + if (kernel_check(pid) == FALSE) { + (void)ptrace(T_DETACH, pid, 0, 0); + + warnx("Kernel magic check failed, recompile trace(1)"); + + return -1; + } + + /* + * System services are managed by RS, which prevents them from + * being traced properly by PM. Attaching to a service could + * therefore cause problems, so we should detach immediately. 
+ */ + if (kernel_is_service(pid) == TRUE) { + (void)ptrace(T_DETACH, pid, 0, 0); + + warnx("Cannot attach to system services!"); + + return -1; + } + + return 0; +} + +/* + * Detach from all processes, knowing that they were all processes to which we + * attached explicitly (i.e., not started by us) and are all currently stopped. + */ +static void +detach_stopped(void) +{ + struct trace_proc *proc; + + for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) + (void)ptrace(T_DETACH, proc->pid, 0, 0); +} + +/* + * Start detaching from all processes to which we previously attached. The + * function is expected to return before detaching is completed, and the caller + * must deal with the new situation appropriately. Do not touch any processes + * started by us (to allow graceful termination), unless force is set, in which + * case those processes are killed. + */ +static void +detach_running(int force) +{ + struct trace_proc *proc; + + for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) { + if (proc->trace_flags & TF_ATTACH) { + /* Already detaching? Then do nothing. */ + if (proc->trace_flags & TF_DETACH) + continue; + + if (!(proc->trace_flags & TF_STOPPING)) + (void)kill(proc->pid, SIGSTOP); + + proc->trace_flags |= TF_DETACH | TF_STOPPING; + } else { + /* + * The child processes may be ignoring SIGINTs, so upon + * the second try, force them to terminate. + */ + if (force) + (void)kill(proc->pid, SIGKILL); + } + } +} + +/* + * Print command usage. + */ +static void __dead +usage(void) +{ + + (void)fprintf(stderr, "usage: %s [-fgNsVv] [-o file] [-p pid] " + "[command]\n", getprogname()); + + exit(EXIT_FAILURE); +} + +/* + * The main function of the system call tracer.
+ */ +int +main(int argc, char * argv[]) +{ + struct trace_proc *proc; + const char *output_file; + int status, sig, follow_fork, show_stack, grouping, first_signal; + pid_t pid, last_pid; + int c, error; + + setprogname(argv[0]); + + proc_init(); + + follow_fork = FALSE; + show_stack = FALSE; + grouping = FALSE; + output_file = NULL; + + allnames = FALSE; + verbose = 0; + valuesonly = 0; + + while ((c = getopt(argc, argv, "fgNsVvo:p:")) != -1) { + switch (c) { + case 'f': + follow_fork = TRUE; + break; + case 'g': + grouping = TRUE; + break; + case 'N': + allnames = TRUE; + break; + case 's': + show_stack = TRUE; + break; + case 'V': + valuesonly++; + break; + case 'v': + verbose++; + break; + case 'o': + output_file = optarg; + break; + case 'p': + pid = atoi(optarg); + if (pid <= 0) + usage(); + + if (proc_get(pid) == NULL && proc_add(pid) == NULL) + err(EXIT_FAILURE, NULL); + + break; + default: + usage(); + } + } + + argv += optind; + argc -= optind; + + first_signal = TRUE; + got_signal = FALSE; + got_info = FALSE; + + signal(SIGINT, sig_handler); + signal(SIGINFO, info_handler); + + /* Attach to any processes for which PIDs were given. */ + for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) { + if (attach(proc->pid) != 0) { + /* + * Detach from the processes that we have attached to + * so far, i.e. the ones with the TF_ATTACH flag. + */ + detach_stopped(); + + return EXIT_FAILURE; + } + + proc->trace_flags = TF_ATTACH | TF_NOCALL; + } + + /* If a command is given, start a child that executes the command. */ + if (argc >= 1) { + pid = fork(); + + switch (pid) { + case -1: + warn("Unable to fork"); + + detach_stopped(); + + return EXIT_FAILURE; + + case 0: + (void)ptrace(T_OK, 0, 0, 0); + + (void)execvp(argv[0], argv); + + err(EXIT_FAILURE, "Unable to start %s", argv[0]); + + default: + break; + } + + /* + * The first signal will now be SIGTRAP from the execvp(), + * unless that fails, in which case the child will terminate. 
+ */ + if (wait_sig(pid, SIGTRAP) != 0) { + /* + * If the child exited, the most likely cause is a + * failure to execute the command. Let the child + * report the error, and do not say anything here. + */ + if (errno != 0) + warn("Unable to start process"); + + detach_stopped(); + + return EXIT_FAILURE; + } + + /* If we haven't already, perform the kernel magic check. */ + if (proc_count() == 0 && kernel_check(pid) == FALSE) { + warnx("Kernel magic check failed, recompile trace(1)"); + + (void)kill(pid, SIGKILL); + + detach_stopped(); + + return EXIT_FAILURE; + } + + if ((proc = proc_add(pid)) == NULL) { + warn(NULL); + + (void)kill(pid, SIGKILL); + + detach_stopped(); + + return EXIT_FAILURE; + } + + proc->trace_flags = 0; + } else + pid = -1; + + /* The user will have to give us at least one process to trace. */ + if (proc_count() == 0) + usage(); + + /* + * Open an alternative output file if needed. After that, standard + * error should no longer be used directly, and all output has to go + * through the output module. + */ + if (output_init(output_file) < 0) { + warn("Unable to open output file"); + + if (pid > 0) + (void)kill(pid, SIGKILL); + + detach_stopped(); + + return EXIT_FAILURE; + } + + /* + * All the traced processes are currently stopped. Initialize, report, + * and resume them. + */ + for (proc = proc_next(NULL); proc != NULL; proc = proc_next(proc)) { + new_proc(proc, follow_fork); + + (void)ptrace(T_SYSCALL, proc->pid, 0, 0); + } + + /* + * Handle events until there are no traced processes left. + */ + last_pid = 0; + error = FALSE; + + for (;;) { + /* If an output error occurred, exit as soon as possible. */ + if (!error && output_error()) { + detach_running(TRUE /*force*/); + + error = TRUE; + } + + /* + * If the user pressed ^C once, start detaching the processes + * that we did not start, if any. If the user pressed ^C + * twice, kill the process that we did start, if any. 
+ */ + if (got_signal) { + detach_running(!first_signal); + + got_signal = FALSE; + first_signal = FALSE; + } + + /* Upon getting SIGINFO, print a list of traced processes. */ + if (got_info) { + list_info(); + + got_info = FALSE; + } + + /* + * Block until something happens to a traced process. If + * enabled from the command line, first try waiting for the + * last process for which we got results, so as to reduce call + * suspensions a bit. + */ + if (grouping && last_pid > 0 && + waitpid(last_pid, &status, WNOHANG) > 0) + pid = last_pid; + else + if ((pid = waitpid(-1, &status, 0)) <= 0) { + if (pid == -1 && errno == EINTR) continue; + if (pid == -1 && errno == ECHILD) break; /* all done */ + + put_fmt(NULL, "Unexpected waitpid failure: %s", + (pid == 0) ? "No result" : strerror(errno)); + put_newline(); + + /* + * We need waitpid to function correctly in order to + * detach from any attached processes, so we can do + * little more than just exit, effectively killing all + * traced processes. + */ + return EXIT_FAILURE; + } + + last_pid = 0; + + /* Get the trace data structure for the process. */ + if ((proc = proc_get(pid)) == NULL) { + /* + * The waitpid() call returned the status of a process + * that we have not yet seen. This must be a newly + * forked child. If it is not stopped, it must have + * died immediately, and we choose not to report it. + */ + if (!WIFSTOPPED(status)) + continue; + + if ((proc = proc_add(pid)) == NULL) { + put_fmt(NULL, + "Error attaching to new child %d: %s", + pid, strerror(errno)); + put_newline(); + + /* + * Out of memory allocating a new child object! + * We can not trace this child, so just let it + * run free by detaching from it. + */ + if (WSTOPSIG(status) != SIGSTOP) { + (void)ptrace(T_RESUME, pid, 0, + WSTOPSIG(status)); + + if (wait_sig(pid, SIGSTOP) != 0) + continue; /* it died.. 
*/ + } + + (void)ptrace(T_DETACH, pid, 0, 0); + + continue; + } + + /* + * We must specify TF_ATTACH here, even though it may + * be a child of a process we started, in which case it + * should be killed when we exit. We do not keep track + * of ancestry though, so better safe than sorry. + */ + proc->trace_flags = TF_ATTACH | TF_STOPPING; + + new_proc(proc, follow_fork); + + /* Repeat entering the fork call for the child. */ + handle_call(proc, show_stack); + } + + /* If the process died, report its status and clean it up. */ + if (!WIFSTOPPED(status)) { + discard_proc(proc, status); + + continue; + } + + sig = WSTOPSIG(status); + + if (sig == SIGSTOP && (proc->trace_flags & TF_STOPPING)) { + /* We expected the process to be stopped; now it is. */ + proc->trace_flags &= ~TF_STOPPING; + + if (proc->trace_flags & TF_DETACH) { + if (ptrace(T_DETACH, proc->pid, 0, 0) == 0) + discard_proc(proc, status); + + /* + * If detaching failed, the process must have + * died, and we'll get notified through wait(). + */ + continue; + } + + sig = 0; + } else if (sig == SIGSTOP && (proc->trace_flags & TF_EXEC)) { + /* The process has performed a successful execve(). */ + call_leave(proc, TRUE /*skip*/); + + put_text(proc, "---"); + + new_exec(proc); + + /* + * A successful execve() has no result, in the sense + * that there is no reply message. We should therefore + * not even try to copy in the reply message from the + * original location, because it will be invalid. + * Thus, we skip the exec's call leave phase entirely. + */ + proc->trace_flags &= ~TF_EXEC; + proc->trace_flags |= TF_SKIP; + + sig = 0; + } else if (sig == SIGTRAP) { + /* The process is entering or leaving a system call. */ + if (!(proc->trace_flags & TF_DETACH)) + handle_call(proc, show_stack); + + sig = 0; + } else { + /* The process has received a signal. */ + report_signal(proc, sig, show_stack); + + /* + * Only in this case do we pass the signal to the + * traced process. 
+ */ + } + + /* + * Resume process execution. If this call fails, the process + * has probably died. We will find out soon enough. + */ + (void)ptrace(T_SYSCALL, proc->pid, 0, sig); + + last_pid = proc->pid; + } + + return (error) ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/minix/usr.bin/trace/type.h b/minix/usr.bin/trace/type.h new file mode 100644 index 000000000..e126a8d74 --- /dev/null +++ b/minix/usr.bin/trace/type.h @@ -0,0 +1,32 @@ + +#define COUNT(s) (sizeof(s) / sizeof(s[0])) + +struct call_handler { + const char *name; + const char *(*namefunc)(const message *m_out); + int (*outfunc)(struct trace_proc *proc, const message *m_out); + void (*infunc)(struct trace_proc *proc, const message *m_out, + const message *m_in, int failed); +}; +#define HANDLER(n,o,i) { .name = n, .outfunc = o, .infunc = i } +#define HANDLER_NAME(n,o,i) { .namefunc = n, .outfunc = o, .infunc = i } + +struct calls { + endpoint_t endpt; + unsigned int base; + const struct call_handler *map; + unsigned int count; +}; + +struct flags { + unsigned int mask; + unsigned int value; + const char *name; +}; +#define FLAG(f) { f, f, #f } +#define FLAG_MASK(m,f) { m, f, #f } +#define FLAG_ZERO(f) { ~0, f, #f } + +/* not great, but it prevents a massive potential for typos.. */ +#define NAME(r) case r: return #r +#define TEXT(v) case v: text = #v; break -- 2.44.0