.PP
\fBservice refresh\fR \fI<pid>\fR
.PP
+\fBservice rescue\fR \fI<dir>\fR
+.PP
\fBservice shutdown\fR
.br
.de FL
a fresh copy of the process. The clean way to restart a process is using the
'refresh' option of the service utility.
.PP
+The rescue call sets the current working directory of the reincarnation server. If a trusted rescue driver is used to shadow certain binaries in RAM, the reincarnation server can look in that rescue directory for a binary when normal execution via its absolute path fails. This allows recovery from failures of the root file system driver.
+.PP
If the system is to be shutdown, the reincarnation server should know about this event to prevent it from restarting services that are killed during the shutdown
sequence.
.SH EXAMPLES
switch (call_nr) {
case SYN_ALARM:
do_period(&m); /* check drivers status */
- continue; /* no reply is expected */
+ continue;
case SYS_SIG:
- sigset = (sigset_t) m.NOTIFY_ARG;
- if (sigismember(&sigset, SIGCHLD)) {
- do_exit(&m);
- }
- if (sigismember(&sigset, SIGTERM) ||
- sigismember(&sigset, SIGKSTOP)) {
- /* Prevent restarting services. */
- do_shutdown(NULL);
- }
- continue; /* no reply is expected */
+ sigset = (sigset_t) m.NOTIFY_ARG; /* check signals passed */
+ if (sigismember(&sigset, SIGCHLD)) do_exit(&m);
+ if (sigismember(&sigset, SIGTERM)) do_shutdown(NULL);
+ if (sigismember(&sigset, SIGKSTOP)) do_shutdown(NULL);
+ continue;
default: /* heartbeat notification */
- printf("Got heartbeat from %d\n", who);
if (rproc_ptr[who] != NULL) /* mark heartbeat time */
rproc_ptr[who]->r_alive_tm = m.NOTIFY_TIMESTAMP;
}
*/
else {
switch(call_nr) {
- case RS_UP:
- result = do_up(&m);
- break;
- case RS_DOWN:
- result = do_down(&m);
- break;
- case RS_REFRESH:
- result = do_refresh(&m);
- break;
- case RS_SHUTDOWN:
- result = do_shutdown(&m);
- break;
- case GETSYSINFO:
- printf("RS got GETSYSINFO request from %d\n", m.m_source);
- result = do_getsysinfo(&m);
- break;
+ case RS_UP: result = do_up(&m); break;
+ case RS_DOWN: result = do_down(&m); break;
+ case RS_REFRESH: result = do_refresh(&m); break;
+ case RS_RESCUE: result = do_rescue(&m); break;
+ case RS_SHUTDOWN: result = do_shutdown(&m); break;
+ case GETSYSINFO: result = do_getsysinfo(&m); break;
default:
printf("Warning, RS got unexpected request %d from %d\n",
m.m_type, m.m_source);
if ((s = getsysinfo(FS_PROC_NR, SI_DMAP_TAB, dmap)) < 0)
panic("RS","warning: couldn't get copy of dmap table", errno);
- /* Change working directory to /sbin, where the binaries for the programs
- * in the system image are.
+ /* Now initialize the table with the processes in the system image.
+ * Prepend /sbin/ to the binaries so that we can actually find them.
*/
- chdir("/sbin/");
for (s=0; s< NR_BOOT_PROCS; s++) {
ip = &image[s];
if (ip->proc_nr >= 0) {
for(t=0; t< NR_DEVICES; t++)
if (dmap[t].dmap_driver == ip->proc_nr)
rproc[s].r_dev_nr = t;
- strcpy(rproc[s].r_cmd, ip->proc_name);
+ strcpy(rproc[s].r_cmd, "/sbin/");
+ strcpy(rproc[s].r_cmd+6, ip->proc_name);
rproc[s].r_argc = 1;
rproc[s].r_argv[0] = rproc[s].r_cmd;
rproc[s].r_argv[1] = NULL;
return(ESRCH);
}
+/*===========================================================================*
+ *				   do_rescue				     *
+ *===========================================================================*/
+PUBLIC int do_rescue(message *m_ptr)
+{
+/* Handle a rescue request: copy the rescue directory path from the caller
+ * and make it the current working directory of this server.
+ *
+ * Returns OK on success, EINVAL for a negative length or a path that is not
+ * absolute, E2BIG if the path plus its terminator does not fit the local
+ * buffer, the sys_datacopy() error if the copy fails, or errno if chdir()
+ * fails.
+ */
+  char rescue_dir[MAX_RESCUE_DIR_LEN];
+  int s;
+
+  /* Validate the length before using it as a copy size and as an index.
+   * One byte must stay free for the terminating '\0', so a length equal to
+   * MAX_RESCUE_DIR_LEN is already too big. The negative check assumes that
+   * RS_CMD_LEN is a signed field - TODO confirm against the message layout.
+   */
+  if (m_ptr->RS_CMD_LEN < 0) return(EINVAL);
+  if (m_ptr->RS_CMD_LEN >= MAX_RESCUE_DIR_LEN) return(E2BIG);
+
+  /* Copy rescue directory from user. */
+  if (OK!=(s=sys_datacopy(m_ptr->m_source, (vir_bytes) m_ptr->RS_CMD_ADDR,
+	SELF, (vir_bytes) rescue_dir, m_ptr->RS_CMD_LEN))) return(s);
+  rescue_dir[m_ptr->RS_CMD_LEN] = '\0';		/* ensure it is terminated */
+  if (rescue_dir[0] != '/') return(EINVAL);	/* insist on absolute path */
+
+  /* Change RS' directory to the rescue directory. Provided that the needed
+   * binaries are in the rescue dir, this makes recovery possible even if the
+   * (root) file system is no longer available, because no directory lookups
+   * are required. Thus if an absolute path fails, we can try to strip the
+   * path and see if the command is in the rescue dir.
+   */
+  if (chdir(rescue_dir) != 0) return(errno);
+  return(OK);
+}
/*===========================================================================*
* do_shutdown *
*/
int child_proc_nr; /* child process slot */
pid_t child_pid; /* child's process id */
+ char *file_only;
int s;
message m;
return(errno); /* return error */
case 0: /* child process */
+ /* Try to execute the binary that has an absolute path. If this fails,
+ * e.g., because the root file system cannot be read, try to strip off
+ * the path, and see if the command is in RS' current working dir.
+ */
execve(rp->r_argv[0], rp->r_argv, NULL); /* POSIX execute */
- printf("RS: exec failed for %s\n", rp->r_argv[0]);
- report("RS", "warning, exec() failed", errno); /* shouldn't happen */
+ file_only = strrchr(rp->r_argv[0], '/') + 1;
+ printf("Absolute exec failed (%d), trying file only: %s\n",
+ errno, file_only);
+ execve(file_only, rp->r_argv, NULL); /* POSIX execute */
+ printf("RS: exec failed for %s: %d\n", rp->r_argv[0], errno);
exit(EXEC_FAILED); /* terminate child */
default: /* parent process */
/* Space reserved for program and arguments. */
#define MAX_COMMAND_LEN 512 /* maximum argument string length */
#define MAX_NR_ARGS 4 /* maximum number of arguments */
+#define MAX_RESCUE_DIR_LEN 64 /* maximum rescue dir length */
/* Definition of the system process table. This table only has entries for
* the servers and drivers, and thus is not directly indexed by slot number.
_PROTOTYPE( int do_up, (message *m));
_PROTOTYPE( int do_down, (message *m));
_PROTOTYPE( int do_refresh, (message *m));
+_PROTOTYPE( int do_rescue, (message *m));
_PROTOTYPE( int do_shutdown, (message *m));
_PROTOTYPE( void do_period, (message *m));
_PROTOTYPE( void do_exit, (message *m));
"up",
"down",
"refresh",
+ "rescue",
"shutdown",
"catch for illegal requests"
};
*/
#define ARG_NAME 0 /* own application name */
#define ARG_REQUEST 1 /* request to perform */
-#define ARG_PATH 2 /* binary of system service */
+#define ARG_PATH 2 /* rescue dir or system service */
#define ARG_PID 2 /* pid of system service */
#define MIN_ARG_COUNT 2 /* require an action */
app_name, ARG_ARGS, ARG_DEV, ARG_PERIOD);
printf(" %s down <pid>\n", app_name);
printf(" %s refresh <pid>\n", app_name);
+ printf(" %s rescue <dir>\n", app_name);
printf(" %s shutdown\n", app_name);
printf("\n");
}
exit(errno);
}
if ( ! (stat_buf.st_mode & (S_IFBLK | S_IFCHR))) {
- print_usage(argv[ARG_NAME], "special file is not a device node");
+ print_usage(argv[ARG_NAME], "special file is not a device");
exit(EINVAL);
}
req_major = (stat_buf.st_rdev >> MAJOR) & BYTE;
exit(EINVAL);
}
}
+ else if (req_nr == RS_RESCUE) {
+
+ /* Verify argument count. */
+ if (argc - 1 < ARG_PATH) {
+ print_usage(argv[ARG_NAME], "action requires rescue directory");
+ exit(EINVAL);
+ }
+ req_path = argv[ARG_PATH];
+ if (stat(argv[ARG_PATH], &stat_buf) == -1) {
+ print_usage(argv[ARG_NAME], "couldn't get status of directory");
+ exit(errno);
+ }
+ if ( ! (stat_buf.st_mode & S_IFDIR)) {
+ print_usage(argv[ARG_NAME], "file is not a directory");
+ exit(EINVAL);
+ }
+ }
else if (req_nr == RS_SHUTDOWN) {
- /* no extra arguments required */
+ /* no extra arguments required */
}
/* Return the request number if no error were found. */
if (OK != (s=_taskcall(RS_PROC_NR, request, &m)))
failure(s);
break;
+ case RS_RESCUE:
+ m.RS_CMD_ADDR = req_path;
+ m.RS_CMD_LEN = strlen(req_path);
+ if (OK != (s=_taskcall(RS_PROC_NR, request, &m)))
+ failure(s);
+ break;
case RS_SHUTDOWN:
if (OK != (s=_taskcall(RS_PROC_NR, request, &m)))
failure(s);