From: Cristiano Giuffrida Date: Tue, 27 Apr 2010 11:17:30 +0000 (+0000) Subject: Unified crash recovery and live update. X-Git-Tag: v3.1.7~118 X-Git-Url: http://zhaoyanbai.com/repos/%22/xml/v3/zones/static/named-checkconf.html?a=commitdiff_plain;h=0164957abb0f0c87a77d17b031049f427727c3d4;p=minix.git Unified crash recovery and live update. RS CHANGES: - Crash recovery is now implemented like live update. Two instances are kept side by side and the dead version is live-updated into the new one. The endpoint doesn't change and the failure is not exposed (by default) to other system services. - The new instance can be created reactively (when a crash is detected) or proactively. In the latter case, RS can be instructed to keep a replica of the system service to perform a hot swap when the service fails. The flag SF_USE_REPL is set in that case. - The new flag SF_USE_REPL is supported for services in the boot image and for dynamically started services through the RS interface (i.e., the -p option in the service utility). - Fixed a bug where memory that was never allocated was freed for core system services. 
--- diff --git a/include/minix/com.h b/include/minix/com.h index 2cb64a76f..9bdf2b15b 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -688,9 +688,10 @@ # define RS_NAME m1_p1 /* name */ # define RS_NAME_LEN m1_i1 /* namelen */ -# define RS_INIT_RESULT m1_i1 /* init result */ -# define RS_INIT_TYPE m1_i2 /* init type */ -# define RS_INIT_RPROCTAB_GID m1_i3 /* init rproc table gid */ +# define RS_INIT_RESULT m7_i1 /* init result */ +# define RS_INIT_TYPE m7_i2 /* init type */ +# define RS_INIT_RPROCTAB_GID m7_i3 /* init rproc table gid */ +# define RS_INIT_OLD_ENDPOINT m7_i4 /* init old endpoint */ # define RS_LU_RESULT m1_i1 /* live update result */ # define RS_LU_STATE m1_i2 /* state required to update */ diff --git a/include/minix/rs.h b/include/minix/rs.h index 4bc28bb7c..c5b1b0e06 100644 --- a/include/minix/rs.h +++ b/include/minix/rs.h @@ -15,12 +15,11 @@ Interface to the reincarnation server #define RSS_NR_IO 16 /* RSS flags. */ -#define RSS_COPY 0x01 /* Copy the brinary into RS to make it possible - * to restart the driver without accessing FS - */ +#define RSS_COPY 0x01 /* keep an in-memory copy of the binary */ #define RSS_IPC_VALID 0x02 /* rss_ipc and rss_ipclen are valid */ #define RSS_REUSE 0x04 /* Try to reuse previously copied binary */ #define RSS_NOBLOCK 0x08 /* unblock caller immediately */ +#define RSS_REPLICA 0x10 /* keep a replica of the service */ /* Common definitions. */ #define RS_NR_CONTROL 8 diff --git a/include/minix/sef.h b/include/minix/sef.h index 6420fdb42..11251ee5d 100644 --- a/include/minix/sef.h +++ b/include/minix/sef.h @@ -29,6 +29,7 @@ _PROTOTYPE( void sef_exit, (int status) ); /* Type definitions. */ typedef struct { cp_grant_id_t rproctab_gid; + endpoint_t old_endpoint; } sef_init_info_t; /* Callback type definitions. 
*/ diff --git a/lib/libsys/sef_init.c b/lib/libsys/sef_init.c index 1c7904a7d..f06a04e47 100644 --- a/lib/libsys/sef_init.c +++ b/lib/libsys/sef_init.c @@ -50,6 +50,7 @@ PUBLIC int do_sef_init_request(message *m_ptr) /* Let the callback code handle the request. */ type = m_ptr->RS_INIT_TYPE; info.rproctab_gid = m_ptr->RS_INIT_RPROCTAB_GID; + info.old_endpoint = m_ptr->RS_INIT_OLD_ENDPOINT; switch(type) { case SEF_INIT_FRESH: r = sef_cbs.sef_cb_init_fresh(type, &info); diff --git a/servers/is/dmp_rs.c b/servers/is/dmp_rs.c index 70c94a478..fff8a1fb6 100644 --- a/servers/is/dmp_rs.c +++ b/servers/is/dmp_rs.c @@ -17,7 +17,7 @@ PUBLIC struct rprocpub rprocpub[NR_SYS_PROCS]; PUBLIC struct rproc rproc[NR_SYS_PROCS]; -FORWARD _PROTOTYPE( char *s_flags_str, (int flags) ); +FORWARD _PROTOTYPE( char *s_flags_str, (int flags, int sys_flags) ); /*===========================================================================* * rproc_dmp * @@ -33,16 +33,16 @@ PUBLIC void rproc_dmp() getsysinfo(RS_PROC_NR, SI_PROC_TAB, rproc); printf("Reincarnation Server (RS) system process table dump\n"); - printf("----label---- endpoint- -pid- flags -dev- -T- alive_tm starts command\n"); + printf("----label---- endpoint- -pid- flags- -dev- -T- alive_tm starts command\n"); for (i=prev_i; ir_flags & RS_IN_USE)) continue; if (++n > 22) break; - printf("%13s %9d %5d %5s %3d/%1d %3u %8u %5dx %s", + printf("%13s %9d %5d %6s %3d/%1d %3u %8u %5dx %s", rpub->label, rpub->endpoint, rp->r_pid, - s_flags_str(rp->r_flags), rpub->dev_nr, rpub->dev_style, - rpub->period, rp->r_alive_tm, rp->r_restarts, + s_flags_str(rp->r_flags, rpub->sys_flags), rpub->dev_nr, + rpub->dev_style, rpub->period, rp->r_alive_tm, rp->r_restarts, rp->r_args ); printf("\n"); @@ -53,15 +53,16 @@ PUBLIC void rproc_dmp() } -PRIVATE char *s_flags_str(int flags) +PRIVATE char *s_flags_str(int flags, int sys_flags) { static char str[10]; - str[0] = (flags & RS_ACTIVE) ? 'A' : '-'; - str[1] = (flags & RS_INITIALIZING) ? 
'I' : '-'; - str[2] = (flags & RS_UPDATING) ? 'U' : '-'; - str[3] = (flags & RS_EXITING) ? 'E' : '-'; - str[4] = (flags & RS_NOPINGREPLY) ? 'N' : '-'; - str[5] = '\0'; + str[0] = (flags & RS_ACTIVE) ? 'A' : '-'; + str[1] = (flags & RS_UPDATING) ? 'U' : '-'; + str[2] = (flags & RS_EXITING) ? 'E' : '-'; + str[3] = (flags & RS_NOPINGREPLY) ? 'N' : '-'; + str[4] = (sys_flags & SF_USE_COPY) ? 'C' : '-'; + str[5] = (sys_flags & SF_USE_REPL) ? 'R' : '-'; + str[6] = '\0'; return(str); } diff --git a/servers/rs/Makefile b/servers/rs/Makefile index 635ed9f5a..0eef88733 100644 --- a/servers/rs/Makefile +++ b/servers/rs/Makefile @@ -10,7 +10,7 @@ LDADD+= -lsys MAN= BINDIR?= /usr/sbin -INSTALLFLAGS+= -S 850k +INSTALLFLAGS+= -S 1050k CPPFLAGS= -I${MINIXSRCDIR} diff --git a/servers/rs/const.h b/servers/rs/const.h index f82cdc66d..85e4a980c 100644 --- a/servers/rs/const.h +++ b/servers/rs/const.h @@ -32,8 +32,10 @@ /* Sys flag values. */ #define SF_CORE_SRV 0x001 /* set for core system services */ #define SF_SYNCH_BOOT 0X002 /* set when process needs synch boot init */ -#define SF_NEED_COPY 0x004 /* set when process needs copy to restart */ +#define SF_NEED_COPY 0x004 /* set when process needs copy to start */ #define SF_USE_COPY 0x008 /* set when process has a copy in memory */ +#define SF_NEED_REPL 0x010 /* set when process needs replica to start */ +#define SF_USE_REPL 0x020 /* set when process has a replica */ /* Constants determining RS period and binary exponential backoff. */ #define RS_INIT_T 600 /* allow T ticks for init */ @@ -84,9 +86,9 @@ /* Define sys flags for the various process types. 
*/ #define SRV_SF (SF_CORE_SRV | SF_NEED_COPY) /* system services */ -#define SRVC_SF (SRV_SF | SF_USE_COPY) /* system services with a copy */ +#define SRVR_SF (SRV_SF | SF_NEED_REPL) /* services needing a replica */ #define DSRV_SF (0) /* dynamic system services */ -#define VM_SF (SRV_SF | SF_SYNCH_BOOT) /* vm */ +#define VM_SF (SRVR_SF | SF_SYNCH_BOOT) /* vm */ /* Define device flags for the various process types. */ #define SRV_DF (DRV_FORCED) /* system services */ diff --git a/servers/rs/glo.h b/servers/rs/glo.h index c907f5e8c..a6b897a79 100644 --- a/servers/rs/glo.h +++ b/servers/rs/glo.h @@ -23,6 +23,10 @@ extern struct boot_image_sys boot_image_sys_table[]; */ extern struct boot_image_dev boot_image_dev_table[]; +/* The buffer where the boot image is copied during initialization. */ +EXTERN int boot_image_buffer_size; +EXTERN char *boot_image_buffer; + /* The system process table. This table only has entries for system * services (servers and drivers), and thus is not directly indexed by * slot number. The size of the table must match the size of the privilege diff --git a/servers/rs/main.c b/servers/rs/main.c index 24b6e2240..f9a3d1bc3 100644 --- a/servers/rs/main.c +++ b/servers/rs/main.c @@ -27,10 +27,6 @@ FORWARD _PROTOTYPE(void boot_image_info_lookup, ( endpoint_t endpoint, FORWARD _PROTOTYPE(void catch_boot_init_ready, (endpoint_t endpoint) ); FORWARD _PROTOTYPE(void get_work, (message *m_ptr, int *status_ptr) ); -/* The buffer where the boot image is copied during initialization. */ -PRIVATE int boot_image_buffer_size; -PRIVATE char *boot_image_buffer; - /* Flag set when memory unmapping can be done. */ EXTERN int unmap_ok; @@ -210,6 +206,9 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) /* If we must keep a copy of this system service, read the header * and increase the size of the boot image buffer. 
*/ + if(boot_image_sys->flags & SF_USE_REPL) { + boot_image_sys->flags |= SF_USE_COPY; + } if(boot_image_sys->flags & SF_USE_COPY) { if((s = sys_getaoutheader(&header, i)) != OK) { panic("unable to get copy of a.out header: %d", s); @@ -335,7 +334,7 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) strcpy(rpub->proc_name, ip->proc_name); /* Get command settings. */ - rp->r_cmd[0]= '\0'; + strcpy(rp->r_cmd, ip->proc_name); rp->r_script[0]= '\0'; build_cmd_dep(rp); @@ -446,6 +445,13 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) if(j == NR_PROCS) { panic("unable to get pid"); } + + /* If we must keep a replica of this system service, create it now. */ + if(rpub->sys_flags & SF_USE_REPL) { + if ((s = clone_service(rp)) != OK) { + panic("unable to clone service: %d", s); + } + } } /* @@ -510,18 +516,24 @@ PRIVATE int sef_cb_signal_manager(endpoint_t target, int signo) if(rs_verbose) printf("RS: ignoring spurious signal %d for process %d\n", signo, target); - return OK; /* Since we're ignoring it, we have handled - * the signal without problem. All is OK. - */ + return OK; /* clear the signal */ } rp = rproc_ptr[target_p]; rpub = rp->r_pub; /* Don't bother if a termination signal has already been processed. */ - if(rp->r_flags & RS_TERMINATED) { + if((rp->r_flags & RS_TERMINATED) && !(rp->r_flags & RS_EXITING)) { return EDEADSRCDST; /* process is gone */ } + /* Ignore external signals for inactive service instances. */ + if( !(rp->r_flags & RS_ACTIVE) && !(rp->r_flags & RS_EXITING)) { + if(rs_verbose) + printf("RS: ignoring signal %d for inactive %s\n", + signo, srv_to_string(rp)); + return OK; /* clear the signal */ + } + if(rs_verbose) printf("RS: %s got %s signal %d\n", srv_to_string(rp), SIGS_IS_TERMINATION(signo) ? 
"termination" : "non-termination",signo); diff --git a/servers/rs/manager.c b/servers/rs/manager.c index 335a55248..e78fb2630 100644 --- a/servers/rs/manager.c +++ b/servers/rs/manager.c @@ -223,6 +223,28 @@ PUBLIC int srv_kill(pid_t pid, int sig) return(_syscall(PM_PROC_NR, SRV_KILL, &m)); } +/*===========================================================================* + * srv_update * + *===========================================================================*/ +PUBLIC int srv_update(endpoint_t src_e, endpoint_t dst_e) +{ + int r; + + /* Ask VM to swap the slots of the two processes and tell the kernel to + * do the same. If VM is the service being updated, only perform the kernel + * part of the call. The new instance of VM will do the rest at + * initialization time. + */ + if(src_e != VM_PROC_NR) { + r = vm_update(src_e, dst_e); + } + else { + r = sys_update(src_e, dst_e); + } + + return r; +} + /*===========================================================================* * update_period * *===========================================================================*/ @@ -287,9 +309,6 @@ PUBLIC void end_update(int result) new_rp->r_old_rp = NULL; old_rp->r_check_tm = 0; - /* Make the version that has to survive as active. */ - activate_service(surviving_rp, exiting_rp); - /* Send a late reply if necessary. */ late_reply(old_rp, result); @@ -297,7 +316,6 @@ PUBLIC void end_update(int result) * version as no longer updating. */ surviving_rp->r_flags &= ~RS_UPDATING; - unpublish_process(exiting_rp); cleanup_service(exiting_rp); if(rs_verbose) @@ -380,19 +398,28 @@ struct rproc *rp; /* Create the given system service. 
*/ int child_proc_nr_e, child_proc_nr_n; /* child process slot */ pid_t child_pid; /* child's process id */ - int s, use_copy; + int s, use_copy, has_replica; extern char **environ; struct rprocpub *rpub; rpub = rp->r_pub; use_copy= (rpub->sys_flags & SF_USE_COPY); + has_replica= (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED)); - /* See if we are not using a copy but we do need one to start the service. */ + /* Do we need an existing replica to create the service? */ + if(!has_replica && (rpub->sys_flags & SF_NEED_REPL)) { + printf("RS: unable to create service '%s' without a replica\n", + rpub->label); + free_slot(rp); + return(EPERM); + } + + /* Do we need an in-memory copy to create the service? */ if(!use_copy && (rpub->sys_flags & SF_NEED_COPY)) { - printf("RS: unable to start service '%s' without an in-memory copy\n", - rpub->label); - free_slot(rp); - return(EPERM); + printf("RS: unable to create service '%s' without an in-memory copy\n", + rpub->label); + free_slot(rp); + return(EPERM); } /* Now fork and branch for parent and child process (and check for error). */ @@ -411,11 +438,6 @@ struct rproc *rp; /* There is now a child process. Update the system process table. */ child_proc_nr_n = _ENDPOINT_P(child_proc_nr_e); rp->r_flags = RS_IN_USE; /* mark slot in use */ - rp->r_restarts += 1; /* raise nr of restarts */ - rp->r_old_rp = NULL; /* no old version yet */ - rp->r_new_rp = NULL; /* no new version yet */ - rp->r_prev_rp = NULL; /* no prev replica yet */ - rp->r_next_rp = NULL; /* no next replica yet */ rpub->endpoint = child_proc_nr_e; /* set child endpoint */ rp->r_pid = child_pid; /* set child pid */ rp->r_check_tm = 0; /* not checked yet */ @@ -425,7 +447,6 @@ struct rproc *rp; rproc_ptr[child_proc_nr_n] = rp; /* mapping for fast access */ rpub->in_use = TRUE; /* public entry is now in use */ - /* Set resources when asked to. */ if (rp->r_set_resources) { /* Initialize privilege structure. 
*/ @@ -483,15 +504,48 @@ struct rproc *rp; return OK; } +/*===========================================================================* + * clone_service * + *===========================================================================*/ +PUBLIC int clone_service(rp) +struct rproc *rp; +{ +/* Clone the given system service instance. */ + struct rproc *replica_rp; + int r; + + if(rs_verbose) + printf("RS: creating a replica for %s\n", srv_to_string(rp)); + + /* Clone slot. */ + if((r = clone_slot(rp, &replica_rp)) != OK) { + return r; + } + + /* Link the two slots. */ + rp->r_next_rp = replica_rp; + replica_rp->r_prev_rp = rp; + + /* Create a new replica of the service. */ + r = create_service(replica_rp); + if(r != OK) { + rp->r_next_rp = NULL; + return r; + } + + return OK; +} + /*===========================================================================* * publish_service * *===========================================================================*/ PUBLIC int publish_service(rp) struct rproc *rp; /* pointer to service slot */ { -/* Publish service-wide properties of a service. */ +/* Publish a service. */ int r; struct rprocpub *rpub; + struct rs_pci pci_acl; rpub = rp->r_pub; @@ -509,25 +563,13 @@ struct rproc *rp; /* pointer to service slot */ } } - if(rs_verbose) - printf("RS: %s service-wide properties published\n", - srv_to_string(rp)); - - return OK; -} - -/*===========================================================================* - * publish_process * - *===========================================================================*/ -PUBLIC int publish_process(rp) -struct rproc *rp; /* pointer to service slot */ -{ -/* Publish process-wide properties of a service. */ - int r; - struct rprocpub *rpub; - struct rs_pci pci_acl; - - rpub = rp->r_pub; + /* Tell VM about allowed calls, if any. 
*/ + if(rpub->vm_call_mask[0]) { + r = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0]); + if (r != OK) { + return kill_service(rp, "vm_set_priv call failed", r); + } + } /* If PCI properties are set, inform the PCI driver about the new service. */ if(rpub->pci_acl.rsp_nr_device || rpub->pci_acl.rsp_nr_class) { @@ -541,17 +583,8 @@ struct rproc *rp; /* pointer to service slot */ } } - /* Tell VM about allowed calls, if any. */ - if(rpub->vm_call_mask[0]) { - r = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0]); - if (r != OK) { - return kill_service(rp, "vm_set_priv call failed", r); - } - } - if(rs_verbose) - printf("RS: %s process-wide properties published\n", - srv_to_string(rp)); + printf("RS: %s published\n", srv_to_string(rp)); return OK; } @@ -562,7 +595,7 @@ struct rproc *rp; /* pointer to service slot */ PUBLIC int unpublish_service(rp) struct rproc *rp; /* pointer to service slot */ { -/* Unpublish service-wide properties of a service. */ +/* Unpublish a service. */ struct rprocpub *rpub; int r, result; @@ -576,27 +609,7 @@ struct rproc *rp; /* pointer to service slot */ result = r; } - /* No need to inform VFS, cleanup is performed on exit automatically. */ - - if(rs_verbose) - printf("RS: %s service-wide properties unpublished\n", - srv_to_string(rp)); - - return result; -} - -/*===========================================================================* - * unpublish_process * - *===========================================================================*/ -PUBLIC int unpublish_process(rp) -struct rproc *rp; /* pointer to service slot */ -{ -/* Unpublish process-wide properties of a service. */ - struct rprocpub *rpub; - int r, result; - - rpub = rp->r_pub; - result = OK; + /* No need to inform VFS and VM, cleanup is done on exit automatically. */ /* If PCI properties are set, inform the PCI driver. 
*/ if(rpub->pci_acl.rsp_nr_device || rpub->pci_acl.rsp_nr_class) { @@ -607,11 +620,8 @@ struct rproc *rp; /* pointer to service slot */ } } - /* No need to inform VM, cleanup is performed on exit automatically. */ - if(rs_verbose) - printf("RS: %s process-wide properties unpublished\n", - srv_to_string(rp)); + printf("RS: %s unpublished\n", srv_to_string(rp)); return result; } @@ -658,33 +668,26 @@ struct rproc *rp; rpub = rp->r_pub; - /* Create. */ + /* Create and make active. */ r = create_service(rp); + activate_service(rp, NULL); if(r != OK) { return r; } /* Publish service properties. */ - r = publish_process(rp); - if (r != OK) { - return r; - } r = publish_service(rp); if (r != OK) { return r; } /* Run. */ - init_type = rp->r_restarts > 0 ? SEF_INIT_RESTART : SEF_INIT_FRESH; + init_type = SEF_INIT_FRESH; r = run_service(rp, init_type); if(r != OK) { return r; } - /* The system service now has been successfully started. The only thing - * that can go wrong now, is that execution fails at the child. If that's - * the case, the child will exit. - */ if(rs_verbose) printf("RS: %s started with major %d\n", srv_to_string(rp), rpub->dev_nr); @@ -739,10 +742,8 @@ struct rproc **dst_rpp; printf("RS: %s updating into %s\n", srv_to_string(src_rp), srv_to_string(dst_rp)); - /* Ask VM to swap the slots of the two processes and tell the kernel to - * do the same. - */ - r = vm_update(src_rpub->endpoint, dst_rpub->endpoint); + /* Swap the slots of the two processes. */ + r = srv_update(src_rpub->endpoint, dst_rpub->endpoint); if(r != OK) { return r; } @@ -764,6 +765,9 @@ struct rproc **dst_rpp; *src_rpp = src_rp; *dst_rpp = dst_rp; + /* Make the new version active. */ + activate_service(dst_rp, src_rp); + if(rs_verbose) printf("RS: %s updated into %s\n", srv_to_string(src_rp), srv_to_string(dst_rp)); @@ -840,7 +844,6 @@ PUBLIC void terminate_service(struct rproc *rp) /* Unpublish the service. 
*/ unpublish_service(rp); - unpublish_process(rp); /* Cleanup all the instances of the service. */ get_service_instances(rp, &rps, &nr_rps); @@ -857,9 +860,6 @@ PUBLIC void terminate_service(struct rproc *rp) * that just exited will continue executing. */ if(rp->r_flags & RS_UPDATING) { - if(! (rp->r_flags & RS_ACTIVE) ) { - return; /* ignore unexpected signals */ - } end_update(ERESTART); } @@ -945,42 +945,43 @@ PUBLIC void restart_service(struct rproc *rp) /* See if a late reply has to be sent. */ late_reply(rp, OK); + /* Run a recovery script if available. */ if (rp->r_script[0] != '\0') { - /* Run a recovery script. */ run_script(rp); + return; } - else { - /* Unpublish the service. */ - unpublish_service(rp); - unpublish_process(rp); - /* Clone slots. */ - if((r = clone_slot(rp, &replica_rp)) != OK) { + /* Restart directly. We need a replica if not already available. */ + if(rp->r_next_rp == NULL) { + /* Create the replica. */ + r = clone_service(rp); + if(r != OK) { kill_service(rp, "unable to clone service", r); return; } + } + replica_rp = rp->r_next_rp; - if(rs_verbose) - printf("RS: %s restarting into %s\n", - srv_to_string(rp), srv_to_string(replica_rp)); - - /* Swap slots. */ - swap_slot(&rp, &replica_rp); + /* Update the service into the replica. */ + r = update_service(&rp, &replica_rp); + if(r != OK) { + kill_service(rp, "unable to update into new replica", r); + return; + } - /* Direct restart. */ - if((r = start_service(replica_rp)) != OK) { - kill_service(rp, "unable to restart service", r); - return; - } + /* Let the new replica run. */ + r = run_service(replica_rp, SEF_INIT_RESTART); + if(r != OK) { + kill_service(rp, "unable to let the replica run", r); + return; + } - /* Link the two slots. */ - rp->r_next_rp = replica_rp; - replica_rp->r_prev_rp = rp; + /* Increase the number of restarts. 
*/ + replica_rp->r_restarts += 1; - if(rs_verbose) - printf("RS: %s restarted into %s\n", - srv_to_string(rp), srv_to_string(replica_rp)); - } + if(rs_verbose) + printf("RS: %s restarted into %s\n", + srv_to_string(rp), srv_to_string(replica_rp)); } /*===========================================================================* @@ -1108,7 +1109,7 @@ PUBLIC void free_exec(rp) struct rproc *rp; { /* Free an exec image. */ - int slot_nr, has_shared_exec; + int slot_nr, has_shared_exec, is_boot_image_mem; struct rproc *other_rp; /* Search for some other slot sharing the same exec image. */ @@ -1122,11 +1123,22 @@ struct rproc *rp; } } - /* If nobody uses our copy of the exec image, we can get rid of it. */ + /* If nobody uses our copy of the exec image, we can try to get rid of it. */ if(!has_shared_exec) { - if(rs_verbose) - printf("RS: %s frees exec image\n", srv_to_string(rp)); - free(rp->r_exec); + is_boot_image_mem = (rp->r_exec >= boot_image_buffer + && rp->r_exec < boot_image_buffer + boot_image_buffer_size); + + /* Free memory only if not part of the boot image buffer. */ + if(is_boot_image_mem) { + if(rs_verbose) + printf("RS: %s has exec image in the boot image buffer\n", + srv_to_string(rp)); + } + else { + if(rs_verbose) + printf("RS: %s frees exec image\n", srv_to_string(rp)); + free(rp->r_exec); + } } else { if(rs_verbose) @@ -1239,6 +1251,7 @@ endpoint_t source; else rp->r_ipc_list[0]= '\0'; + /* Set system flags. */ rpub->sys_flags = DSRV_SF; rp->r_exec= NULL; if (rs_start->rss_flags & RSS_COPY) { @@ -1274,6 +1287,9 @@ endpoint_t source; rpub->sys_flags |= SF_USE_COPY; } + if (rs_start->rss_flags & RSS_REPLICA) { + rpub->sys_flags |= SF_USE_REPL; + } /* All dynamically created services get the same privilege flags, and * allowed traps, and signal manager. Other privilege settings can be @@ -1364,8 +1380,12 @@ endpoint_t source; /* Initialize some fields. 
*/ rpub->period = rs_start->rss_period; - rp->r_restarts = -1; /* will be incremented */ + rp->r_restarts = 0; /* no restarts yet */ rp->r_set_resources= 1; /* set resources */ + rp->r_old_rp = NULL; /* no old version yet */ + rp->r_new_rp = NULL; /* no new version yet */ + rp->r_prev_rp = NULL; /* no prev replica yet */ + rp->r_next_rp = NULL; /* no next replica yet */ /* Copy VM call mask. Inherit basic VM calls. */ memcpy(rpub->vm_call_mask, rs_start->rss_vm, @@ -1404,7 +1424,7 @@ struct rproc **clone_rpp; /* Deep copy. */ clone_rp->r_flags &= ~RS_ACTIVE; /* the clone is not active yet */ clone_rp->r_pid = -1; /* no pid yet */ - clone_rpub->endpoint = -1; /* no endpoint yet */ + clone_rpub->endpoint = -1; /* no endpoint yet */ clone_rp->r_pub = clone_rpub; /* restore pointer to public entry */ build_cmd_dep(clone_rp); /* rebuild cmd dependencies */ if(clone_rpub->sys_flags & SF_USE_COPY) { diff --git a/servers/rs/proto.h b/servers/rs/proto.h index d6c824a99..89d37174d 100644 --- a/servers/rs/proto.h +++ b/servers/rs/proto.h @@ -36,6 +36,7 @@ _PROTOTYPE( int copy_label, (endpoint_t src_e, char *src_label, size_t src_len, _PROTOTYPE( void build_cmd_dep, (struct rproc *rp) ); _PROTOTYPE( int srv_fork, (void) ); _PROTOTYPE( int srv_kill, (pid_t pid, int sig) ); +_PROTOTYPE( int srv_update, (endpoint_t src_e, endpoint_t dst_e) ); #define kill_service(rp, errstr, err) \ kill_service_debug(__FILE__, __LINE__, rp, errstr, err) _PROTOTYPE( int kill_service_debug, (char *file, int line, struct rproc *rp, @@ -48,10 +49,9 @@ _PROTOTYPE( int crash_service_debug, (char *file, int line, struct rproc *rp) ); _PROTOTYPE( void cleanup_service_debug, (char *file, int line, struct rproc *rp) ); _PROTOTYPE( int create_service, (struct rproc *rp) ); +_PROTOTYPE( int clone_service, (struct rproc *rp) ); _PROTOTYPE( int publish_service, (struct rproc *rp) ); -_PROTOTYPE( int publish_process, (struct rproc *rp) ); _PROTOTYPE( int unpublish_service, (struct rproc *rp) ); -_PROTOTYPE( int 
unpublish_process, (struct rproc *rp) ); _PROTOTYPE( int run_service, (struct rproc *rp, int init_type) ); _PROTOTYPE( int start_service, (struct rproc *rp) ); _PROTOTYPE( void stop_service, (struct rproc *rp,int how) ); diff --git a/servers/rs/request.c b/servers/rs/request.c index cfe696acf..d18d25ccd 100755 --- a/servers/rs/request.c +++ b/servers/rs/request.c @@ -58,7 +58,6 @@ message *m_ptr; /* request message pointer */ /* All information was gathered. Now try to start the system service. */ r = start_service(rp); - activate_service(rp, NULL); if(r != OK) { return r; } @@ -114,7 +113,6 @@ PUBLIC int do_down(message *m_ptr) if(rs_verbose) printf("RS: recovery script performs service down...\n"); unpublish_service(rp); - unpublish_process(rp); cleanup_service(rp); return(OK); } @@ -255,6 +253,7 @@ PUBLIC int do_init_ready(message *m_ptr) struct rproc *rp; struct rprocpub *rpub; int result; + int r; who_p = _ENDPOINT_P(m_ptr->m_source); rp = rproc_ptr[who_p]; @@ -303,7 +302,6 @@ PUBLIC int do_init_ready(message *m_ptr) * make the new instance active and cleanup the old replica. */ if(rp->r_prev_rp) { - activate_service(rp, rp->r_prev_rp); cleanup_service(rp->r_prev_rp); rp->r_prev_rp = NULL; @@ -311,6 +309,13 @@ PUBLIC int do_init_ready(message *m_ptr) printf("RS: %s completed restart\n", srv_to_string(rp)); } + /* If we must keep a replica of this system service, create it now. */ + if(rpub->sys_flags & SF_USE_REPL) { + if ((r = clone_service(rp)) != OK) { + printf("RS: warning: unable to clone %s\n", srv_to_string(rp)); + } + } + return(OK); } @@ -404,13 +409,6 @@ PUBLIC int do_update(message *m_ptr) return s; } - /* Publish process-wide properties. */ - s = publish_process(new_rp); - if (s != OK) { - printf("RS: do_update: publish_process failed: %d\n", s); - return s; - } - /* Link old version to new version and mark both as updating. 
*/ rp->r_new_rp = new_rp; new_rp->r_old_rp = rp; diff --git a/servers/rs/service/service.c b/servers/rs/service/service.c index 0bc0c2eaa..4acbc7415 100644 --- a/servers/rs/service/service.c +++ b/servers/rs/service/service.c @@ -152,12 +152,13 @@ PRIVATE int parse_arguments(int argc, char **argv) char *hz, *buff; int req_nr; int c, i, j; - int c_flag, r_flag, n_flag; + int c_flag, r_flag, n_flag, p_flag; c_flag = 0; r_flag = 0; n_flag = 0; - while (c= getopt(argc, argv, "rcn?"), c != -1) + p_flag = 0; + while (c= getopt(argc, argv, "rcnp?"), c != -1) { switch(c) { @@ -174,6 +175,9 @@ PRIVATE int parse_arguments(int argc, char **argv) case 'n': n_flag = 1; break; + case 'p': + p_flag = 1; + break; default: fprintf(stderr, "%s: getopt failed: %c\n", argv[ARG_NAME], c); @@ -220,6 +224,9 @@ PRIVATE int parse_arguments(int argc, char **argv) if(n_flag) rs_start.rss_flags |= RSS_NOBLOCK; + if(p_flag) + rs_start.rss_flags |= RSS_REPLICA; + if (do_run) { /* Set default recovery script for RUN */ diff --git a/servers/rs/table.c b/servers/rs/table.c index 7eff31ff5..b66b89f8a 100644 --- a/servers/rs/table.c +++ b/servers/rs/table.c @@ -76,11 +76,13 @@ PUBLIC struct boot_image_priv boot_image_priv_table[] = { /* Definition of the boot image sys table. 
*/ PUBLIC struct boot_image_sys boot_image_sys_table[] = { /*endpoint, flags */ - { RS_PROC_NR, SRV_SF }, + { RS_PROC_NR, SRVR_SF }, { VM_PROC_NR, VM_SF }, - { LOG_PROC_NR, SRVC_SF }, - { MFS_PROC_NR, SF_USE_COPY | SF_NEED_COPY }, - { PFS_PROC_NR, SRVC_SF }, + { PM_PROC_NR, SRVR_SF }, + { VFS_PROC_NR, SRVR_SF }, + { LOG_PROC_NR, SRV_SF | SF_USE_REPL }, + { MFS_PROC_NR, SF_NEED_COPY | SF_USE_COPY }, + { PFS_PROC_NR, SRV_SF | SF_USE_COPY }, { DEFAULT_BOOT_NR, SRV_SF } /* default entry */ }; diff --git a/servers/rs/utility.c b/servers/rs/utility.c index 30dfb78e1..a9e3e1c39 100644 --- a/servers/rs/utility.c +++ b/servers/rs/utility.c @@ -16,15 +16,27 @@ int type; /* type of initialization */ int r; message m; struct rprocpub *rpub; + endpoint_t old_endpoint; rpub = rp->r_pub; rp->r_flags |= RS_INITIALIZING; /* now initializing */ rp->r_check_tm = rp->r_alive_tm + 1; /* expect reply within period */ + /* Determine the old endpoint if this is a new instance. */ + old_endpoint = NONE; + if(rp->r_old_rp) { + old_endpoint = rp->r_old_rp->r_pub->endpoint; + } + else if(rp->r_prev_rp) { + old_endpoint = rp->r_prev_rp->r_pub->endpoint; + } + + /* Send initialization message. 
*/ m.m_type = RS_INIT; m.RS_INIT_TYPE = type; m.RS_INIT_RPROCTAB_GID = rinit.rproctab_gid; + m.RS_INIT_OLD_ENDPOINT = old_endpoint; r = asynsend(rpub->endpoint, &m); return r; diff --git a/servers/vm/proto.h b/servers/vm/proto.h index 243bd2e41..6cc85fb90 100644 --- a/servers/vm/proto.h +++ b/servers/vm/proto.h @@ -44,6 +44,7 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks, _PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) ); _PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) ); _PROTOTYPE( int do_info, (message *) ); +_PROTOTYPE( int swap_proc, (endpoint_t src_e, endpoint_t dst_e) ); /* exit.c */ _PROTOTYPE( void clear_proc, (struct vmproc *vmp) ); diff --git a/servers/vm/rs.c b/servers/vm/rs.c index fd453eb40..dcda8c98b 100644 --- a/servers/vm/rs.c +++ b/servers/vm/rs.c @@ -27,8 +27,6 @@ #include "util.h" #include "region.h" -#define LU_DEBUG 0 - /*===========================================================================* * do_rs_set_priv * *===========================================================================*/ @@ -63,10 +61,7 @@ PUBLIC int do_rs_set_priv(message *m) PUBLIC int do_rs_update(message *m_ptr) { endpoint_t src_e, dst_e; - struct vmproc *src_vmp, *dst_vmp; - struct vmproc orig_src_vmproc, orig_dst_vmproc; - int src_p, dst_p, r; - struct vir_region *vr; + int r; src_e = m_ptr->VM_RS_SRC_ENDPT; dst_e = m_ptr->VM_RS_DST_ENDPT; @@ -77,71 +72,9 @@ PUBLIC int do_rs_update(message *m_ptr) return r; } - /* Lookup slots for source and destination process. 
*/ - if(vm_isokendpt(src_e, &src_p) != OK) { - printf("do_rs_update: bad src endpoint %d\n", src_e); - return EINVAL; - } - src_vmp = &vmproc[src_p]; - if(vm_isokendpt(dst_e, &dst_p) != OK) { - printf("do_rs_update: bad dst endpoint %d\n", dst_e); - return EINVAL; - } - dst_vmp = &vmproc[dst_p]; - -#if LU_DEBUG - printf("do_rs_update: updating %d (%d, %d) into %d (%d, %d)\n", - src_vmp->vm_endpoint, src_p, src_vmp->vm_slot, - dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot); - - printf("do_rs_update: map_printmap for source before updating:\n"); - map_printmap(src_vmp); - printf("do_rs_update: map_printmap for destination before updating:\n"); - map_printmap(dst_vmp); -#endif - - /* Save existing data. */ - orig_src_vmproc = *src_vmp; - orig_dst_vmproc = *dst_vmp; - - /* Swap slots. */ - *src_vmp = orig_dst_vmproc; - *dst_vmp = orig_src_vmproc; - - /* Preserve endpoints and slot numbers. */ - src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint; - src_vmp->vm_slot = orig_src_vmproc.vm_slot; - dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint; - dst_vmp->vm_slot = orig_dst_vmproc.vm_slot; - - /* Preserve vir_region's parents. */ - for(vr = src_vmp->vm_regions; vr; vr = vr->next) { - vr->parent = src_vmp; - } - for(vr = dst_vmp->vm_regions; vr; vr = vr->next) { - vr->parent = dst_vmp; - } - - /* Adjust page tables. 
*/ - assert(src_vmp->vm_flags & VMF_HASPT); - assert(dst_vmp->vm_flags & VMF_HASPT); - pt_bind(&src_vmp->vm_pt, src_vmp); - pt_bind(&dst_vmp->vm_pt, dst_vmp); - if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) { - panic("do_rs_update: VMCTL_FLUSHTLB failed: %d", r); - } - -#if LU_DEBUG - printf("do_rs_update: updated %d (%d, %d) into %d (%d, %d)\n", - src_vmp->vm_endpoint, src_p, src_vmp->vm_slot, - dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot); - - printf("do_rs_update: map_printmap for source after updating:\n"); - map_printmap(src_vmp); - printf("do_rs_update: map_printmap for destination after updating:\n"); - map_printmap(dst_vmp); -#endif + /* Do the update in VM now by swapping the two process slots. */ + r = swap_proc(src_e, dst_e); - return OK; + return r; } diff --git a/servers/vm/utility.c b/servers/vm/utility.c index dfe77c264..2ca3e13c5 100644 --- a/servers/vm/utility.c +++ b/servers/vm/utility.c @@ -24,10 +24,12 @@ #include #include #include +#include #include "proto.h" #include "glo.h" #include "util.h" +#include "region.h" #include #include "kernel/const.h" @@ -35,6 +37,8 @@ #include "kernel/type.h" #include "kernel/proc.h" +#define SWAP_PROC_DEBUG 0 + /*===========================================================================* * get_mem_map * *===========================================================================*/ @@ -244,3 +248,81 @@ PUBLIC int do_info(message *m) (vir_bytes) vmp->vm_endpoint, ptr, size); } +/*===========================================================================* + * swap_proc * + *===========================================================================*/ +PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e) +{ + struct vmproc *src_vmp, *dst_vmp; + struct vmproc orig_src_vmproc, orig_dst_vmproc; + int src_p, dst_p, r; + struct vir_region *vr; + + /* Swap two VM process slots; a bad endpoint makes us return EINVAL. 
*/ + if(vm_isokendpt(src_e, &src_p) != OK) { + printf("swap_proc: bad src endpoint %d\n", src_e); + return EINVAL; + } + src_vmp = &vmproc[src_p]; + if(vm_isokendpt(dst_e, &dst_p) != OK) { + printf("swap_proc: bad dst endpoint %d\n", dst_e); + return EINVAL; + } + dst_vmp = &vmproc[dst_p]; + +#if SWAP_PROC_DEBUG + printf("swap_proc: swapping %d (%d, %d) and %d (%d, %d)\n", + src_vmp->vm_endpoint, src_p, src_vmp->vm_slot, + dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot); + + printf("swap_proc: map_printmap for source before swapping:\n"); + map_printmap(src_vmp); + printf("swap_proc: map_printmap for destination before swapping:\n"); + map_printmap(dst_vmp); +#endif + + /* Save full copies of both slots; each one is overwritten below. */ + orig_src_vmproc = *src_vmp; + orig_dst_vmproc = *dst_vmp; + + /* Swap slots: each slot receives the other's saved contents. */ + *src_vmp = orig_dst_vmproc; + *dst_vmp = orig_src_vmproc; + + /* Restore each slot's own endpoint and slot number after the swap. */ + src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint; + src_vmp->vm_slot = orig_src_vmproc.vm_slot; + dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint; + dst_vmp->vm_slot = orig_dst_vmproc.vm_slot; + + /* Point every region's parent at the slot that now holds it. */ + for(vr = src_vmp->vm_regions; vr; vr = vr->next) { + vr->parent = src_vmp; + } + for(vr = dst_vmp->vm_regions; vr; vr = vr->next) { + vr->parent = dst_vmp; + } + + /* Bind each page table to its new slot, then issue VMCTL_FLUSHTLB. */ + assert(src_vmp->vm_flags & VMF_HASPT); + assert(dst_vmp->vm_flags & VMF_HASPT); + pt_bind(&src_vmp->vm_pt, src_vmp); + pt_bind(&dst_vmp->vm_pt, dst_vmp); + if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) { + panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r); + } + +#if SWAP_PROC_DEBUG + printf("swap_proc: swapped %d (%d, %d) and %d (%d, %d)\n", + src_vmp->vm_endpoint, src_p, src_vmp->vm_slot, + dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot); + + printf("swap_proc: map_printmap for source after swapping:\n"); + map_printmap(src_vmp); + printf("swap_proc: map_printmap for destination after swapping:\n"); + map_printmap(dst_vmp); +#endif + + return OK; +} +