From: Cristiano Giuffrida Date: Fri, 9 Jul 2010 18:29:04 +0000 (+0000) Subject: RS live update support. X-Git-Tag: v3.1.8~267 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/man.named-checkzone.html?a=commitdiff_plain;h=8427d774b6f57de77c3d01e343665ec52017c881;p=minix.git RS live update support. --- diff --git a/include/minix/com.h b/include/minix/com.h index 4deb30c20..8f7b95934 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -531,6 +531,7 @@ #define SYS_PRIV_ADD_IRQ 7 /* Add IRQ */ #define SYS_PRIV_QUERY_MEM 8 /* Verify memory privilege. */ #define SYS_PRIV_UPDATE_SYS 9 /* Update a sys privilege structure. */ +#define SYS_PRIV_YIELD 10 /* Allow process to run and suspend */ /* Field names for SYS_SETGRANT */ #define SG_ADDR m2_p1 /* address */ diff --git a/include/minix/debug.h b/include/minix/debug.h index 808d73583..b7e2b23c4 100644 --- a/include/minix/debug.h +++ b/include/minix/debug.h @@ -9,5 +9,14 @@ } \ } +#define NOT_REACHABLE do { \ + panic("NOT_REACHABLE at %s:%d", __FILE__, __LINE__); \ + for(;;); \ +} while(0) + +#define NOT_IMPLEMENTED do { \ + panic("NOT_IMPLEMENTED at %s:%d", __FILE__, __LINE__); \ +} while(0) + #endif /* _MINIX_DEBUG_H */ diff --git a/include/minix/sef.h b/include/minix/sef.h index 70c504e78..3cb4041a2 100644 --- a/include/minix/sef.h +++ b/include/minix/sef.h @@ -35,26 +35,32 @@ typedef struct { /* Callback type definitions. */ typedef int(*sef_cb_init_t)(int type, sef_init_info_t *info); +typedef int(*sef_cb_init_response_t)(message *m_ptr); /* Callback registration helpers. */ _PROTOTYPE( void sef_setcb_init_fresh, (sef_cb_init_t cb)); _PROTOTYPE( void sef_setcb_init_lu, (sef_cb_init_t cb)); _PROTOTYPE( void sef_setcb_init_restart, (sef_cb_init_t cb)); +_PROTOTYPE( void sef_setcb_init_response, (sef_cb_init_response_t cb) ); /* Predefined callback implementations. */ _PROTOTYPE( int sef_cb_init_null, (int type, sef_init_info_t *info) ); +_PROTOTYPE( int sef_cb_init_response_null, (message *m_ptr) ); _PROTOTYPE( int sef_cb_init_fail, (int type, sef_init_info_t *info) ); _PROTOTYPE( int sef_cb_init_crash, (int type, sef_init_info_t *info) ); +_PROTOTYPE( int sef_cb_init_response_rs_reply, (message *m_ptr) ); /* Macros for predefined callback implementations. */ #define SEF_CB_INIT_FRESH_NULL sef_cb_init_null #define SEF_CB_INIT_LU_NULL sef_cb_init_null #define SEF_CB_INIT_RESTART_NULL sef_cb_init_null +#define SEF_CB_INIT_RESPONSE_NULL sef_cb_init_response_null #define SEF_CB_INIT_FRESH_DEFAULT sef_cb_init_null #define SEF_CB_INIT_LU_DEFAULT sef_cb_init_null #define SEF_CB_INIT_RESTART_DEFAULT sef_cb_init_null +#define SEF_CB_INIT_RESPONSE_DEFAULT sef_cb_init_response_rs_reply /* Init types. */ #define SEF_INIT_FRESH 0 /* init fresh */ @@ -121,6 +127,7 @@ typedef int(*sef_cb_lu_state_isvalid_t)(int); typedef void(*sef_cb_lu_state_changed_t)(int, int); typedef void(*sef_cb_lu_state_dump_t)(int); typedef int(*sef_cb_lu_state_save_t)(int); +typedef int(*sef_cb_lu_response_t)(message *m_ptr); /* Callback registration helpers. */ _PROTOTYPE( void sef_setcb_lu_prepare, (sef_cb_lu_prepare_t cb) ); @@ -128,6 +135,7 @@ _PROTOTYPE( void sef_setcb_lu_state_isvalid, (sef_cb_lu_state_isvalid_t cb) ); _PROTOTYPE( void sef_setcb_lu_state_changed, (sef_cb_lu_state_changed_t cb) ); _PROTOTYPE( void sef_setcb_lu_state_dump, (sef_cb_lu_state_dump_t cb) ); _PROTOTYPE( void sef_setcb_lu_state_save, (sef_cb_lu_state_save_t cb) ); +_PROTOTYPE( void sef_setcb_lu_response, (sef_cb_lu_response_t cb) ); /* Predefined callback implementations. */ _PROTOTYPE( int sef_cb_lu_prepare_null, (int state) ); @@ -135,12 +143,14 @@ _PROTOTYPE( int sef_cb_lu_state_isvalid_null, (int state) ); _PROTOTYPE( void sef_cb_lu_state_changed_null, (int old_state, int state) ); _PROTOTYPE( void sef_cb_lu_state_dump_null, (int state) ); _PROTOTYPE( int sef_cb_lu_state_save_null, (int state) ); +_PROTOTYPE( int sef_cb_lu_response_null, (message *m_ptr) ); _PROTOTYPE( int sef_cb_lu_prepare_always_ready, (int state) ); _PROTOTYPE( int sef_cb_lu_prepare_never_ready, (int state) ); _PROTOTYPE( int sef_cb_lu_prepare_crash, (int state) ); _PROTOTYPE( int sef_cb_lu_state_isvalid_standard, (int state) ); _PROTOTYPE( int sef_cb_lu_state_isvalid_workfree, (int state) ); +_PROTOTYPE( int sef_cb_lu_response_rs_reply, (message *m_ptr) ); /* Macros for predefined callback implementations. */ #define SEF_CB_LU_PREPARE_NULL sef_cb_lu_prepare_null @@ -148,12 +158,14 @@ _PROTOTYPE( int sef_cb_lu_state_isvalid_workfree, (int state) ); #define SEF_CB_LU_STATE_CHANGED_NULL sef_cb_lu_state_changed_null #define SEF_CB_LU_STATE_DUMP_NULL sef_cb_lu_state_dump_null #define SEF_CB_LU_STATE_SAVE_NULL sef_cb_lu_state_save_null +#define SEF_CB_LU_RESPONSE_NULL sef_cb_lu_response_null #define SEF_CB_LU_PREPARE_DEFAULT sef_cb_lu_prepare_null #define SEF_CB_LU_STATE_ISVALID_DEFAULT sef_cb_lu_state_isvalid_null #define SEF_CB_LU_STATE_CHANGED_DEFAULT sef_cb_lu_state_changed_null #define SEF_CB_LU_STATE_DUMP_DEFAULT sef_cb_lu_state_dump_null #define SEF_CB_LU_STATE_SAVE_DEFAULT sef_cb_lu_state_save_null +#define SEF_CB_LU_RESPONSE_DEFAULT sef_cb_lu_response_rs_reply /* Standard live update states. */ #define SEF_LU_STATE_NULL 0 /* null state */ diff --git a/kernel/debug.h b/kernel/debug.h index 78cce42d3..575e0b4ef 100644 --- a/kernel/debug.h +++ b/kernel/debug.h @@ -55,15 +55,6 @@ #define TRACE(code, statement) #endif -#define NOT_REACHABLE do { \ - panic("NOT_REACHABLE at %s:%d", __FILE__, __LINE__); \ - for(;;); \ -} while(0) - -#define NOT_IMPLEMENTED do { \ - panic("NOT_IMPLEMENTED at %s:%d", __FILE__, __LINE__); \ -} while(0) - #ifdef CONFIG_BOOT_VERBOSE #define BOOT_VERBOSE(x) x #else diff --git a/kernel/system/do_privctl.c b/kernel/system/do_privctl.c index a4f4b67b7..071917401 100644 --- a/kernel/system/do_privctl.c +++ b/kernel/system/do_privctl.c @@ -59,6 +59,15 @@ PUBLIC int do_privctl(struct proc * caller, message * m_ptr) RTS_UNSET(rp, RTS_NO_PRIV); return(OK); + case SYS_PRIV_YIELD: + /* Allow process to run and suspend the caller. */ + if (!RTS_ISSET(rp, RTS_NO_PRIV) || priv(rp)->s_proc_nr == NONE) { + return(EPERM); + } + RTS_SET(caller, RTS_NO_PRIV); + RTS_UNSET(rp, RTS_NO_PRIV); + return(OK); + case SYS_PRIV_DISALLOW: /* Disallow process from running. */ if (RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); diff --git a/kernel/system/do_safecopy.c b/kernel/system/do_safecopy.c index 0a55ef4b5..cccaae8c7 100644 --- a/kernel/system/do_safecopy.c +++ b/kernel/system/do_safecopy.c @@ -27,7 +27,7 @@ FORWARD _PROTOTYPE(int safecopy, (struct proc *, endpoint_t, endpoint_t, cp_grant_id_t, int, int, size_t, vir_bytes, vir_bytes, int)); #define HASGRANTTABLE(gr) \ - (!RTS_ISSET(gr, RTS_NO_PRIV) && priv(gr) && priv(gr)->s_grant_table > 0) + (priv(gr) && priv(gr)->s_grant_table) /*===========================================================================* * verify_grant * @@ -67,7 +67,12 @@ endpoint_t *e_granter; /* new granter (magic grants) */ * priv. structure, or the grant table in the priv. structure * is too small for the grant, return EPERM. */ - if(!HASGRANTTABLE(granter_proc)) return EPERM; + if(!HASGRANTTABLE(granter_proc)) { + printf( + "grant verify failed: granter %d has no grant table\n", + granter); + return(EPERM); + } if(priv(granter_proc)->s_grant_entries <= grant) { printf( @@ -244,7 +249,11 @@ int access; /* CPF_READ for a copy from granter to grantee, CPF_WRITE /* See if there is a reasonable grant table. */ if(!(granter_p = endpoint_lookup(granter))) return EINVAL; - if(!HASGRANTTABLE(granter_p)) return EPERM; + if(!HASGRANTTABLE(granter_p)) { + printf( + "safecopy failed: granter %d has no grant table\n", granter); + return(EPERM); + } /* Decide who is src and who is dst. */ if(access & CPF_READ) { diff --git a/lib/libsys/sef.c b/lib/libsys/sef.c index a987e06e8..428b076fd 100644 --- a/lib/libsys/sef.c +++ b/lib/libsys/sef.c @@ -8,6 +8,7 @@ PUBLIC char sef_self_name[SEF_SELF_NAME_MAXLEN]; PUBLIC endpoint_t sef_self_endpoint; PUBLIC int sef_self_priv_flags; +PUBLIC int sef_self_first_receive_done; /* Debug. */ #define SEF_DEBUG_HEADER_MAXLEN 32 @@ -41,14 +42,16 @@ PUBLIC void sef_startup() /* SEF startup interface for system services. */ int r, status; endpoint_t old_endpoint; + int priv_flags; /* Get information about self. */ r = sys_whoami(&sef_self_endpoint, sef_self_name, SEF_SELF_NAME_MAXLEN, - &sef_self_priv_flags); + &priv_flags); if ( r != OK) { sef_self_endpoint = SELF; sprintf(sef_self_name, "%s", "Unknown"); } + sef_self_priv_flags = priv_flags; old_endpoint = NONE; /* RS may wake up with the wrong endpoint, perfom the update in that case. */ @@ -92,6 +95,10 @@ PUBLIC void sef_startup() } } #endif + + /* (Re)initialize SEF variables. */ + sef_self_first_receive_done = FALSE; + sef_self_priv_flags = priv_flags; } /*===========================================================================* @@ -112,6 +119,7 @@ PUBLIC int sef_receive_status(endpoint_t src, message *m_ptr, int *status_ptr) /* Receive and return in case of error. */ r = receive(src, m_ptr, &status); if(status_ptr) *status_ptr = status; + if(!sef_self_first_receive_done) sef_self_first_receive_done = TRUE; if(r != OK) { return r; } diff --git a/lib/libsys/sef_init.c b/lib/libsys/sef_init.c index 87aa73280..783b04ad0 100644 --- a/lib/libsys/sef_init.c +++ b/lib/libsys/sef_init.c @@ -8,10 +8,12 @@ PRIVATE struct sef_cbs { sef_cb_init_t sef_cb_init_fresh; sef_cb_init_t sef_cb_init_lu; sef_cb_init_t sef_cb_init_restart; + sef_cb_init_response_t sef_cb_init_response; } sef_cbs = { SEF_CB_INIT_FRESH_DEFAULT, SEF_CB_INIT_LU_DEFAULT, - SEF_CB_INIT_RESTART_DEFAULT + SEF_CB_INIT_RESTART_DEFAULT, + SEF_CB_INIT_RESPONSE_DEFAULT }; /* SEF Init prototypes for sef_startup(). */ @@ -31,7 +33,8 @@ EXTERN endpoint_t sef_self_priv_flags; PRIVATE int process_init(int type, sef_init_info_t *info) { /* Process initialization. */ - int r; + int r, result; + message m; /* Debug. */ #if SEF_INIT_DEBUG @@ -44,21 +47,26 @@ PRIVATE int process_init(int type, sef_init_info_t *info) /* Let the callback code handle the specific initialization type. */ switch(type) { case SEF_INIT_FRESH: - r = sef_cbs.sef_cb_init_fresh(type, info); + result = sef_cbs.sef_cb_init_fresh(type, info); break; case SEF_INIT_LU: - r = sef_cbs.sef_cb_init_lu(type, info); + result = sef_cbs.sef_cb_init_lu(type, info); break; case SEF_INIT_RESTART: - r = sef_cbs.sef_cb_init_restart(type, info); + result = sef_cbs.sef_cb_init_restart(type, info); break; default: /* Not a valid SEF init type. */ - r = EINVAL; + result = EINVAL; break; } + m.m_source = sef_self_endpoint; + m.m_type = RS_INIT; + m.RS_INIT_RESULT = result; + r = sef_cbs.sef_cb_init_response(&m); + return r; } @@ -109,10 +117,6 @@ PUBLIC int do_sef_init_request(message *m_ptr) /* Peform initialization. */ r = process_init(type, &info); - /* Report back to RS. */ - m_ptr->RS_INIT_RESULT = r; - r = sendrec(RS_PROC_NR, m_ptr); - return r; } @@ -143,6 +147,15 @@ PUBLIC void sef_setcb_init_restart(sef_cb_init_t cb) sef_cbs.sef_cb_init_restart = cb; } +/*===========================================================================* + * sef_setcb_init_response * + *===========================================================================*/ +PUBLIC void sef_setcb_init_response(sef_cb_init_response_t cb) +{ + assert(cb != NULL); + sef_cbs.sef_cb_init_response = cb; +} + /*===========================================================================* * sef_cb_init_null * *===========================================================================*/ @@ -152,6 +165,14 @@ PUBLIC int sef_cb_init_null(int UNUSED(type), return OK; } +/*===========================================================================* + * sef_cb_init_response_null * + *===========================================================================*/ +PUBLIC int sef_cb_init_response_null(message * UNUSED(m_ptr)) +{ + return ENOSYS; +} + /*===========================================================================* * sef_cb_init_fail * *===========================================================================*/ @@ -170,3 +191,16 @@ PUBLIC int sef_cb_init_crash(int UNUSED(type), sef_init_info_t *UNUSED(info)) return OK; } +/*===========================================================================* + * sef_cb_init_response_rs_reply * + *===========================================================================*/ +PUBLIC int sef_cb_init_response_rs_reply(message *m_ptr) +{ + int r; + + /* Inform RS that we completed initialization with the given result. */ + r = sendrec(RS_PROC_NR, m_ptr); + + return r; +} + diff --git a/lib/libsys/sef_liveupdate.c b/lib/libsys/sef_liveupdate.c index cf2a312de..18cf5bc8c 100644 --- a/lib/libsys/sef_liveupdate.c +++ b/lib/libsys/sef_liveupdate.c @@ -3,7 +3,7 @@ #include /* SEF Live update variables. */ -PRIVATE int sef_lu_state = SEF_LU_STATE_NULL; +PRIVATE int sef_lu_state; /* SEF Live update callbacks. */ PRIVATE struct sef_cbs { @@ -12,12 +12,14 @@ PRIVATE struct sef_cbs { sef_cb_lu_state_changed_t sef_cb_lu_state_changed; sef_cb_lu_state_dump_t sef_cb_lu_state_dump; sef_cb_lu_state_save_t sef_cb_lu_state_save; + sef_cb_lu_response_t sef_cb_lu_response; } sef_cbs = { SEF_CB_LU_PREPARE_DEFAULT, SEF_CB_LU_STATE_ISVALID_DEFAULT, SEF_CB_LU_STATE_CHANGED_DEFAULT, SEF_CB_LU_STATE_DUMP_DEFAULT, SEF_CB_LU_STATE_SAVE_DEFAULT, + SEF_CB_LU_RESPONSE_DEFAULT }; /* SEF Live update prototypes for sef_receive(). */ @@ -31,6 +33,10 @@ PRIVATE _PROTOTYPE( void sef_lu_ready, (int result) ); EXTERN _PROTOTYPE( char* sef_debug_header, (void) ); PRIVATE int sef_lu_debug_cycle = 0; +/* Information about SELF. */ +EXTERN endpoint_t sef_self_endpoint; +EXTERN int sef_self_first_receive_done; + /*===========================================================================* * do_sef_lu_before_receive * *===========================================================================*/ @@ -39,6 +45,11 @@ PUBLIC void do_sef_lu_before_receive() /* Handle SEF Live update before receive events. */ int r; + /* Initialize on first receive. */ + if(!sef_self_first_receive_done) { + sef_lu_state = SEF_LU_STATE_NULL; + } + /* Nothing to do if we are not preparing for a live update. */ if(sef_lu_state == SEF_LU_STATE_NULL) { return; @@ -113,7 +124,7 @@ PUBLIC int do_sef_lu_request(message *m_ptr) PRIVATE void sef_lu_ready(int result) { message m; - int old_state, rs_result, r; + int old_state, r; #if SEF_LU_DEBUG sef_lu_debug_begin(); @@ -134,22 +145,22 @@ PRIVATE void sef_lu_ready(int result) } } - /* Inform RS that we're ready with the given result. */ + /* Let the callback code produce a live update response and block. + * We should get beyond this point only if either result is an error or + * something else goes wrong in the callback code. + */ + m.m_source = sef_self_endpoint; m.m_type = RS_LU_PREPARE; m.RS_LU_STATE = sef_lu_state; m.RS_LU_RESULT = result; - r = sendrec(RS_PROC_NR, &m); - if ( r != OK) { - panic("sendrec failed: %d", r); - } + r = sef_cbs.sef_cb_lu_response(&m); #if SEF_LU_DEBUG - rs_result = m.m_type == RS_LU_PREPARE ? EINTR : m.m_type; sef_lu_debug_begin(); sef_lu_dprint("%s, cycle=%d. The %s aborted the update with result %d!\n", sef_debug_header(), sef_lu_debug_cycle, (result == OK ? "server" : "client"), - (result == OK ? rs_result : result)); /* EINTR if update was canceled. */ + (result == OK ? r : result)); /* EINTR if update was canceled. */ sef_lu_debug_end(); #endif @@ -208,6 +219,15 @@ PUBLIC void sef_setcb_lu_state_save(sef_cb_lu_state_save_t cb) sef_cbs.sef_cb_lu_state_save = cb; } +/*===========================================================================* + * sef_setcb_lu_response * + *===========================================================================*/ +PUBLIC void sef_setcb_lu_response(sef_cb_lu_response_t cb) +{ + assert(cb != NULL); + sef_cbs.sef_cb_lu_response = cb; +} + /*===========================================================================* * sef_cb_lu_prepare_null * *===========================================================================*/ @@ -248,6 +268,14 @@ PUBLIC int sef_cb_lu_state_save_null(int UNUSED(result)) return OK; } +/*===========================================================================* + * sef_cb_lu_response_null * + *===========================================================================*/ +PUBLIC int sef_cb_lu_response_null(message * UNUSED(m_ptr)) +{ + return ENOSYS; +} + /*===========================================================================* * sef_cb_lu_prepare_always_ready * *===========================================================================*/ @@ -297,3 +325,19 @@ PUBLIC int sef_cb_lu_state_isvalid_workfree(int state) return (state == SEF_LU_STATE_WORK_FREE); } +/*===========================================================================* + * sef_cb_lu_response_rs_reply * + *===========================================================================*/ +PUBLIC int sef_cb_lu_response_rs_reply(message *m_ptr) +{ + int r; + + /* Inform RS that we're ready with the given result. */ + r = sendrec(RS_PROC_NR, m_ptr); + if ( r != OK) { + return r; + } + + return m_ptr->m_type == RS_LU_PREPARE ? EINTR : m_ptr->m_type; +} + diff --git a/servers/rs/const.h b/servers/rs/const.h index 485d7dbf7..ce05735a7 100644 --- a/servers/rs/const.h +++ b/servers/rs/const.h @@ -97,5 +97,9 @@ #define RS_DONTREPLY 0 #define RS_REPLY 1 +/* Swap flags. */ +#define RS_DONTSWAP 0 +#define RS_SWAP 1 + #endif /* RS_CONST_H */ diff --git a/servers/rs/main.c b/servers/rs/main.c index 69c7d27cd..7aa41d8e2 100644 --- a/servers/rs/main.c +++ b/servers/rs/main.c @@ -126,7 +126,7 @@ PUBLIC int main(void) /* Finally send reply message, unless disabled. */ if (result != EDONTREPLY) { m.m_type = result; - reply(who_e, &m); + reply(who_e, NULL, &m); } } } @@ -138,9 +138,13 @@ PUBLIC int main(void) PRIVATE void sef_local_startup() { /* Register init callbacks. */ + sef_setcb_init_response(do_init_ready); sef_setcb_init_fresh(sef_cb_init_fresh); sef_setcb_init_restart(sef_cb_init_fail); + /* Register live update callbacks. */ + sef_setcb_lu_response(do_upd_ready); + /* Register signal callbacks. */ sef_setcb_signal_handler(sef_cb_signal_handler); sef_setcb_signal_manager(sef_cb_signal_manager); @@ -349,11 +353,6 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) continue; } - /* Ignore RS. */ - if(boot_image_priv->endpoint == RS_PROC_NR) { - continue; - } - /* Kernel-scheduled processes first */ if ((boot_image_priv->sched == KERNEL) ? usersched : !usersched) { continue; @@ -363,14 +362,21 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) rp = &rproc[boot_image_priv - boot_image_priv_table]; rpub = rp->r_pub; - /* Allow the service to run. */ - if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) { - panic("unable to initialize privileges: %d", s); + /* RS is already running as we speak. */ + if(boot_image_priv->endpoint == RS_PROC_NR) { + if ((s = init_service(rp, SEF_INIT_FRESH)) != OK) { + panic("unable to initialize RS: %d", s); + } + continue; } + /* Allow the service to run. */ if ((s = sched_init_proc(rp)) != OK) { panic("unable to initialize scheduling: %d", s); } + if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) { + panic("unable to initialize privileges: %d", s); + } /* Initialize service. We assume every service will always get * back to us here at boot time. @@ -456,14 +462,8 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) if(pid == 0) { /* New RS instance running. */ - /* Synchronize with the old instance. */ - s = sef_receive(RS_PROC_NR, &m); - if(s != OK) { - panic("sef_receive failed: %d", s); - } - /* Live update the old instance into the new one. */ - s = update_service(&rp, &replica_rp); + s = update_service(&rp, &replica_rp, RS_SWAP); if(s != OK) { panic("unable to live update RS: %d", s); } @@ -485,26 +485,18 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) /* Old RS instance running. */ /* Set up privileges for the new instance and let it run. */ - set_sys_bit(replica_rp->r_priv.s_ipc_to, static_priv_id(RS_PROC_NR)); s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv)); if(s != OK) { panic("unable to set privileges for the new RS instance: %d", s); } - s = sys_privctl(replica_endpoint, SYS_PRIV_ALLOW, NULL); - if(s != OK) { - panic("unable to let the new RS instance run: %d", s); - } if ((s = sched_init_proc(replica_rp)) != OK) { panic("unable to initialize RS replica scheduling: %d", s); } - - /* Synchronize with the new instance and go to sleep. */ - m.m_type = RS_INIT; - s = sendrec(replica_endpoint, &m); + s = sys_privctl(replica_endpoint, SYS_PRIV_YIELD, NULL); if(s != OK) { - panic("sendrec failed: %d", s); + panic("unable to yield control to the new RS instance: %d", s); } - /* Not reachable */ + NOT_REACHABLE; } return(OK); @@ -690,7 +682,7 @@ endpoint_t endpoint; /* Send a reply to unblock the service. */ m.m_type = OK; - reply(m.m_source, &m); + reply(m.m_source, rp, &m); /* Mark the slot as no longer initializing. */ rp->r_flags &= ~RS_INITIALIZING; diff --git a/servers/rs/manager.c b/servers/rs/manager.c index d3596e1c9..691640e58 100644 --- a/servers/rs/manager.c +++ b/servers/rs/manager.c @@ -100,11 +100,6 @@ struct rproc *rp; return EPERM; } - /* Disallow RS_UPDATE for RS. */ - if(rpub->endpoint == RS_PROC_NR) { - if(call == RS_UPDATE) return EPERM; - } - /* Disallow the call if another call is in progress for the service. */ if(rp->r_flags & RS_LATEREPLY || rp->r_flags & RS_INITIALIZING) { return EBUSY; @@ -278,7 +273,14 @@ PUBLIC void update_period(message *m_ptr) /* Prepare cancel request. */ m.m_type = RS_LU_PREPARE; m.RS_LU_STATE = SEF_LU_STATE_NULL; - asynsend(rpub->endpoint, &m); + if(rpub->endpoint == RS_PROC_NR) { + /* RS can process the request directly. */ + do_sef_lu_request(&m); + } + else { + /* Send request message to the system service. */ + asynsend(rpub->endpoint, &m); + } } } @@ -327,10 +329,8 @@ PUBLIC void end_update(int result, int reply_flag) surviving_rp->r_flags &= ~RS_UPDATING; if(reply_flag == RS_REPLY) { message m; - if(rs_verbose) - printf("RS: %s being replied to\n", srv_to_string(surviving_rp)); m.m_type = result; - reply(surviving_rp->r_pub->endpoint, &m); + reply(surviving_rp->r_pub->endpoint, surviving_rp, &m); } /* Cleanup the version that has to die out. */ @@ -382,7 +382,7 @@ struct rproc *rp; /* RS should simply exit() directly. */ if(rpub->endpoint == RS_PROC_NR) { - exit(0); + exit(1); } return sys_kill(rpub->endpoint, SIGKILL); @@ -437,7 +437,8 @@ struct rproc *rp; rpub = rp->r_pub; use_copy= (rpub->sys_flags & SF_USE_COPY); - has_replica= (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED)); + has_replica= (rp->r_old_rp + || (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED))); /* Do we need an existing replica to create the service? */ if(!has_replica && (rpub->sys_flags & SF_NEED_REPL)) { @@ -623,20 +624,15 @@ int instance_flag; rs_flags = (ROOT_SYS_PROC | RST_SYS_PROC); if((replica_rp->r_priv.s_flags & rs_flags) == rs_flags) { rs_rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)]; - if(rs_verbose) - printf("RS: %s gets a backup signal manager\n", srv_to_string(rs_rp)); - /* Update privilege structures. */ - rs_rp->r_priv.s_bak_sig_mgr = replica_rpub->endpoint; - replica_rp->r_priv.s_sig_mgr = SELF; - r = sys_privctl(RS_PROC_NR, SYS_PRIV_UPDATE_SYS, &rs_rp->r_priv); + /* Update signal managers. */ + r = update_sig_mgrs(rs_rp, SELF, replica_rpub->endpoint); if(r == OK) { - r = sys_privctl(replica_rpub->endpoint, SYS_PRIV_UPDATE_SYS, - &replica_rp->r_priv); + r = update_sig_mgrs(replica_rp, SELF, NONE); } if(r != OK) { *rp_link = NULL; - return kill_service(replica_rp, "sys_privctl call failed", r); + return kill_service(replica_rp, "update_sig_mgrs failed", r); } } @@ -829,9 +825,10 @@ PUBLIC void stop_service(struct rproc *rp,int how) /*===========================================================================* * update_service * *===========================================================================*/ -PUBLIC int update_service(src_rpp, dst_rpp) +PUBLIC int update_service(src_rpp, dst_rpp, swap_flag) struct rproc **src_rpp; struct rproc **dst_rpp; +int swap_flag; { /* Update an existing service. */ int r; @@ -851,10 +848,11 @@ struct rproc **dst_rpp; printf("RS: %s updating into %s\n", srv_to_string(src_rp), srv_to_string(dst_rp)); - /* Swap the slots of the two processes. */ - r = srv_update(src_rpub->endpoint, dst_rpub->endpoint); - if(r != OK) { - return r; + /* Swap the slots of the two processes when asked to. */ + if(swap_flag == RS_SWAP) { + if((r = srv_update(src_rpub->endpoint, dst_rpub->endpoint)) != OK) { + return r; + } } /* Swap slots here as well. */ @@ -933,7 +931,8 @@ PUBLIC void terminate_service(struct rproc *rp) new_rp = rp; old_rp = new_rp->r_old_rp; new_rp->r_flags &= ~RS_INITIALIZING; - update_service(&new_rp, &old_rp); /* can't fail */ + r = update_service(&new_rp, &old_rp, RS_SWAP); + assert(r == OK); /* can't fail */ end_update(ERESTART, RS_REPLY); return; } @@ -1072,7 +1071,7 @@ PUBLIC void restart_service(struct rproc *rp) replica_rp = rp->r_next_rp; /* Update the service into the replica. */ - r = update_service(&rp, &replica_rp); + r = update_service(&rp, &replica_rp, RS_SWAP); if(r != OK) { kill_service(rp, "unable to update into new replica", r); return; @@ -1085,9 +1084,6 @@ PUBLIC void restart_service(struct rproc *rp) return; } - /* Increase the number of restarts. */ - replica_rp->r_restarts += 1; - if(rs_verbose) printf("RS: %s restarted into %s\n", srv_to_string(rp), srv_to_string(replica_rp)); @@ -1129,7 +1125,7 @@ struct rproc *rp; struct rproc ***rps; int *length; { -/* Retrieve all the service instances of a give service. */ +/* Retrieve all the service instances of a given service. */ static struct rproc *instances[5]; int nr_instances; @@ -1573,6 +1569,10 @@ struct rproc **clone_rpp; if(clone_rpub->sys_flags & SF_USE_COPY) { share_exec(clone_rp, rp); /* share exec image */ } + clone_rp->r_old_rp = NULL; /* no old version yet */ + clone_rp->r_new_rp = NULL; /* no new version yet */ + clone_rp->r_prev_rp = NULL; /* no prev replica yet */ + clone_rp->r_next_rp = NULL; /* no next replica yet */ /* Force dynamic privilege id. */ clone_rp->r_priv.s_flags |= DYN_PRIV_ID; diff --git a/servers/rs/proto.h b/servers/rs/proto.h index 1d2bb0874..f7f5fe447 100644 --- a/servers/rs/proto.h +++ b/servers/rs/proto.h @@ -58,7 +58,7 @@ _PROTOTYPE( int run_service, (struct rproc *rp, int init_type) ); _PROTOTYPE( int start_service, (struct rproc *rp) ); _PROTOTYPE( void stop_service, (struct rproc *rp,int how) ); _PROTOTYPE( int update_service, (struct rproc **src_rpp, - struct rproc **dst_rpp) ); + struct rproc **dst_rpp, int swap_flag) ); _PROTOTYPE( void activate_service, (struct rproc *rp, struct rproc *ex_rp) ); _PROTOTYPE( void terminate_service, (struct rproc *rp)); _PROTOTYPE( void restart_service, (struct rproc *rp) ); @@ -92,10 +92,12 @@ _PROTOTYPE( int init_service, (struct rproc *rp, int type)); _PROTOTYPE(void fill_call_mask, ( int *calls, int tot_nr_calls, bitchunk_t *call_mask, int call_base, int is_init)); _PROTOTYPE( char* srv_to_string, (struct rproc *rp)); -_PROTOTYPE( void reply, (endpoint_t who, message *m_ptr)); +_PROTOTYPE( void reply, (endpoint_t who, struct rproc *rp, message *m_ptr)); _PROTOTYPE( void late_reply, (struct rproc *rp, int code)); _PROTOTYPE( int rs_isokendpt, (endpoint_t endpoint, int *proc)); _PROTOTYPE( int sched_init_proc, (struct rproc *rp)); +_PROTOTYPE( int update_sig_mgrs, (struct rproc *rp, endpoint_t sig_mgr, + endpoint_t bak_sig_mgr)); /* error.c */ _PROTOTYPE( char * init_strerror, (int errnum) ); diff --git a/servers/rs/request.c b/servers/rs/request.c index 24d8fb72d..a0dcac369 100755 --- a/servers/rs/request.c +++ b/servers/rs/request.c @@ -363,22 +363,30 @@ PUBLIC int do_shutdown(message *m_ptr) PUBLIC int do_init_ready(message *m_ptr) { int who_p; + message m; struct rproc *rp; struct rprocpub *rpub; - int result; + int result, is_rs; int r; + is_rs = (m_ptr->m_source == RS_PROC_NR); who_p = _ENDPOINT_P(m_ptr->m_source); + result = m_ptr->RS_INIT_RESULT; + + /* Check for RS failing initialization first. */ + if(is_rs && result != OK) { + return result; + } + rp = rproc_ptr[who_p]; rpub = rp->r_pub; - result = m_ptr->RS_INIT_RESULT; /* Make sure the originating service was requested to initialize. */ if(! (rp->r_flags & RS_INITIALIZING) ) { if(rs_verbose) printf("RS: do_init_ready: got unexpected init ready msg from %d\n", m_ptr->m_source); - return(EDONTREPLY); + return EINVAL; } /* Check if something went wrong and the service failed to init. @@ -389,7 +397,7 @@ PUBLIC int do_init_ready(message *m_ptr) printf("RS: %s initialization error: %s\n", srv_to_string(rp), init_strerror(result)); crash_service(rp); /* simulate crash */ - return(EDONTREPLY); + return EDONTREPLY; } /* Mark the slot as no longer initializing. */ @@ -397,6 +405,10 @@ PUBLIC int do_init_ready(message *m_ptr) rp->r_check_tm = 0; getuptime(&rp->r_alive_tm); + /* Reply and unblock the service before doing anything else. */ + m.m_type = OK; + reply(rpub->endpoint, rp, &m); + /* See if a late reply has to be sent. */ late_reply(rp, OK); @@ -417,6 +429,7 @@ PUBLIC int do_init_ready(message *m_ptr) if(rp->r_prev_rp) { cleanup_service(rp->r_prev_rp); rp->r_prev_rp = NULL; + rp->r_restarts += 1; if(rs_verbose) printf("RS: %s completed restart\n", srv_to_string(rp)); @@ -429,7 +442,7 @@ PUBLIC int do_init_ready(message *m_ptr) } } - return(OK); + return is_rs ? OK : EDONTREPLY; /* return what the caller expects */ } /*===========================================================================* @@ -563,19 +576,42 @@ PUBLIC int do_update(message *m_ptr) if(rs_verbose) printf("RS: %s updating\n", srv_to_string(rp)); - /* Request to update. */ - m_ptr->m_type = RS_LU_PREPARE; - asynsend3(rpub->endpoint, m_ptr, AMF_NOREPLY); + /* If RS is updating, set up signal managers for the new instance. + * The current RS instance must be made the backup signal manager to + * support rollback in case of a crash during initialization. + */ + if(rp->r_priv.s_flags & ROOT_SYS_PROC) { + new_rp = rp->r_new_rp; + + s = update_sig_mgrs(new_rp, SELF, new_rp->r_pub->endpoint); + if(s != OK) { + cleanup_service(new_rp); + return s; + } + } - /* Unblock the caller immediately if requested. */ if(noblock) { - return OK; + /* Unblock the caller immediately if requested. */ + m_ptr->m_type = OK; + reply(m_ptr->m_source, NULL, m_ptr); + } + else { + /* Send a reply when the new version completes initialization. */ + rp->r_flags |= RS_LATEREPLY; + rp->r_caller = m_ptr->m_source; + rp->r_caller_request = RS_UPDATE; } - /* Late reply - send a reply when the new version completes initialization. */ - rp->r_flags |= RS_LATEREPLY; - rp->r_caller = m_ptr->m_source; - rp->r_caller_request = RS_UPDATE; + /* Request to update. */ + m_ptr->m_type = RS_LU_PREPARE; + if(rpub->endpoint == RS_PROC_NR) { + /* RS can process the request directly. */ + do_sef_lu_request(m_ptr); + } + else { + /* Send request message to the system service. */ + asynsend3(rpub->endpoint, m_ptr, AMF_NOREPLY); + } return EDONTREPLY; } @@ -588,18 +624,20 @@ PUBLIC int do_upd_ready(message *m_ptr) struct rproc *rp, *old_rp, *new_rp; int who_p; int result; + int is_rs; int r; who_p = _ENDPOINT_P(m_ptr->m_source); rp = rproc_ptr[who_p]; result = m_ptr->RS_LU_RESULT; + is_rs = (m_ptr->m_source == RS_PROC_NR); /* Make sure the originating service was requested to prepare for update. */ if(rp != rupdate.rp) { if(rs_verbose) printf("RS: do_upd_ready: got unexpected update ready msg from %d\n", m_ptr->m_source); - return(EINVAL); + return EINVAL; } /* Check if something went wrong and the service failed to prepare @@ -610,13 +648,31 @@ PUBLIC int do_upd_ready(message *m_ptr) end_update(result, RS_REPLY); printf("RS: update failed: %s\n", lu_strerror(result)); - return EDONTREPLY; + return is_rs ? result : EDONTREPLY; /* return what the caller expects */ } - /* Perform the update. */ old_rp = rp; new_rp = rp->r_new_rp; - r = update_service(&old_rp, &new_rp); + + /* If RS itself is updating, yield control to the new version immediately. */ + if(is_rs) { + r = init_service(new_rp, SEF_INIT_LU); + if(r != OK) { + panic("unable to initialize the new RS instance: %d", r); + } + r = sys_privctl(new_rp->r_pub->endpoint, SYS_PRIV_YIELD, NULL); + if(r != OK) { + panic("unable to yield control to the new RS instance: %d", r); + } + /* If we get this far, the new version failed to initialize. Rollback. */ + r = srv_update(RS_PROC_NR, new_rp->r_pub->endpoint); + assert(r == OK); /* can't fail */ + end_update(ERESTART, RS_REPLY); + return ERESTART; + } + + /* Perform the update. */ + r = update_service(&old_rp, &new_rp, RS_SWAP); if(r != OK) { end_update(r, RS_REPLY); printf("RS: update failed: error %d\n", r); @@ -626,7 +682,9 @@ PUBLIC int do_upd_ready(message *m_ptr) /* Let the new version run. */ r = run_service(new_rp, SEF_INIT_LU); if(r != OK) { - update_service(&new_rp, &old_rp); /* rollback, can't fail. */ + /* Something went wrong. Rollback. */ + r = update_service(&new_rp, &old_rp, RS_SWAP); + assert(r == OK); /* can't fail */ end_update(r, RS_REPLY); printf("RS: update failed: error %d\n", r); return EDONTREPLY; @@ -752,13 +810,13 @@ PUBLIC void do_sigchld() * free slots for all the service instances and send a late * reply if necessary. */ - get_service_instances(rp, &rps, &nr_rps); - for(i=0;ir_flags |= RS_INITIALIZING; /* now initializing */ rp->r_check_tm = rp->r_alive_tm + 1; /* expect reply within period */ + /* In case of RS initialization, we are done. */ + if(rp->r_priv.s_flags & ROOT_SYS_PROC) { + return OK; + } + /* Determine the old endpoint if this is a new instance. */ old_endpoint = NONE; if(rp->r_old_rp) { @@ -129,12 +134,21 @@ struct rproc *rp; /* pointer to process slot */ /*===========================================================================* * reply * *===========================================================================*/ -PUBLIC void reply(who, m_ptr) +PUBLIC void reply(who, rp, m_ptr) endpoint_t who; /* replyee */ +struct rproc *rp; /* replyee slot (if any) */ message *m_ptr; /* reply message */ { int r; /* send status */ + /* No need to actually reply to RS */ + if(who == RS_PROC_NR) { + return; + } + + if(rs_verbose && rp) + printf("RS: %s being replied to\n", srv_to_string(rp)); + r = sendnb(who, m_ptr); /* send the message */ if (r != OK) printf("RS: unable to send reply to %d: %d\n", who, r); @@ -159,7 +173,7 @@ int code; /* status code */ printf("RS: %s late reply %d to %d for request %d\n", srv_to_string(rp), code, rp->r_caller, rp->r_caller_request); - reply(rp->r_caller, &m); + reply(rp->r_caller, NULL, &m); rp->r_flags &= ~RS_LATEREPLY; } } @@ -212,3 +226,41 @@ PUBLIC int sched_init_proc(struct rproc *rp) return OK; } + +/*===========================================================================* + * update_sig_mgrs * + *===========================================================================*/ +PUBLIC int update_sig_mgrs(struct rproc *rp, endpoint_t sig_mgr, + endpoint_t bak_sig_mgr) +{ + int r; + struct rprocpub *rpub; + + rpub = rp->r_pub; + + if(rs_verbose) + printf("RS: %s updates signal managers: %d%s / %d\n", srv_to_string(rp), + sig_mgr == SELF ? rpub->endpoint : sig_mgr, + sig_mgr == SELF ? "(SELF)" : "", + bak_sig_mgr == NONE ? -1 : bak_sig_mgr); + + /* Synch privilege structure with the kernel. */ + if ((r = sys_getpriv(&rp->r_priv, rpub->endpoint)) != OK) { + printf("unable to synch privilege structure: %d", r); + return r; + } + + /* Set signal managers. */ + rp->r_priv.s_sig_mgr = sig_mgr; + rp->r_priv.s_bak_sig_mgr = bak_sig_mgr; + + /* Update privilege structure. */ + r = sys_privctl(rpub->endpoint, SYS_PRIV_UPDATE_SYS, &rp->r_priv); + if(r != OK) { + printf("unable to update privilege structure: %d", r); + return r; + } + + return OK; +} +