From af424b4e433027ff58179145634df3d49ae2cb79 Mon Sep 17 00:00:00 2001 From: Cristiano Giuffrida Date: Tue, 20 Jul 2010 01:50:33 +0000 Subject: [PATCH] Don't always assume NOPINGREPLY as a failure in RS --- servers/rs/manager.c | 26 ++++++++++++++++++++++++++ servers/rs/proto.h | 1 + servers/rs/request.c | 17 ++++++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/servers/rs/manager.c b/servers/rs/manager.c index 374521304..9ffa357c0 100644 --- a/servers/rs/manager.c +++ b/servers/rs/manager.c @@ -1730,6 +1730,32 @@ PUBLIC struct rproc* lookup_slot_by_dev_nr(dev_t dev_nr) return NULL; } +/*===========================================================================* + * lookup_slot_by_flags * + *===========================================================================*/ +PUBLIC struct rproc* lookup_slot_by_flags(int flags) +{ +/* Return the first slot (lowest index in rproc[]) that has RS_IN_USE set and whose r_flags shares at least one bit with the given flags mask. Return NULL if the mask is 0 or no slot matches. */ + int slot_nr; + struct rproc *rp; + + if(!flags) { /* An empty mask cannot match any slot. */ + return NULL; + } + + for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) { + rp = &rproc[slot_nr]; + if (!(rp->r_flags & RS_IN_USE)) { /* Skip slots not marked RS_IN_USE. */ + continue; + } + if (rp->r_flags & flags) { /* Any one requested bit is enough to match. */ + return rp; + } + } + + return NULL; +} + /*===========================================================================* * alloc_slot * *===========================================================================*/ diff --git a/servers/rs/proto.h b/servers/rs/proto.h index f7f5fe447..ccadb4724 100644 --- a/servers/rs/proto.h +++ b/servers/rs/proto.h @@ -78,6 +78,7 @@ _PROTOTYPE( void swap_slot, (struct rproc **src_rpp, struct rproc **dst_rpp) ); _PROTOTYPE( struct rproc* lookup_slot_by_label, (char *label) ); _PROTOTYPE( struct rproc* lookup_slot_by_pid, (pid_t pid) ); _PROTOTYPE( struct rproc* lookup_slot_by_dev_nr, (dev_t dev_nr) ); +_PROTOTYPE( struct rproc* lookup_slot_by_flags, (int flags) ); _PROTOTYPE( int alloc_slot, (struct rproc **rpp) ); _PROTOTYPE( void free_slot, (struct rproc *rp) ); _PROTOTYPE( char *get_next_label, 
(char *ptr, char *label, char *caller_label)); diff --git a/servers/rs/request.c b/servers/rs/request.c index 3b2e78643..0282a3510 100755 --- a/servers/rs/request.c +++ b/servers/rs/request.c @@ -781,14 +781,25 @@ message *m_ptr; /* Check if an answer to a status request is still pending. If * the service didn't respond within time, kill it to simulate * a crash. The failure will be detected and the service will - * be restarted automatically. + * be restarted automatically. Give the service a free pass if + * somebody is initializing. There may be some weird dependencies + * if another service is, for example, restarting at the same + * time. */ if (rp->r_alive_tm < rp->r_check_tm) { if (now - rp->r_alive_tm > 2*period && rp->r_pid > 0 && !(rp->r_flags & RS_NOPINGREPLY)) { if(rs_verbose) - printf("RS: %s reported late\n", - srv_to_string(rp)); + printf("RS: %s reported late\n", srv_to_string(rp)); + if(lookup_slot_by_flags(RS_INITIALIZING)) { + /* Skip for now. */ + if(rs_verbose) + printf("RS: %s gets a free pass\n", + srv_to_string(rp)); + rp->r_alive_tm = now; + rp->r_check_tm = now+1; + continue; + } rp->r_flags |= RS_NOPINGREPLY; crash_service(rp); /* simulate crash */ } -- 2.44.0