From: David van Moolenbroek Date: Sat, 18 Jul 2015 11:04:34 +0000 (+0200) Subject: SEF: query VM about holes during state transfer X-Git-Url: http://zhaoyanbai.com/repos/?a=commitdiff_plain;h=2867e60addd3b3319adf332c8112dff145e112c8;p=minix.git SEF: query VM about holes during state transfer The 'memory' service has holes in its data section, which causes problems during state transfer. Since VM cannot handle page faults during a multicomponent-with-VM live update, the state transfer must ensure that no page faults occur during copying. Therefore, we now query VM about the regions to copy, thus skipping holes. While the solution is not ideal, it is sufficiently generic that it can be used for the data section state transfer of all processes, and possibly for state transfer of other regions in the future as well. Change-Id: I2a71383a18643ebd36956c396fbd22c8fd137202 --- diff --git a/minix/drivers/storage/memory/memory.c b/minix/drivers/storage/memory/memory.c index 5c4c7f138..2dc6b7f51 100644 --- a/minix/drivers/storage/memory/memory.c +++ b/minix/drivers/storage/memory/memory.c @@ -114,8 +114,8 @@ static void sef_local_startup() { /* Register init callbacks. */ sef_setcb_init_fresh(sef_cb_init_fresh); - sef_setcb_init_lu(sef_cb_init_fresh); - sef_setcb_init_restart(sef_cb_init_fresh); + sef_setcb_init_lu(SEF_CB_INIT_LU_DEFAULT); + sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL); /* Let SEF perform startup. */ sef_startup(); diff --git a/minix/include/minix/ipc_filter.h b/minix/include/minix/ipc_filter.h index 62d787a2d..ecac829bb 100644 --- a/minix/include/minix/ipc_filter.h +++ b/minix/include/minix/ipc_filter.h @@ -12,7 +12,7 @@ #define ANY_TSK _ENDPOINT(3, _ENDPOINT_P(ANY)) /* IPC filter constants. */ -#define IPCF_MAX_ELEMENTS NR_SYS_PROCS +#define IPCF_MAX_ELEMENTS (NR_SYS_PROCS * 2) /* IPC filter flags. 
*/ #define IPCF_MATCH_M_SOURCE 0x1 diff --git a/minix/include/minix/sef.h b/minix/include/minix/sef.h index 061502e1e..3fa3375a1 100644 --- a/minix/include/minix/sef.h +++ b/minix/include/minix/sef.h @@ -337,7 +337,7 @@ void sef_setcb_gcov(sef_cb_gcov_t cb); int sef_copy_state_region_ctl(sef_init_info_t *info, vir_bytes *src_address, vir_bytes *dst_address); int sef_copy_state_region(sef_init_info_t *info, - vir_bytes address, size_t size, vir_bytes dst_address); + vir_bytes address, size_t size, vir_bytes dst_address, int may_have_holes); int sef_st_state_transfer(sef_init_info_t *info); /* Callback prototypes to be passed to the State Transfer framwork. */ diff --git a/minix/lib/libsys/sef_init.c b/minix/lib/libsys/sef_init.c index 425727e6c..d64b4cdde 100644 --- a/minix/lib/libsys/sef_init.c +++ b/minix/lib/libsys/sef_init.c @@ -1,9 +1,12 @@ -#include "syslib.h" #include #include -#include #include +#include + +#include + +#include "syslib.h" /* SEF Init callbacks. */ static struct sef_init_cbs { sef_cb_init_t sef_cb_init_fresh; @@ -356,11 +359,11 @@ int sef_cb_init_identity_state_transfer(int type, sef_init_info_t *info) /* Transfer data. 
*/ size = (size_t)(_brksize - data_start); - r = sef_copy_state_region(info, data_start, size, data_start); - if(r != OK) { - printf("sef_cb_init_identity_state_transfer: data transfer failed\n"); + + r = sef_copy_state_region(info, data_start, size, data_start, + TRUE /*may_have_holes*/); + if (r != OK) return r; - } new_brksize = _brksize; @@ -384,7 +387,7 @@ int sef_cb_init_identity_state_transfer(int type, sef_init_info_t *info) assert(_brksize == new_brksize); size = (size_t)(_brksize - old_brksize); r = sef_copy_state_region(info, (vir_bytes) old_brksize, size, - (vir_bytes) old_brksize); + (vir_bytes) old_brksize, FALSE /*may_have_holes*/); if(r != OK) { printf("sef_cb_init_identity_state_transfer: extended heap transfer failed\n"); return r; diff --git a/minix/lib/libsys/sef_st.c b/minix/lib/libsys/sef_st.c index 3189d9a22..4b57ef786 100644 --- a/minix/lib/libsys/sef_st.c +++ b/minix/lib/libsys/sef_st.c @@ -4,12 +4,15 @@ #include #include #include +#include #include "kernel/config.h" #include "kernel/const.h" #include "kernel/type.h" #include "kernel/proc.h" +EXTERN endpoint_t sef_self_endpoint; + /* SEF Live update prototypes for sef_receive(). 
*/ void do_sef_st_before_receive(void); @@ -43,9 +46,14 @@ int sef_copy_state_region_ctl(sef_init_info_t *info, vir_bytes *src_address, vir * sef_copy_state_region * *===========================================================================*/ int sef_copy_state_region(sef_init_info_t *info, - vir_bytes address, size_t size, vir_bytes dst_address) + vir_bytes address, size_t size, vir_bytes dst_address, int may_have_holes) { + vir_bytes base, top, target; + struct vm_region_info vri; int r; + + base = address; + if(sef_copy_state_region_ctl(info, &address, &dst_address)) { #if STATE_TRANS_DEBUG printf("sef_copy_state_region: memcpy %d bytes, addr = 0x%08x -> 0x%08x...\n", @@ -53,19 +61,80 @@ int sef_copy_state_region(sef_init_info_t *info, #endif /* memcpy region from current state */ memcpy((void*) dst_address, (void *)address, size); + } else if (may_have_holes && sef_self_endpoint != VM_PROC_NR && + vm_info_region(info->old_endpoint, &vri, 1, &base) == 1) { + /* Perform a safe copy of a region of the old state. The section may + * contain holes, so ask VM for the actual regions within the data + * section and transfer each one separately. The alternative, just + * copying until a page fault happens, is not possible in the multi- + * component-with-VM live update case, where VM may not receive page + * faults during the live update window. For now, we use the region + * iteration approach for the data section only; other cases have not + * been tested, but may work as well. + */ +#if STATE_TRANS_DEBUG + printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> " + "0x%08x, gid = %d, source = %d, with holes...\n", size, address, + dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint); +#endif + + /* The following is somewhat of a hack: the start of the data section + * may in fact not be page-aligned and may be part of the last page of + * the preceding (text) section. 
Therefore, if the first region we + * find starts above the known base address, blindly copy the area in + * between. + */ + if (vri.vri_addr > address) { + if ((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID, + address, dst_address, vri.vri_addr - address)) != OK) { +#if STATE_TRANS_DEBUG + printf("sef_copy_state_region: sys_safecopyfrom failed\n"); +#endif + return r; + } + } + + top = address + size; + do { + assert(vri.vri_addr >= address); + if (vri.vri_addr >= top) + break; + if (vri.vri_length > top - vri.vri_addr) + vri.vri_length = top - vri.vri_addr; + target = dst_address + (vri.vri_addr - address); + if ((r = sys_safecopyfrom(info->old_endpoint, + SEF_STATE_TRANSFER_GID, vri.vri_addr, target, + vri.vri_length)) != OK) { +#if STATE_TRANS_DEBUG + printf("sef_copy_state_region: sys_safecopyfrom failed\n"); +#endif + return r; + } + /* Save on a VM call if the next address is already too high. */ + if (base >= top) + break; + } while (vm_info_region(info->old_endpoint, &vri, 1, &base) == 1); } else { + /* Perform a safe copy of a region of the old state, without taking into + * account any holes. This is the default for anything but the data + * section, with a few additional exceptions: VM can't query VM, so + * simply assume there are no holes; also, if we fail to get one region + * for the old process (and this is presumably possible if its heap is + * so small it fits in the last text page, see above), we also just + * blindly copy over the entire data section. + */ #if STATE_TRANS_DEBUG - printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> 0x%08x, gid = %d, source = %d...\n", - size, address, dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint); + printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> " + "0x%08x, gid = %d, source = %d, without holes...\n", size, address, + dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint); #endif - /* Perform a safe copy of a region of the old state. 
*/ - if((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID, address, - dst_address, size)) != OK) { + if ((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID, + address, dst_address, size)) != OK) { #if STATE_TRANS_DEBUG printf("sef_copy_state_region: sys_safecopyfrom failed\n"); #endif return r; - } + } } return OK; @@ -85,7 +154,7 @@ int sef_copy_state_region(sef_init_info_t *info, } if (sef_copy_state_region(info, old_priv.s_state_table - , sef_llvm_state_table_size(), (vir_bytes) addr)) + , sef_llvm_state_table_size(), (vir_bytes) addr, FALSE /*may_have_holes*/)) { printf("ERROR. state table transfer failed\n"); return EGENERIC; @@ -111,7 +180,8 @@ int sef_copy_state_region_opaque(void *info_opaque, uint32_t address, { assert(info_opaque != NULL && "Invalid info_opaque pointer."); return sef_copy_state_region((sef_init_info_t *)(info_opaque), - (vir_bytes) address, size, (vir_bytes) dst_address); + (vir_bytes) address, size, (vir_bytes) dst_address, + FALSE /*may_have_holes*/); } /*===========================================================================* diff --git a/minix/servers/vm/main.c b/minix/servers/vm/main.c index 0ccd6159e..5fb5fb6a2 100644 --- a/minix/servers/vm/main.c +++ b/minix/servers/vm/main.c @@ -601,7 +601,13 @@ static int sef_cb_init_vm_multi_lu(int type, sef_init_info_t *info) m.m_source = VM_PROC_NR; for(i=0;i < NR_SYS_PROCS;i++) { if(rprocpub[i].in_use && rprocpub[i].old_endpoint != NONE) { - if(num_elements <= IPCF_MAX_ELEMENTS-3) { + if(num_elements <= IPCF_MAX_ELEMENTS-5) { + /* VM_BRK is needed for normal operation during the live + * update. VM_INFO is needed for state transfer in the + * light of holes. Pagefaults and handle-memory requests + * are blocked intentionally, as handling these would + * prevent VM from being able to roll back. 
+ */ ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE; ipc_filter[num_elements].m_source = rprocpub[i].old_endpoint; if(!(info->flags & SEF_LU_UNSAFE)) { @@ -616,6 +622,14 @@ static int sef_cb_init_vm_multi_lu(int type, sef_init_info_t *info) ipc_filter[num_elements].m_type = VM_BRK; } num_elements++; + if(!(info->flags & SEF_LU_UNSAFE)) { + ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE; + ipc_filter[num_elements].m_source = rprocpub[i].old_endpoint; + ipc_filter[num_elements++].m_type = VM_INFO; + ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE; + ipc_filter[num_elements].m_source = rprocpub[i].new_endpoint; + ipc_filter[num_elements++].m_type = VM_INFO; + } /* Make sure we can talk to any RS instance. */ if(rprocpub[i].old_endpoint == RS_PROC_NR) { ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE;