Zhao Yanbai Git Server - minix.git/commitdiff
SEF: query VM about holes during state transfer 23/3123/1
author: David van Moolenbroek <david@minix3.org>
Sat, 18 Jul 2015 11:04:34 +0000 (13:04 +0200)
committer: David van Moolenbroek <david@minix3.org>
Thu, 17 Sep 2015 13:43:06 +0000 (13:43 +0000)
The 'memory' service has holes in its data section, which causes
problems during state transfer.  Since VM cannot handle page faults
during a multicomponent-with-VM live update, the state transfer must
ensure that no page faults occur during copying.  Therefore, we now
query VM about the regions to copy, thus skipping holes.  While the
solution is not ideal, it is sufficiently generic that it can be used
for the data section state transfer of all processes, and possibly
for state transfer of other regions in the future as well.

Change-Id: I2a71383a18643ebd36956c396fbd22c8fd137202

minix/drivers/storage/memory/memory.c
minix/include/minix/ipc_filter.h
minix/include/minix/sef.h
minix/lib/libsys/sef_init.c
minix/lib/libsys/sef_st.c
minix/servers/vm/main.c

index 5c4c7f13824a88d3ce762817e83a5d52c905f298..2dc6b7f5149d5c7208499e50c03cf90c8dc9c278 100644 (file)
@@ -114,8 +114,8 @@ static void sef_local_startup()
 {
   /* Register init callbacks. */
   sef_setcb_init_fresh(sef_cb_init_fresh);
-  sef_setcb_init_lu(sef_cb_init_fresh);
-  sef_setcb_init_restart(sef_cb_init_fresh);
+  sef_setcb_init_lu(SEF_CB_INIT_LU_DEFAULT);
+  sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);
 
   /* Let SEF perform startup. */
   sef_startup();
index 62d787a2d0d2c4e1313ea123ff3c2ce3ed79cc11..ecac829bb30a49383650bc88664a74cef95732d3 100644 (file)
@@ -12,7 +12,7 @@
 #define ANY_TSK                _ENDPOINT(3, _ENDPOINT_P(ANY))
 
 /* IPC filter constants. */
-#define IPCF_MAX_ELEMENTS       NR_SYS_PROCS
+#define IPCF_MAX_ELEMENTS       (NR_SYS_PROCS * 2)
 
 /* IPC filter flags. */
 #define IPCF_MATCH_M_SOURCE    0x1
index 061502e1ecc6d143bf8766d59209768eb2bda927..3fa3375a131086b27719f5463eefe9ef74b18aff 100644 (file)
@@ -337,7 +337,7 @@ void sef_setcb_gcov(sef_cb_gcov_t cb);
 int sef_copy_state_region_ctl(sef_init_info_t *info,
     vir_bytes *src_address, vir_bytes *dst_address);
 int sef_copy_state_region(sef_init_info_t *info,
-    vir_bytes address, size_t size, vir_bytes dst_address);
+    vir_bytes address, size_t size, vir_bytes dst_address, int may_have_holes);
 int sef_st_state_transfer(sef_init_info_t *info);
 
 /* Callback prototypes to be passed to the State Transfer framework. */
index 425727e6ce0b167e5d932502842269cf1a083e4d..d64b4cdde9efbf3b83ea4dfda6ed0ac2c059b147 100644 (file)
@@ -1,9 +1,12 @@
-#include "syslib.h"
 #include <assert.h>
 #include <unistd.h>
-#include <minix/sysutil.h>
 #include <string.h>
 
+#include <machine/vmparam.h>
+
+#include <minix/sysutil.h>
+
+#include "syslib.h"
 /* SEF Init callbacks. */
 static struct sef_init_cbs {
     sef_cb_init_t                       sef_cb_init_fresh;
@@ -356,11 +359,11 @@ int sef_cb_init_identity_state_transfer(int type, sef_init_info_t *info)
 
   /* Transfer data. */
   size = (size_t)(_brksize - data_start);
-  r = sef_copy_state_region(info, data_start, size, data_start);
-  if(r != OK) {
-      printf("sef_cb_init_identity_state_transfer: data transfer failed\n");
+
+  r = sef_copy_state_region(info, data_start, size, data_start,
+    TRUE /*may_have_holes*/);
+  if (r != OK)
       return r;
-  }
 
   new_brksize = _brksize;
 
@@ -384,7 +387,7 @@ int sef_cb_init_identity_state_transfer(int type, sef_init_info_t *info)
       assert(_brksize == new_brksize);
       size = (size_t)(_brksize - old_brksize);
       r = sef_copy_state_region(info, (vir_bytes) old_brksize, size,
-          (vir_bytes) old_brksize);
+          (vir_bytes) old_brksize, FALSE /*may_have_holes*/);
       if(r != OK) {
           printf("sef_cb_init_identity_state_transfer: extended heap transfer failed\n");
           return r;
index 3189d9a22ab2b31ed64d13a124f49419cd9161af..4b57ef786f0530dad006b1ab0805e25b9f551833 100644 (file)
@@ -4,12 +4,15 @@
 #include <machine/archtypes.h>
 #include <minix/timers.h>
 #include <minix/sysutil.h>
+#include <minix/vm.h>
 
 #include "kernel/config.h"
 #include "kernel/const.h"
 #include "kernel/type.h"
 #include "kernel/proc.h"
 
+EXTERN endpoint_t sef_self_endpoint;
+
 /* SEF Live update prototypes for sef_receive(). */
 void do_sef_st_before_receive(void);
 
@@ -43,9 +46,14 @@ int sef_copy_state_region_ctl(sef_init_info_t *info, vir_bytes *src_address, vir
  *                          sef_copy_state_region                           *
  *===========================================================================*/
 int sef_copy_state_region(sef_init_info_t *info,
-    vir_bytes address, size_t size, vir_bytes dst_address)
+    vir_bytes address, size_t size, vir_bytes dst_address, int may_have_holes)
 {
+  vir_bytes base, top, target;
+  struct vm_region_info vri;
   int r;
+
+  base = address;
+
   if(sef_copy_state_region_ctl(info, &address, &dst_address)) {
 #if STATE_TRANS_DEBUG
       printf("sef_copy_state_region: memcpy %d bytes, addr = 0x%08x -> 0x%08x...\n",
@@ -53,19 +61,80 @@ int sef_copy_state_region(sef_init_info_t *info,
 #endif
       /* memcpy region from current state */
       memcpy((void*) dst_address, (void *)address, size);
+  } else if (may_have_holes && sef_self_endpoint != VM_PROC_NR &&
+    vm_info_region(info->old_endpoint, &vri, 1, &base) == 1) {
+      /* Perform a safe copy of a region of the old state.  The section may
+       * contain holes, so ask VM for the actual regions within the data
+       * section and transfer each one separately.  The alternative, just
+       * copying until a page fault happens, is not possible in the multi-
+       * component-with-VM live update case, where VM may not receive page
+       * faults during the live update window.  For now, we use the region
+       * iteration approach for the data section only; other cases have not
+       * been tested, but may work as well.
+       */
+#if STATE_TRANS_DEBUG
+      printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> "
+        "0x%08x, gid = %d, source = %d, with holes...\n", size, address,
+        dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint);
+#endif
+
+      /* The following is somewhat of a hack: the start of the data section
+       * may in fact not be page-aligned and may be part of the last page of
+       * the preceding (text) section.  Therefore, if the first region we
+       * find starts above the known base address, blindly copy the area in
+       * between.
+       */
+      if (vri.vri_addr > address) {
+          if ((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID,
+            address, dst_address, vri.vri_addr - address)) != OK) {
+#if STATE_TRANS_DEBUG
+              printf("sef_copy_state_region: sys_safecopyfrom failed\n");
+#endif
+              return r;
+          }
+      }
+
+      top = address + size;
+      do {
+          assert(vri.vri_addr >= address);
+          if (vri.vri_addr >= top)
+              break;
+          if (vri.vri_length > top - vri.vri_addr)
+              vri.vri_length = top - vri.vri_addr;
+          target = dst_address + (vri.vri_addr - address);
+          if ((r = sys_safecopyfrom(info->old_endpoint,
+            SEF_STATE_TRANSFER_GID, vri.vri_addr, target,
+            vri.vri_length)) != OK) {
+#if STATE_TRANS_DEBUG
+              printf("sef_copy_state_region: sys_safecopyfrom failed\n");
+#endif
+              return r;
+          }
+          /* Save on a VM call if the next address is already too high. */
+          if (base >= top)
+              break;
+      } while (vm_info_region(info->old_endpoint, &vri, 1, &base) == 1);
   } else {
+      /* Perform a safe copy of a region of the old state, without taking into
+       * account any holes.  This is the default for anything but the data
+       * section, with a few additional exceptions:  VM can't query VM, so
+       * simply assume there are no holes;  also, if we fail to get one region
+       * for the old process (and this is presumably possible if its heap is
+       * so small it fits in the last text page, see above), we also just
+       * blindly copy over the entire data section.
+       */
 #if STATE_TRANS_DEBUG
-      printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> 0x%08x, gid = %d, source = %d...\n",
-              size, address, dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint);
+      printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> "
+        "0x%08x, gid = %d, source = %d, without holes...\n", size, address,
+        dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint);
 #endif
-      /* Perform a safe copy of a region of the old state. */
-      if((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID, address,
-        dst_address, size)) != OK) {
+      if ((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID,
+        address, dst_address, size)) != OK) {
 #if STATE_TRANS_DEBUG
           printf("sef_copy_state_region: sys_safecopyfrom failed\n");
 #endif
           return r;
-    }
+      }
   }
 
   return OK;
@@ -85,7 +154,7 @@ int sef_copy_state_region(sef_init_info_t *info,
   }
 
   if (sef_copy_state_region(info, old_priv.s_state_table
-    , sef_llvm_state_table_size(), (vir_bytes) addr))
+    , sef_llvm_state_table_size(), (vir_bytes) addr, FALSE /*may_have_holes*/))
   {
       printf("ERROR. state table transfer failed\n");
       return EGENERIC;
@@ -111,7 +180,8 @@ int sef_copy_state_region_opaque(void *info_opaque, uint32_t address,
 {
   assert(info_opaque != NULL && "Invalid info_opaque pointer.");
   return sef_copy_state_region((sef_init_info_t *)(info_opaque),
-      (vir_bytes) address, size, (vir_bytes) dst_address);
+      (vir_bytes) address, size, (vir_bytes) dst_address,
+      FALSE /*may_have_holes*/);
 }
 
 /*===========================================================================*
index 0ccd6159e6a6dcd4496e6f64ee21b40038ea8353..5fb5fb6a2b8a1e03a9774982fef4cf6d4576966c 100644 (file)
@@ -601,7 +601,13 @@ static int sef_cb_init_vm_multi_lu(int type, sef_init_info_t *info)
        m.m_source = VM_PROC_NR;
        for(i=0;i < NR_SYS_PROCS;i++) {
            if(rprocpub[i].in_use && rprocpub[i].old_endpoint != NONE) {
-               if(num_elements <= IPCF_MAX_ELEMENTS-3) {
+               if(num_elements <= IPCF_MAX_ELEMENTS-5) {
+                    /* VM_BRK is needed for normal operation during the live
+                     * update.  VM_INFO is needed for state transfer in the
+                     * light of holes.  Pagefaults and handle-memory requests
+                     * are blocked intentionally, as handling these would
+                     * prevent VM from being able to roll back.
+                     */
                    ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE;
                    ipc_filter[num_elements].m_source = rprocpub[i].old_endpoint;
                    if(!(info->flags & SEF_LU_UNSAFE)) {
@@ -616,6 +622,14 @@ static int sef_cb_init_vm_multi_lu(int type, sef_init_info_t *info)
                        ipc_filter[num_elements].m_type = VM_BRK;
                    }
                    num_elements++;
+                   if(!(info->flags & SEF_LU_UNSAFE)) {
+                       ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
+                       ipc_filter[num_elements].m_source = rprocpub[i].old_endpoint;
+                       ipc_filter[num_elements++].m_type = VM_INFO;
+                       ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
+                       ipc_filter[num_elements].m_source = rprocpub[i].new_endpoint;
+                       ipc_filter[num_elements++].m_type = VM_INFO;
+                   }
                    /* Make sure we can talk to any RS instance. */
                    if(rprocpub[i].old_endpoint == RS_PROC_NR) {
                        ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE;