# define VM_RS_SRC_ENDPT m1_i1
# define VM_RS_DST_ENDPT m1_i2
+#define VM_RS_MEMCTL (VM_RQ_BASE+42)
+# define VM_RS_CTL_ENDPT m1_i1
+# define VM_RS_CTL_REQ m1_i2
+# define VM_RS_MEM_PIN 0 /* pin memory */
+
/* Total. */
-#define NR_VM_CALLS 42
+#define NR_VM_CALLS 43
#define VM_CALL_MASK_SIZE BITMAP_CHUNKS(NR_VM_CALLS)
/* not handled as a normal VM call, thus at the end of the reserved range */
_PROTOTYPE( int vm_unmap_phys, (endpoint_t who, void *vaddr, size_t len));
_PROTOTYPE( int vm_notify_sig, (endpoint_t ep, endpoint_t ipc_ep));
-_PROTOTYPE( int vm_ctl, (int what, int param));
_PROTOTYPE( int vm_set_priv, (int procnr, void *buf));
_PROTOTYPE( int vm_update, (endpoint_t src_e, endpoint_t dst_e));
+_PROTOTYPE( int vm_memctl, (endpoint_t ep, int req));
_PROTOTYPE( int vm_query_exit, (int *endpt));
_PROTOTYPE( int vm_forgetblock, (u64_t id));
_PROTOTYPE( void vm_forgetblocks, (void));
#include "glo.h" /* global variables */
#include "ipc.h" /* IPC constants */
#include "profile.h" /* system profiling */
+#include "perf.h" /* performance-related definitions */
#include "debug.h" /* debugging, MUST be last kernel header */
#endif /* __ASSEMBLY__ */
#define MEM_TOP 0xFFFFFFFFUL
-#define USE_COW_SAFECOPY 0
-
FORWARD _PROTOTYPE(int safecopy, (struct proc *, endpoint_t, endpoint_t,
cp_grant_id_t, int, int, size_t, vir_bytes, vir_bytes, int));
endpoint_t new_granter, *src, *dst;
struct proc *granter_p;
int r;
-#if USE_COW_SAFECOPY
+#if PERF_USE_COW_SAFECOPY
vir_bytes size;
#endif
}
/* Do the regular copy. */
-#if USE_COW_SAFECOPY
+#if PERF_USE_COW_SAFECOPY
if(v_offset % CLICK_SIZE != addr % CLICK_SIZE || bytes < CLICK_SIZE) {
/* Give up on COW immediately when offsets are not aligned
* or we are copying less than a page.
PUBLIC char *t_stack[TOT_STACK_SPACE / sizeof(char *)];
/* Define boot process flags. */
-#define BVM_F (PROC_FULLVM) /* boot processes with VM */
+#define BVM_F (PROC_FULLVM) /* boot processes with VM */
+#define OVM_F (PERF_SYS_CORE_FULLVM ? PROC_FULLVM : 0) /* critical boot
+ * processes with
+ * optional VM.
+ */
/* The system image table lists all programs that are part of the boot image.
* The order of the entries here MUST agree with the order of the programs
{DS_PROC_NR, BVM_F, 50, 4, 0, "ds" },
{RS_PROC_NR, 0, 50, 4, 0, "rs" },
-{PM_PROC_NR, 0,500, 4, 0, "pm" },
-{SCHED_PROC_NR, 0,500, 4, 0, "sched" },
-{VFS_PROC_NR, 0,500, 5, 0, "vfs" },
+{PM_PROC_NR, OVM_F,500, 4, 0, "pm" },
+{SCHED_PROC_NR,OVM_F,500, 4, 0, "sched" },
+{VFS_PROC_NR, OVM_F,500, 5, 0, "vfs" },
{MEM_PROC_NR, BVM_F, 50, 3, 0, "memory"},
{LOG_PROC_NR, BVM_F, 50, 2, 0, "log" },
{TTY_PROC_NR, BVM_F, 50, 1, 0, "tty" },
_svrctl.c \
_sysuname.c \
_vm_dmacalls.c \
+ _vm_memctl.c \
_vm_set_priv.c \
_vm_update.c \
_vm_query_exit.c \
munmap.S \
vm_getphys.S \
vm_getrefcount.S \
+ vm_memctl.S \
vm_remap.S \
vm_unmap.S \
vm_set_priv.S \
int s,i,j;
int nr_image_srvs, nr_image_priv_srvs, nr_uncaught_init_srvs;
struct rproc *rp;
+ struct rproc *replica_rp;
struct rprocpub *rpub;
struct boot_image image[NR_BOOT_PROCS];
struct mproc mproc[NR_PROCS];
struct boot_image_priv *boot_image_priv;
struct boot_image_sys *boot_image_sys;
struct boot_image_dev *boot_image_dev;
+ message m;
+ int pid, replica_pid;
+ endpoint_t replica_endpoint;
/* See if we run in verbose mode. */
env_parse("rs_verbose", "d", 0, &rs_verbose, 0, 1);
/* Get heartbeat period. */
rpub->period = boot_image_priv->period;
- if(boot_image_priv->endpoint != RS_PROC_NR) {
- /* Force a static priv id for system services in the boot image. */
- rp->r_priv.s_id = static_priv_id(
- _ENDPOINT_P(boot_image_priv->endpoint));
-
- /* Initialize privilege bitmaps and signal manager. */
- rp->r_priv.s_flags = boot_image_priv->flags; /* priv flags */
- rp->r_priv.s_trap_mask = boot_image_priv->trap_mask; /* traps */
- memcpy(&rp->r_priv.s_ipc_to, &boot_image_priv->ipc_to,
- sizeof(rp->r_priv.s_ipc_to)); /* targets */
- rp->r_priv.s_sig_mgr = boot_image_priv->sig_mgr; /* sig mgr */
-
- /* Initialize kernel call mask bitmap from unordered set. */
- fill_call_mask(boot_image_priv->k_calls, NR_SYS_CALLS,
- rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE);
+ /* Force a static priv id for system services in the boot image. */
+ rp->r_priv.s_id = static_priv_id(
+ _ENDPOINT_P(boot_image_priv->endpoint));
+
+ /* Initialize privilege bitmaps and signal manager. */
+ rp->r_priv.s_flags = boot_image_priv->flags; /* priv flags */
+ rp->r_priv.s_trap_mask = boot_image_priv->trap_mask; /* traps */
+ memcpy(&rp->r_priv.s_ipc_to, &boot_image_priv->ipc_to,
+ sizeof(rp->r_priv.s_ipc_to)); /* targets */
+ rp->r_priv.s_sig_mgr = boot_image_priv->sig_mgr; /* sig mgr */
+
+ /* Initialize kernel call mask bitmap from unordered set. */
+ fill_call_mask(boot_image_priv->k_calls, NR_SYS_CALLS,
+ rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE);
/* Set the privilege structure. */
+ if(boot_image_priv->endpoint != RS_PROC_NR) {
if ((s = sys_privctl(ip->endpoint, SYS_PRIV_SET_SYS, &(rp->r_priv)))
!= OK) {
panic("unable to set privilege structure: %d", s);
if (OK != (s=sys_setalarm(RS_DELTA_T, 0)))
panic("couldn't set alarm: %d", s);
- /* Map out our own text and data. This is normally done in crtso.o
- * but RS is an exception - we don't get to talk to VM so early on.
- * That's why we override munmap() and munmap_text() in utility.c.
- *
- * _minix_unmapzero() is the same code in crtso.o that normally does
- * it on startup. It's best that it's there as crtso.o knows exactly
- * what the ranges are of the filler data.
- */
- unmap_ok = 1;
- _minix_unmapzero();
+ /* Now create a new RS instance with a private page table and let the current
+ * instance live update into the replica. Clone RS' own slot first.
+ */
+ rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
+ if((s = clone_slot(rp, &replica_rp)) != OK) {
+ panic("unable to clone current RS instance: %d", s);
+ }
+
+ /* Fork a new RS instance. */
+ pid = srv_fork();
+ if(pid == -1) {
+ panic("unable to fork a new RS instance");
+ }
+ replica_pid = pid ? pid : getpid();
+ replica_endpoint = getnprocnr(replica_pid);
+ replica_rp->r_pid = replica_pid;
+ replica_rp->r_pub->endpoint = replica_endpoint;
+
+ if(pid == 0) {
+ /* New RS instance running. */
+
+ /* Synchronize with the old instance. */
+ s = sef_receive(RS_PROC_NR, &m);
+ if(s != OK) {
+ panic("sef_receive failed: %d", s);
+ }
+
+ /* Live update the old instance into the new one. */
+ s = update_service(&rp, &replica_rp);
+ if(s != OK) {
+ panic("unable to live update RS: %d", s);
+ }
+ cpf_reload();
+
+ /* Clean up the old RS instance, the new instance will take over. */
+ cleanup_service(rp);
+
+ /* Map out our own text and data. */
+ unmap_ok = 1;
+ _minix_unmapzero();
+ }
+ else {
+ /* Old RS instance running. */
+
+ /* Ask VM to pin memory for the new RS instance. */
+ s = vm_memctl(replica_endpoint, VM_RS_MEM_PIN);
+ if(s != OK) {
+ panic("unable to pin memory for the new RS instance: %d", s);
+ }
+
+ /* Set up privileges for the new instance and let it run. */
+ set_sys_bit(replica_rp->r_priv.s_ipc_to, static_priv_id(RS_PROC_NR));
+ s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv));
+ if(s != OK) {
+ panic("unable to set privileges for the new RS instance: %d", s);
+ }
+ s = sys_privctl(replica_endpoint, SYS_PRIV_ALLOW, NULL);
+ if(s != OK) {
+ panic("unable to let the new RS instance run: %d", s);
+ }
+
+ /* Synchronize with the new instance and go to sleep. */
+ m.m_type = RS_INIT;
+ s = sendrec(replica_endpoint, &m);
+ if(s != OK) {
+ panic("sendrec failed: %d", s);
+ }
+ /* Not reachable */
+ }
return(OK);
}
rpub = rp->r_pub;
clone_rpub = clone_rp->r_pub;
+ /* Synch the privilege structure of the source with the kernel. */
+ if ((r = sys_getpriv(&(rp->r_priv), rpub->endpoint)) != OK) {
+ panic("unable to synch privilege structure: %d", r);
+ }
+
/* Shallow copy. */
*clone_rp = *rp;
*clone_rpub = *rpub;
VM_NOTIFY_SIG, SYS_NULL_C },
sched_vmc[] ={ VM_BASIC_CALLS, SYS_NULL_C },
vfs_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C },
- rs_vmc[] = { VM_BASIC_CALLS, VM_RS_SET_PRIV, VM_RS_UPDATE, SYS_NULL_C },
+ rs_vmc[] = { VM_BASIC_CALLS, VM_RS_SET_PRIV, VM_RS_UPDATE, VM_RS_MEMCTL,
+ SYS_NULL_C },
ds_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C },
vm_vmc[] = { SYS_NULL_C },
tty_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C },
if(vmp->vm_flags & VMF_HAS_DMA) {
release_dma(vmp);
- } else {
- assert(vmp->vm_flags & VMF_HASPT);
+ } else if(vmp->vm_flags & VMF_HASPT) {
/* Free pagetable and pages allocated by pt code. */
SANITYCHECK(SCL_DETAIL);
free_proc(vmp);
SANITYCHECK(SCL_DETAIL);
+ } else {
+ /* Free the data and stack segments. */
+ free_mem(vmp->vm_arch.vm_seg[D].mem_phys,
+ vmp->vm_arch.vm_seg[S].mem_vir +
+ vmp->vm_arch.vm_seg[S].mem_len -
+ vmp->vm_arch.vm_seg[D].mem_vir);
}
SANITYCHECK(SCL_DETAIL);
/* Calls from RS */
CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
CALLMAP(VM_RS_UPDATE, do_rs_update);
+ CALLMAP(VM_RS_MEMCTL, do_rs_memctl);
/* Generic calls. */
CALLMAP(VM_REMAP, do_remap);
_PROTOTYPE(struct vir_region *map_lookup,(struct vmproc *vmp, vir_bytes addr));
_PROTOTYPE(int map_pf,(struct vmproc *vmp,
struct vir_region *region, vir_bytes offset, int write));
+_PROTOTYPE(int map_pin_memory,(struct vmproc *vmp));
_PROTOTYPE(int map_handle_memory,(struct vmproc *vmp,
struct vir_region *region, vir_bytes offset, vir_bytes len, int write));
_PROTOTYPE(void map_printmap, (struct vmproc *vmp));
/* rs.c */
_PROTOTYPE(int do_rs_set_priv, (message *m));
_PROTOTYPE(int do_rs_update, (message *m));
+_PROTOTYPE(int do_rs_memctl, (message *m));
/* queryexit.c */
_PROTOTYPE(int do_query_exit, (message *m));
return r;
}
+/*===========================================================================*
+ *				map_pin_memory				     *
+ *===========================================================================*/
+PUBLIC int map_pin_memory(struct vmproc *vmp)
+{
+/* Walk all regions of process 'vmp' and call map_pf() in write mode on every
+ * VM_PAGE_SIZE-spaced offset of each region. Regions flagged VR_NOPF or
+ * VR_SHARED are skipped. Returns OK when every call succeeded, otherwise the
+ * first error code reported by map_pf().
+ */
+	struct vir_region *vr;
+	vir_bytes offset;	/* single declaration; no shadowed duplicate */
+	int r;
+
+	/* Scan all memory regions. */
+	for(vr = vmp->vm_regions; vr; vr = vr->next) {
+		/* Skip regions that can't pagefault. */
+		if(vr->flags & (VR_NOPF | VR_SHARED)) {
+			continue;
+		}
+		/* Map other regions, one page at a time. */
+		for(offset = 0; offset < vr->length; offset += VM_PAGE_SIZE) {
+			if((r = map_pf(vmp, vr, offset, 1 /* write */)) != OK) {
+				printf("VM: map_pf failed\n");
+				return r;
+			}
+		}
+	}
+
+	return OK;
+}
+
+
/*===========================================================================*
* map_handle_memory *
*===========================================================================*/
nr = m->VM_RS_NR;
if ((r = vm_isokendpt(nr, &n)) != OK) {
- printf("do_rs_set_priv: message from strange source %d\n", nr);
+ printf("do_rs_set_priv: bad endpoint %d\n", nr);
return EINVAL;
}
*===========================================================================*/
PUBLIC int do_rs_update(message *m_ptr)
{
- endpoint_t src_e, dst_e;
+ endpoint_t src_e, dst_e, reply_e;
int r;
src_e = m_ptr->VM_RS_SRC_ENDPT;
/* Do the update in VM now. */
r = swap_proc(src_e, dst_e);
+ if(r != OK) {
+ return r;
+ }
+
+ /* Reply, update-aware: if the caller is one of the two swapped
+ * endpoints, address the reply to the other one. The second test must
+ * be an 'else if'; with two independent tests a caller equal to src_e
+ * would be switched to dst_e and then immediately back to src_e.
+ */
+ reply_e = m_ptr->m_source;
+ if(reply_e == src_e) reply_e = dst_e;
+ else if(reply_e == dst_e) reply_e = src_e;
+ m_ptr->m_type = OK;
+ r = send(reply_e, m_ptr);
+ if(r != OK) {
+ panic("send() error");
+ }
- return r;
+ /* The reply was sent by hand above. NOTE(review): SUSPEND presumably
+ * keeps the caller of this handler from replying again - confirm.
+ */
+ return SUSPEND;
+}
+
+/*===========================================================================*
+ *				do_rs_memctl				     *
+ *===========================================================================*/
+PUBLIC int do_rs_memctl(message *m_ptr)
+{
+/* Handle a VM_RS_MEMCTL request. The message carries the target endpoint
+ * (VM_RS_CTL_ENDPT) and the request code (VM_RS_CTL_REQ). The only request
+ * handled here is VM_RS_MEM_PIN, which is forwarded to map_pin_memory().
+ * Returns EINVAL for an endpoint rejected by vm_isokendpt() or for an unknown
+ * request, otherwise the result of map_pin_memory().
+ */
+	endpoint_t target_e = m_ptr->VM_RS_CTL_ENDPT;
+	int request = m_ptr->VM_RS_CTL_REQ;
+	int slot;
+
+	/* Translate the endpoint into a vmproc slot number. */
+	if (vm_isokendpt(target_e, &slot) != OK) {
+		printf("do_rs_memctl: bad endpoint %d\n", target_e);
+		return EINVAL;
+	}
+
+	/* Dispatch on the request code. */
+	if (request == VM_RS_MEM_PIN) {
+		return map_pin_memory(&vmproc[slot]);
+	}
+
+	printf("do_rs_memctl: bad request %d\n", request);
+	return EINVAL;
+}
}
/* Adjust page tables. */
- assert(src_vmp->vm_flags & VMF_HASPT);
- assert(dst_vmp->vm_flags & VMF_HASPT);
- pt_bind(&src_vmp->vm_pt, src_vmp);
- pt_bind(&dst_vmp->vm_pt, dst_vmp);
+ if(src_vmp->vm_flags & VMF_HASPT)
+ pt_bind(&src_vmp->vm_pt, src_vmp);
+ if(dst_vmp->vm_flags & VMF_HASPT)
+ pt_bind(&dst_vmp->vm_pt, dst_vmp);
if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r);
}