From 06700d05d10ab99aad206cd7a21620359bdf8724 Mon Sep 17 00:00:00 2001 From: Cristiano Giuffrida Date: Mon, 28 Jun 2010 21:53:37 +0000 Subject: [PATCH] Give RS a page table. --- include/minix/com.h | 7 ++- include/minix/vm.h | 2 +- kernel/kernel.h | 1 + kernel/system/do_safecopy.c | 6 +- kernel/table.c | 12 ++-- lib/libc/other/Makefile.inc | 1 + lib/libc/syscall/Makefile.inc | 1 + servers/rs/main.c | 113 ++++++++++++++++++++++++++-------- servers/rs/manager.c | 5 ++ servers/rs/table.c | 3 +- servers/vm/exit.c | 9 ++- servers/vm/main.c | 1 + servers/vm/proto.h | 2 + servers/vm/region.c | 28 +++++++++ servers/vm/rs.c | 51 ++++++++++++++- servers/vm/utility.c | 8 +-- 16 files changed, 205 insertions(+), 45 deletions(-) diff --git a/include/minix/com.h b/include/minix/com.h index 4b3c1df34..b8efd12c1 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -1030,8 +1030,13 @@ # define VM_RS_SRC_ENDPT m1_i1 # define VM_RS_DST_ENDPT m1_i2 +#define VM_RS_MEMCTL (VM_RQ_BASE+42) +# define VM_RS_CTL_ENDPT m1_i1 +# define VM_RS_CTL_REQ m1_i2 +# define VM_RS_MEM_PIN 0 /* pin memory */ + /* Total. 
*/ -#define NR_VM_CALLS 42 +#define NR_VM_CALLS 43 #define VM_CALL_MASK_SIZE BITMAP_CHUNKS(NR_VM_CALLS) /* not handled as a normal VM call, thus at the end of the reserved rage */ diff --git a/include/minix/vm.h b/include/minix/vm.h index c37839ba5..2a65ffa5c 100644 --- a/include/minix/vm.h +++ b/include/minix/vm.h @@ -23,9 +23,9 @@ _PROTOTYPE( void *vm_map_phys, (endpoint_t who, void *physaddr, size_t len)); _PROTOTYPE( int vm_unmap_phys, (endpoint_t who, void *vaddr, size_t len)); _PROTOTYPE( int vm_notify_sig, (endpoint_t ep, endpoint_t ipc_ep)); -_PROTOTYPE( int vm_ctl, (int what, int param)); _PROTOTYPE( int vm_set_priv, (int procnr, void *buf)); _PROTOTYPE( int vm_update, (endpoint_t src_e, endpoint_t dst_e)); +_PROTOTYPE( int vm_memctl, (endpoint_t ep, int req)); _PROTOTYPE( int vm_query_exit, (int *endpt)); _PROTOTYPE( int vm_forgetblock, (u64_t id)); _PROTOTYPE( void vm_forgetblocks, (void)); diff --git a/kernel/kernel.h b/kernel/kernel.h index 6652a27c7..968dbce7a 100644 --- a/kernel/kernel.h +++ b/kernel/kernel.h @@ -49,6 +49,7 @@ #include "glo.h" /* global variables */ #include "ipc.h" /* IPC constants */ #include "profile.h" /* system profiling */ +#include "perf.h" /* performance-related definitions */ #include "debug.h" /* debugging, MUST be last kernel header */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/system/do_safecopy.c b/kernel/system/do_safecopy.c index 752bc1cc4..0a55ef4b5 100644 --- a/kernel/system/do_safecopy.c +++ b/kernel/system/do_safecopy.c @@ -23,8 +23,6 @@ #define MEM_TOP 0xFFFFFFFFUL -#define USE_COW_SAFECOPY 0 - FORWARD _PROTOTYPE(int safecopy, (struct proc *, endpoint_t, endpoint_t, cp_grant_id_t, int, int, size_t, vir_bytes, vir_bytes, int)); @@ -240,7 +238,7 @@ int access; /* CPF_READ for a copy from granter to grantee, CPF_WRITE endpoint_t new_granter, *src, *dst; struct proc *granter_p; int r; -#if USE_COW_SAFECOPY +#if PERF_USE_COW_SAFECOPY vir_bytes size; #endif @@ -290,7 +288,7 @@ int access; /* CPF_READ for a copy from 
granter to grantee, CPF_WRITE } /* Do the regular copy. */ -#if USE_COW_SAFECOPY +#if PERF_USE_COW_SAFECOPY if(v_offset % CLICK_SIZE != addr % CLICK_SIZE || bytes < CLICK_SIZE) { /* Give up on COW immediately when offsets are not aligned * or we are copying less than a page. diff --git a/kernel/table.c b/kernel/table.c index f1d19e9ca..708214dce 100644 --- a/kernel/table.c +++ b/kernel/table.c @@ -46,7 +46,11 @@ PUBLIC char *t_stack[TOT_STACK_SPACE / sizeof(char *)]; /* Define boot process flags. */ -#define BVM_F (PROC_FULLVM) /* boot processes with VM */ +#define BVM_F (PROC_FULLVM) /* boot processes with VM */ +#define OVM_F (PERF_SYS_CORE_FULLVM ? PROC_FULLVM : 0) /* critical boot + * processes with + * optional VM. + */ /* The system image table lists all programs that are part of the boot image. * The order of the entries here MUST agree with the order of the programs @@ -74,9 +78,9 @@ PUBLIC struct boot_image image[] = { {DS_PROC_NR, BVM_F, 50, 4, 0, "ds" }, {RS_PROC_NR, 0, 50, 4, 0, "rs" }, -{PM_PROC_NR, 0,500, 4, 0, "pm" }, -{SCHED_PROC_NR, 0,500, 4, 0, "sched" }, -{VFS_PROC_NR, 0,500, 5, 0, "vfs" }, +{PM_PROC_NR, OVM_F,500, 4, 0, "pm" }, +{SCHED_PROC_NR,OVM_F,500, 4, 0, "sched" }, +{VFS_PROC_NR, OVM_F,500, 5, 0, "vfs" }, {MEM_PROC_NR, BVM_F, 50, 3, 0, "memory"}, {LOG_PROC_NR, BVM_F, 50, 2, 0, "log" }, {TTY_PROC_NR, BVM_F, 50, 1, 0, "tty" }, diff --git a/lib/libc/other/Makefile.inc b/lib/libc/other/Makefile.inc index 835fc4244..7f96f8ee4 100644 --- a/lib/libc/other/Makefile.inc +++ b/lib/libc/other/Makefile.inc @@ -32,6 +32,7 @@ SRCS+= \ _svrctl.c \ _sysuname.c \ _vm_dmacalls.c \ + _vm_memctl.c \ _vm_set_priv.c \ _vm_update.c \ _vm_query_exit.c \ diff --git a/lib/libc/syscall/Makefile.inc b/lib/libc/syscall/Makefile.inc index 64a5ba652..f0d177fb0 100644 --- a/lib/libc/syscall/Makefile.inc +++ b/lib/libc/syscall/Makefile.inc @@ -73,6 +73,7 @@ SRCS+= \ munmap.S \ vm_getphys.S \ vm_getrefcount.S \ + vm_memctl.S \ vm_remap.S \ vm_unmap.S \ vm_set_priv.S \ diff 
--git a/servers/rs/main.c b/servers/rs/main.c index e862e6ffb..27723f003 100644 --- a/servers/rs/main.c +++ b/servers/rs/main.c @@ -159,6 +159,7 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) int s,i,j; int nr_image_srvs, nr_image_priv_srvs, nr_uncaught_init_srvs; struct rproc *rp; + struct rproc *replica_rp; struct rprocpub *rpub; struct boot_image image[NR_BOOT_PROCS]; struct mproc mproc[NR_PROCS]; @@ -166,6 +167,9 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) struct boot_image_priv *boot_image_priv; struct boot_image_sys *boot_image_sys; struct boot_image_dev *boot_image_dev; + message m; + int pid, replica_pid; + endpoint_t replica_endpoint; /* See if we run in verbose mode. */ env_parse("rs_verbose", "d", 0, &rs_verbose, 0, 1); @@ -293,23 +297,23 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) /* Get heartbeat period. */ rpub->period = boot_image_priv->period; - if(boot_image_priv->endpoint != RS_PROC_NR) { - /* Force a static priv id for system services in the boot image. */ - rp->r_priv.s_id = static_priv_id( - _ENDPOINT_P(boot_image_priv->endpoint)); - - /* Initialize privilege bitmaps and signal manager. */ - rp->r_priv.s_flags = boot_image_priv->flags; /* priv flags */ - rp->r_priv.s_trap_mask = boot_image_priv->trap_mask; /* traps */ - memcpy(&rp->r_priv.s_ipc_to, &boot_image_priv->ipc_to, - sizeof(rp->r_priv.s_ipc_to)); /* targets */ - rp->r_priv.s_sig_mgr = boot_image_priv->sig_mgr; /* sig mgr */ - - /* Initialize kernel call mask bitmap from unordered set. */ - fill_call_mask(boot_image_priv->k_calls, NR_SYS_CALLS, - rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE); + /* Force a static priv id for system services in the boot image. */ + rp->r_priv.s_id = static_priv_id( + _ENDPOINT_P(boot_image_priv->endpoint)); + + /* Initialize privilege bitmaps and signal manager. 
*/ + rp->r_priv.s_flags = boot_image_priv->flags; /* priv flags */ + rp->r_priv.s_trap_mask = boot_image_priv->trap_mask; /* traps */ + memcpy(&rp->r_priv.s_ipc_to, &boot_image_priv->ipc_to, + sizeof(rp->r_priv.s_ipc_to)); /* targets */ + rp->r_priv.s_sig_mgr = boot_image_priv->sig_mgr; /* sig mgr */ + + /* Initialize kernel call mask bitmap from unordered set. */ + fill_call_mask(boot_image_priv->k_calls, NR_SYS_CALLS, + rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE); /* Set the privilege structure. */ + if(boot_image_priv->endpoint != RS_PROC_NR) { if ((s = sys_privctl(ip->endpoint, SYS_PRIV_SET_SYS, &(rp->r_priv))) != OK) { panic("unable to set privilege structure: %d", s); @@ -474,16 +478,75 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) if (OK != (s=sys_setalarm(RS_DELTA_T, 0))) panic("couldn't set alarm: %d", s); - /* Map out our own text and data. This is normally done in crtso.o - * but RS is an exception - we don't get to talk to VM so early on. - * That's why we override munmap() and munmap_text() in utility.c. - * - * _minix_unmapzero() is the same code in crtso.o that normally does - * it on startup. It's best that it's there as crtso.o knows exactly - * what the ranges are of the filler data. - */ - unmap_ok = 1; - _minix_unmapzero(); + /* Now create a new RS instance with a private page table and let the current + * instance live update into the replica. Clone RS' own slot first. + */ + rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)]; + if((s = clone_slot(rp, &replica_rp)) != OK) { + panic("unable to clone current RS instance: %d", s); + } + + /* Fork a new RS instance. */ + pid = srv_fork(); + if(pid == -1) { + panic("unable to fork a new RS instance"); + } + replica_pid = pid ? pid : getpid(); + replica_endpoint = getnprocnr(replica_pid); + replica_rp->r_pid = replica_pid; + replica_rp->r_pub->endpoint = replica_endpoint; + + if(pid == 0) { + /* New RS instance running. */ + + /* Synchronize with the old instance. 
*/ + s = sef_receive(RS_PROC_NR, &m); + if(s != OK) { + panic("sef_receive failed: %d", s); + } + + /* Live update the old instance into the new one. */ + s = update_service(&rp, &replica_rp); + if(s != OK) { + panic("unable to live update RS: %d", s); + } + cpf_reload(); + + /* Clean up the old RS instance, the new instance will take over. */ + cleanup_service(rp); + + /* Map out our own text and data. */ + unmap_ok = 1; + _minix_unmapzero(); + } + else { + /* Old RS instance running. */ + + /* Ask VM to pin memory for the new RS instance. */ + s = vm_memctl(replica_endpoint, VM_RS_MEM_PIN); + if(s != OK) { + panic("unable to pin memory for the new RS instance: %d", s); + } + + /* Set up privileges for the new instance and let it run. */ + set_sys_bit(replica_rp->r_priv.s_ipc_to, static_priv_id(RS_PROC_NR)); + s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv)); + if(s != OK) { + panic("unable to set privileges for the new RS instance: %d", s); + } + s = sys_privctl(replica_endpoint, SYS_PRIV_ALLOW, NULL); + if(s != OK) { + panic("unable to let the new RS instance run: %d", s); + } + + /* Synchronize with the new instance and go to sleep. */ + m.m_type = RS_INIT; + s = sendrec(replica_endpoint, &m); + if(s != OK) { + panic("sendrec failed: %d", s); + } + /* Not reachable */ + } return(OK); } diff --git a/servers/rs/manager.c b/servers/rs/manager.c index 423a1232d..6aa0684c7 100644 --- a/servers/rs/manager.c +++ b/servers/rs/manager.c @@ -1435,6 +1435,11 @@ struct rproc **clone_rpp; rpub = rp->r_pub; clone_rpub = clone_rp->r_pub; + /* Synch the privilege structure of the source with the kernel. */ + if ((r = sys_getpriv(&(rp->r_priv), rpub->endpoint)) != OK) { + panic("unable to synch privilege structure: %d", r); + } + /* Shallow copy. 
*/ *clone_rp = *rp; *clone_rpub = *rpub; diff --git a/servers/rs/table.c b/servers/rs/table.c index 25f7dfa07..9a5650f34 100644 --- a/servers/rs/table.c +++ b/servers/rs/table.c @@ -44,7 +44,8 @@ PRIVATE int VM_NOTIFY_SIG, SYS_NULL_C }, sched_vmc[] ={ VM_BASIC_CALLS, SYS_NULL_C }, vfs_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C }, - rs_vmc[] = { VM_BASIC_CALLS, VM_RS_SET_PRIV, VM_RS_UPDATE, SYS_NULL_C }, + rs_vmc[] = { VM_BASIC_CALLS, VM_RS_SET_PRIV, VM_RS_UPDATE, VM_RS_MEMCTL, + SYS_NULL_C }, ds_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C }, vm_vmc[] = { SYS_NULL_C }, tty_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C }, diff --git a/servers/vm/exit.c b/servers/vm/exit.c index 9afee36e4..43d0763f9 100644 --- a/servers/vm/exit.c +++ b/servers/vm/exit.c @@ -70,12 +70,17 @@ SANITYCHECK(SCL_FUNCTIONS); if(vmp->vm_flags & VMF_HAS_DMA) { release_dma(vmp); - } else { - assert(vmp->vm_flags & VMF_HASPT); + } else if(vmp->vm_flags & VMF_HASPT) { /* Free pagetable and pages allocated by pt code. */ SANITYCHECK(SCL_DETAIL); free_proc(vmp); SANITYCHECK(SCL_DETAIL); + } else { + /* Free the data and stack segments. */ + free_mem(vmp->vm_arch.vm_seg[D].mem_phys, + vmp->vm_arch.vm_seg[S].mem_vir + + vmp->vm_arch.vm_seg[S].mem_len - + vmp->vm_arch.vm_seg[D].mem_vir); } SANITYCHECK(SCL_DETAIL); diff --git a/servers/vm/main.c b/servers/vm/main.c index fd9490a8d..03540a62a 100644 --- a/servers/vm/main.c +++ b/servers/vm/main.c @@ -344,6 +344,7 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) /* Calls from RS */ CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv); CALLMAP(VM_RS_UPDATE, do_rs_update); + CALLMAP(VM_RS_MEMCTL, do_rs_memctl); /* Generic calls. 
*/ CALLMAP(VM_REMAP, do_remap); diff --git a/servers/vm/proto.h b/servers/vm/proto.h index fb20c424d..0c52d3510 100644 --- a/servers/vm/proto.h +++ b/servers/vm/proto.h @@ -141,6 +141,7 @@ _PROTOTYPE(int map_proc_copy,(struct vmproc *dst, struct vmproc *src)); _PROTOTYPE(struct vir_region *map_lookup,(struct vmproc *vmp, vir_bytes addr)); _PROTOTYPE(int map_pf,(struct vmproc *vmp, struct vir_region *region, vir_bytes offset, int write)); +_PROTOTYPE(int map_pin_memory,(struct vmproc *vmp)); _PROTOTYPE(int map_handle_memory,(struct vmproc *vmp, struct vir_region *region, vir_bytes offset, vir_bytes len, int write)); _PROTOTYPE(void map_printmap, (struct vmproc *vmp)); @@ -185,6 +186,7 @@ _PROTOTYPE( vir_bytes arch_addrok, (struct vmproc *vmp, vir_bytes addr)); /* rs.c */ _PROTOTYPE(int do_rs_set_priv, (message *m)); _PROTOTYPE(int do_rs_update, (message *m)); +_PROTOTYPE(int do_rs_memctl, (message *m)); /* queryexit.c */ _PROTOTYPE(int do_query_exit, (message *m)); diff --git a/servers/vm/region.c b/servers/vm/region.c index c9299d922..f6594c84d 100644 --- a/servers/vm/region.c +++ b/servers/vm/region.c @@ -1116,6 +1116,34 @@ int write; return r; } +/*===========================================================================* + * map_pin_memory * + *===========================================================================*/ +PUBLIC int map_pin_memory(struct vmproc *vmp) +{ + struct vir_region *vr; + int offset, r; + + /* Scan all memory regions. */ + for(vr = vmp->vm_regions; vr; vr = vr->next) { + vir_bytes offset; + /* Skip regions that can't pagefault. */ + if((vr->flags & VR_NOPF) || (vr->flags & VR_SHARED)) { + continue; + } + /* Map other regions. 
*/ + for(offset=0;offset<vr->length;offset += VM_PAGE_SIZE) { + if((r=map_pf(vmp, vr, offset, 1 /* write */)) + != OK) { + printf("VM: map_pf failed\n"); + return r; + } + } + } + + return OK; +} + /*===========================================================================* * map_handle_memory * *===========================================================================*/ diff --git a/servers/vm/rs.c b/servers/vm/rs.c index dcda8c98b..45e85e644 100644 --- a/servers/vm/rs.c +++ b/servers/vm/rs.c @@ -38,7 +38,7 @@ PUBLIC int do_rs_set_priv(message *m) nr = m->VM_RS_NR; if ((r = vm_isokendpt(nr, &n)) != OK) { - printf("do_rs_set_priv: message from strange source %d\n", nr); + printf("do_rs_set_priv: bad endpoint %d\n", nr); return EINVAL; } @@ -60,7 +60,7 @@ PUBLIC int do_rs_set_priv(message *m) *===========================================================================*/ PUBLIC int do_rs_update(message *m_ptr) { - endpoint_t src_e, dst_e; + endpoint_t src_e, dst_e, reply_e; int r; src_e = m_ptr->VM_RS_SRC_ENDPT; @@ -74,7 +74,52 @@ PUBLIC int do_rs_update(message *m_ptr) /* Do the update in VM now. */ r = swap_proc(src_e, dst_e); + if(r != OK) { + return r; + } + + /* Reply, update-aware. */ + reply_e = m_ptr->m_source; + if(reply_e == src_e) reply_e = dst_e; + if(reply_e == dst_e) reply_e = src_e; + m_ptr->m_type = OK; + r = send(reply_e, m_ptr); + if(r != OK) { + panic("send() error"); + } - return r; + return SUSPEND; +} + +/*===========================================================================* + * do_rs_memctl * + *===========================================================================*/ +PUBLIC int do_rs_memctl(message *m_ptr) +{ + endpoint_t ep; + int req, r, proc_nr; + struct vmproc *vmp; + + ep = m_ptr->VM_RS_CTL_ENDPT; + req = m_ptr->VM_RS_CTL_REQ; + + /* Lookup endpoint. */ + if ((r = vm_isokendpt(ep, &proc_nr)) != OK) { + printf("do_rs_memctl: bad endpoint %d\n", ep); + return EINVAL; + } + vmp = &vmproc[proc_nr]; + + /* Process request. 
*/ + switch(req) + { + case VM_RS_MEM_PIN: + r = map_pin_memory(vmp); + return r; + + default: + printf("do_rs_memctl: bad request %d\n", req); + return EINVAL; + } } diff --git a/servers/vm/utility.c b/servers/vm/utility.c index 40147f638..54c137083 100644 --- a/servers/vm/utility.c +++ b/servers/vm/utility.c @@ -286,10 +286,10 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e) } /* Adjust page tables. */ - assert(src_vmp->vm_flags & VMF_HASPT); - assert(dst_vmp->vm_flags & VMF_HASPT); - pt_bind(&src_vmp->vm_pt, src_vmp); - pt_bind(&dst_vmp->vm_pt, dst_vmp); + if(src_vmp->vm_flags & VMF_HASPT) + pt_bind(&src_vmp->vm_pt, src_vmp); + if(dst_vmp->vm_flags & VMF_HASPT) + pt_bind(&dst_vmp->vm_pt, dst_vmp); if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) { panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r); } -- 2.44.0