#define VM_RS_MEMCTL (VM_RQ_BASE+42)
# define VM_RS_CTL_ENDPT m1_i1
# define VM_RS_CTL_REQ m1_i2
-# define VM_RS_MEM_PIN 0 /* pin memory */
+# define VM_RS_MEM_PIN 0 /* pin memory */
+# define VM_RS_MEM_MAKE_VM 1 /* make VM instance */
/* Total. */
#define NR_VM_CALLS 43
*/
setuid(0);
+ /* If this is a VM instance, let VM know now. */
+ if(rp->r_priv.s_flags & VM_SYS_PROC) {
+ if(rs_verbose)
+ printf("RS: informing VM of instance %s\n", srv_to_string(rp));
+
+ s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM);
+ if(s != OK) {
+ printf("vm_memctl failed: %d\n", s);
+ cleanup_service(rp);
+ return s;
+ }
+ }
+
/* Tell VM about allowed calls. */
if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0])) != OK) {
printf("RS: vm_set_priv failed: %d\n", s);
return str;
}
+/*===========================================================================*
+ *				pt_map_in_range				     *
+ *===========================================================================*/
+PUBLIC int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
+	vir_bytes start, vir_bytes end)
+{
+/* Transfer all the mappings from the pt of the source process to the pt of
+ * the destination process in the range specified.
+ *
+ * 'start' and 'end' must be page-aligned; the page at 'end' is included in
+ * the scan. Passing end == 0 selects VM_DATATOP as the upper bound.
+ * Every present page-table entry of the source is copied verbatim into the
+ * same slot of the destination page table. No page tables are allocated
+ * here, so the destination must already have a page table for every
+ * page-directory slot in which the source has present entries.
+ * Always returns OK.
+ */
+	int pde, pte;
+	vir_bytes viraddr;
+	pt_t *pt, *dst_pt;
+
+	pt = &src_vmp->vm_pt;
+	dst_pt = &dst_vmp->vm_pt;
+
+	end = end ? end : VM_DATATOP;
+	assert(start % I386_PAGE_SIZE == 0);
+	assert(end % I386_PAGE_SIZE == 0);
+	assert(I386_VM_PDE(start) >= proc_pde && start <= end);
+	assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES);
+
+#if LU_DEBUG
+	printf("VM: pt_map_in_range: src = %d, dst = %d\n",
+		src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
+	printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
+		start, I386_VM_PDE(start), I386_VM_PTE(start),
+		end, I386_VM_PDE(end), I386_VM_PTE(end));
+#endif
+
+	/* Scan all page-table entries in the range. Each path through the
+	 * loop body stops explicitly once VM_DATATOP has been handled instead
+	 * of relying on the loop condition alone (presumably so that viraddr
+	 * cannot wrap around past the top of the address space - TODO confirm
+	 * against the value of VM_DATATOP).
+	 */
+	for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) {
+		pde = I386_VM_PDE(viraddr);
+		if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
+			/* No source page table for this address. */
+			if(viraddr == VM_DATATOP) break;
+			continue;
+		}
+		pte = I386_VM_PTE(viraddr);
+		if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
+			/* Page not mapped in the source. */
+			if(viraddr == VM_DATATOP) break;
+			continue;
+		}
+
+		/* The destination page table is written, never allocated,
+		 * by this function: fail loudly if it is missing rather
+		 * than writing through a null pointer.
+		 */
+		assert(dst_pt->pt_pt[pde]);
+
+		/* Transfer the mapping. */
+		dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte];
+
+		if(viraddr == VM_DATATOP) break;
+	}
+
+	return OK;
+}
+
+/*===========================================================================*
+ *				pt_ptmap				     *
+ *===========================================================================*/
+PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
+{
+/* Make the page directory and the page tables of the source process
+ * visible in the address space of the destination process. Each of these
+ * pages is mapped into the destination at the address obtained by
+ * translating the source's own pointer to it with arch_vir2map(). All
+ * pointers involved are asserted to lie at or above the stack top, so that
+ * valid mappings in the data segment of the destination process are not
+ * corrupted. Returns OK, or the first error reported by pt_writemap().
+ */
+	int dir_slot, result;
+	phys_bytes phys;
+	vir_bytes mapped_at;
+	pt_t *src_pt;
+
+	assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
+	src_pt = &src_vmp->vm_pt;
+
+#if LU_DEBUG
+	printf("VM: pt_ptmap: src = %d, dst = %d\n",
+		src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
+#endif
+
+	/* First the page directory itself. */
+	assert((vir_bytes) src_pt->pt_dir >= src_vmp->vm_stacktop);
+	mapped_at = arch_vir2map(src_vmp, (vir_bytes) src_pt->pt_dir);
+	phys = src_pt->pt_dir_phys & I386_VM_ADDR_MASK;
+	result = pt_writemap(&dst_vmp->vm_pt, mapped_at, phys, I386_PAGE_SIZE,
+		I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
+		WMF_OVERWRITE);
+	if(result != OK)
+		return result;
+#if LU_DEBUG
+	printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
+		mapped_at, phys);
+#endif
+
+	/* Then every page table referenced by a present, non-reserved
+	 * page-directory entry.
+	 */
+	for(dir_slot = proc_pde; dir_slot < I386_VM_DIR_ENTRIES; dir_slot++) {
+		if(src_pt->pt_dir[dir_slot] & I386_VM_PRESENT) {
+			assert((vir_bytes) src_pt->pt_pt[dir_slot] >=
+				src_vmp->vm_stacktop);
+			mapped_at = arch_vir2map(src_vmp,
+				(vir_bytes) src_pt->pt_pt[dir_slot]);
+			/* The directory entry holds the physical address of
+			 * the page table.
+			 */
+			phys = src_pt->pt_dir[dir_slot] & I386_VM_ADDR_MASK;
+			result = pt_writemap(&dst_vmp->vm_pt, mapped_at, phys,
+				I386_PAGE_SIZE,
+				I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
+				WMF_OVERWRITE);
+			if(result != OK)
+				return result;
+		}
+	}
+#if LU_DEBUG
+	printf("VM: pt_ptmap: transferred mappings to page tables, pde range %d - %d\n",
+		proc_pde, I386_VM_DIR_ENTRIES - 1);
+#endif
+
+	return OK;
+}
+
/*===========================================================================*
* pt_writemap *
*===========================================================================*/
/* Back to reality - this is where the stack actually is. */
vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;
-
+
+ /* Pretend VM stack top is the same as any regular process, not to
+ * have discrepancies with new VM instances later on.
+ */
+ vmprocess->vm_stacktop = VM_STACKTOP;
+
/* All OK. */
return;
}
_PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) );
_PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) );
_PROTOTYPE( int do_info, (message *) );
-_PROTOTYPE( int swap_proc, (endpoint_t src_e, endpoint_t dst_e) );
+_PROTOTYPE( int swap_proc_slot, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
+_PROTOTYPE( int swap_proc_dyn_data, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
/* exit.c */
_PROTOTYPE( void clear_proc, (struct vmproc *vmp) );
_PROTOTYPE( void pt_check, (struct vmproc *vmp) );
_PROTOTYPE( int pt_new, (pt_t *pt) );
_PROTOTYPE( void pt_free, (pt_t *pt) );
+_PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp,
+ vir_bytes start, vir_bytes end) );
+_PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) );
_PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end,
u32_t flags, int verify));
_PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr,
#include <env.h>
#include <stdio.h>
#include <assert.h>
+#include <memory.h>
#include "glo.h"
#include "proto.h"
PUBLIC int do_rs_update(message *m_ptr)
{
endpoint_t src_e, dst_e, reply_e;
+ int src_p, dst_p;
+ struct vmproc *src_vmp, *dst_vmp;
+ struct vir_region *vr;
int r;
src_e = m_ptr->VM_RS_SRC_ENDPT;
dst_e = m_ptr->VM_RS_DST_ENDPT;
+ /* Lookup slots for source and destination process. */
+ if(vm_isokendpt(src_e, &src_p) != OK) {
+ printf("do_rs_update: bad src endpoint %d\n", src_e);
+ return EINVAL;
+ }
+ src_vmp = &vmproc[src_p];
+ if(vm_isokendpt(dst_e, &dst_p) != OK) {
+ printf("do_rs_update: bad dst endpoint %d\n", dst_e);
+ return EINVAL;
+ }
+ dst_vmp = &vmproc[dst_p];
+
/* Let the kernel do the update first. */
r = sys_update(src_e, dst_e);
if(r != OK) {
}
/* Do the update in VM now. */
- r = swap_proc(src_e, dst_e);
+ r = swap_proc_slot(src_vmp, dst_vmp);
if(r != OK) {
return r;
}
+ r = swap_proc_dyn_data(src_vmp, dst_vmp);
+ if(r != OK) {
+ return r;
+ }
+ pt_bind(&src_vmp->vm_pt, src_vmp);
+ pt_bind(&dst_vmp->vm_pt, dst_vmp);
/* Reply, update-aware. */
reply_e = m_ptr->m_source;
if(reply_e == src_e) reply_e = dst_e;
- if(reply_e == dst_e) reply_e = src_e;
+ else if(reply_e == dst_e) reply_e = src_e;
m_ptr->m_type = OK;
r = send(reply_e, m_ptr);
if(r != OK) {
return SUSPEND;
}
+/*===========================================================================*
+ *			rs_memctl_make_vm_instance			     *
+ *===========================================================================*/
+PRIVATE int rs_memctl_make_vm_instance(struct vmproc *new_vm_vmp)
+{
+/* Prepare the process in 'new_vm_vmp' to act as a new VM instance: copy
+ * the stack top and data top settings from the running VM, pin the new
+ * instance's memory, preallocate page tables for both VMs, and map both
+ * sets of page tables into the new instance. Returns OK on success, or
+ * the error code of the first step that fails; steps already performed
+ * are not undone on failure.
+ */
+	int r;
+	u32_t flags;
+	int verify;
+	struct vmproc *this_vm_vmp;
+
+	this_vm_vmp = &vmproc[VM_PROC_NR];
+
+	/* Copy settings from current VM. */
+	new_vm_vmp->vm_stacktop = this_vm_vmp->vm_stacktop;
+	new_vm_vmp->vm_arch.vm_data_top = this_vm_vmp->vm_arch.vm_data_top;
+
+	/* Pin memory for the new VM instance. */
+	r = map_pin_memory(new_vm_vmp);
+	if(r != OK) {
+		return r;
+	}
+
+	/* Preallocate page tables for the entire address space for both
+	 * VM and the new VM instance.
+	 */
+	flags = 0;
+	verify = FALSE;
+	r = pt_ptalloc_in_range(&this_vm_vmp->vm_pt, 0, 0, flags, verify);
+	if(r != OK) {
+		return r;
+	}
+	r = pt_ptalloc_in_range(&new_vm_vmp->vm_pt, 0, 0, flags, verify);
+	if(r != OK) {
+		return r;
+	}
+
+	/* Let the new VM instance map VM's page tables and its own. */
+	r = pt_ptmap(this_vm_vmp, new_vm_vmp);
+	if(r != OK) {
+		return r;
+	}
+	r = pt_ptmap(new_vm_vmp, new_vm_vmp);
+	if(r != OK) {
+		return r;
+	}
+
+	return OK;
+}
+
/*===========================================================================*
* do_rs_memctl *
*===========================================================================*/
case VM_RS_MEM_PIN:
r = map_pin_memory(vmp);
return r;
-
+ case VM_RS_MEM_MAKE_VM:
+ r = rs_memctl_make_vm_instance(vmp);
+ return r;
default:
printf("do_rs_memctl: bad request %d\n", req);
return EINVAL;
#include "kernel/type.h"
#include "kernel/proc.h"
-#define SWAP_PROC_DEBUG 0
-
/*===========================================================================*
* get_mem_map *
*===========================================================================*/
}
/*===========================================================================*
- * swap_proc *
+ * swap_proc_slot *
*===========================================================================*/
-PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
+PUBLIC int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
- struct vmproc *src_vmp, *dst_vmp;
struct vmproc orig_src_vmproc, orig_dst_vmproc;
- int src_p, dst_p, r;
- struct vir_region *vr;
-
- /* Lookup slots for source and destination process. */
- if(vm_isokendpt(src_e, &src_p) != OK) {
- printf("swap_proc: bad src endpoint %d\n", src_e);
- return EINVAL;
- }
- src_vmp = &vmproc[src_p];
- if(vm_isokendpt(dst_e, &dst_p) != OK) {
- printf("swap_proc: bad dst endpoint %d\n", dst_e);
- return EINVAL;
- }
- dst_vmp = &vmproc[dst_p];
-
-#if SWAP_PROC_DEBUG
- printf("swap_proc: swapping %d (%d, %d) and %d (%d, %d)\n",
- src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
- dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
- printf("swap_proc: map_printmap for source before swapping:\n");
- map_printmap(src_vmp);
- printf("swap_proc: map_printmap for destination before swapping:\n");
- map_printmap(dst_vmp);
+#if LU_DEBUG
+ printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
+ src_vmp->vm_endpoint, src_vmp->vm_slot,
+ dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif
/* Save existing data. */
dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;
- /* Preserve vir_region's parents. */
+ /* Preserve yielded blocks. */
+ src_vmp->vm_yielded_blocks = orig_src_vmproc.vm_yielded_blocks;
+ dst_vmp->vm_yielded_blocks = orig_dst_vmproc.vm_yielded_blocks;
+
+#if LU_DEBUG
+ printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
+ src_vmp->vm_endpoint, src_vmp->vm_slot,
+ dst_vmp->vm_endpoint, dst_vmp->vm_slot);
+#endif
+
+ return OK;
+}
+
+/*===========================================================================*
+ * swap_proc_dyn_data *
+ *===========================================================================*/
+PUBLIC int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
+{
+ struct vir_region *vr;
+ int is_vm;
+ int r;
+
+ is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);
+
+ /* For VM, transfer memory regions above the stack first. */
+ if(is_vm) {
+#if LU_DEBUG
+ printf("VM: swap_proc_dyn_data: tranferring regions above the stack from old VM (%d) to new VM (%d)\n",
+ src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
+#endif
+ assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
+ r = pt_map_in_range(src_vmp, dst_vmp,
+ arch_vir2map(src_vmp, src_vmp->vm_stacktop), 0);
+ if(r != OK) {
+ printf("swap_proc_dyn_data: pt_map_in_range failed\n");
+ return r;
+ }
+ }
+
+#if LU_DEBUG
+ printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
+ src_vmp->vm_endpoint, src_vmp->vm_slot,
+ dst_vmp->vm_endpoint, dst_vmp->vm_slot);
+#endif
+
+ /* Swap vir_regions' parents. */
for(vr = src_vmp->vm_regions; vr; vr = vr->next) {
USE(vr, vr->parent = src_vmp;);
}
USE(vr, vr->parent = dst_vmp;);
}
- /* Adjust page tables. */
- if(src_vmp->vm_flags & VMF_HASPT)
- pt_bind(&src_vmp->vm_pt, src_vmp);
- if(dst_vmp->vm_flags & VMF_HASPT)
- pt_bind(&dst_vmp->vm_pt, dst_vmp);
- if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
- panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r);
- }
-
-#if SWAP_PROC_DEBUG
- printf("swap_proc: swapped %d (%d, %d) and %d (%d, %d)\n",
- src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
- dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
-
- printf("swap_proc: map_printmap for source after swapping:\n");
- map_printmap(src_vmp);
- printf("swap_proc: map_printmap for destination after swapping:\n");
- map_printmap(dst_vmp);
+ /* For regular processes, transfer regions above the stack now.
+ * In case of rollback, we need to skip this step. To sandbox the
+ * new instance and prevent state corruption on rollback, we share all
+ * the regions between the two instances as COW.
+ */
+ if(!is_vm && (dst_vmp->vm_flags & VMF_HASPT)) {
+ vr = map_lookup(dst_vmp, arch_vir2map(dst_vmp, dst_vmp->vm_stacktop));
+ if(vr && !map_lookup(src_vmp, arch_vir2map(src_vmp, src_vmp->vm_stacktop))) {
+#if LU_DEBUG
+ printf("VM: swap_proc_dyn_data: tranferring regions above the stack from %d to %d\n",
+ src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
+ assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
+ r = map_proc_copy_from(src_vmp, dst_vmp, vr);
+ if(r != OK) {
+ return r;
+ }
+ }
+ }
return OK;
}
/* How noisy are we supposed to be? */
#define VERBOSE 0
+#define LU_DEBUG	0	/* set to 1 for live update debug output */
/* Minimum stack region size - 64MB. */
#define MINSTACKREGION (64*1024*1024)