From: Cristiano Giuffrida Date: Tue, 20 Jul 2010 23:03:52 +0000 (+0000) Subject: Crash recovery and live update support for VM. X-Git-Tag: v3.1.8~175 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/doxygen.png?a=commitdiff_plain;h=91a83fe2653a64b413ba97328f62a46dc5ed1efd;p=minix.git Crash recovery and live update support for VM. --- diff --git a/include/minix/com.h b/include/minix/com.h index 16526b20d..454da85e7 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -1058,7 +1058,8 @@ #define VM_RS_MEMCTL (VM_RQ_BASE+42) # define VM_RS_CTL_ENDPT m1_i1 # define VM_RS_CTL_REQ m1_i2 -# define VM_RS_MEM_PIN 0 /* pin memory */ +# define VM_RS_MEM_PIN 0 /* pin memory */ +# define VM_RS_MEM_MAKE_VM 1 /* make VM instance */ /* Total. */ #define NR_VM_CALLS 43 diff --git a/servers/rs/manager.c b/servers/rs/manager.c index 9ffa357c0..3e778c0ec 100644 --- a/servers/rs/manager.c +++ b/servers/rs/manager.c @@ -552,6 +552,19 @@ struct rproc *rp; */ setuid(0); + /* If this is a VM instance, let VM know now. */ + if(rp->r_priv.s_flags & VM_SYS_PROC) { + if(rs_verbose) + printf("RS: informing VM of instance %s\n", srv_to_string(rp)); + + s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM); + if(s != OK) { + printf("vm_memctl failed: %d\n", s); + cleanup_service(rp); + return s; + } + } + /* Tell VM about allowed calls. 
*/ if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0])) != OK) { printf("RS: vm_set_priv failed: %d\n", s); diff --git a/servers/vm/arch/i386/pagetable.c b/servers/vm/arch/i386/pagetable.c index 1fb5c9477..880051381 100644 --- a/servers/vm/arch/i386/pagetable.c +++ b/servers/vm/arch/i386/pagetable.c @@ -526,6 +526,119 @@ PRIVATE char *ptestr(u32_t pte) return str; } +/*===========================================================================* + * pt_map_in_range * + *===========================================================================*/ +PUBLIC int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp, + vir_bytes start, vir_bytes end) +{ +/* Transfer all the mappings from the pt of the source process to the pt of + * the destination process in the range specified. + */ + int pde, pte; + int r; + vir_bytes viraddr, mapaddr; + pt_t *pt, *dst_pt; + + pt = &src_vmp->vm_pt; + dst_pt = &dst_vmp->vm_pt; + + end = end ? end : VM_DATATOP; + assert(start % I386_PAGE_SIZE == 0); + assert(end % I386_PAGE_SIZE == 0); + assert(I386_VM_PDE(start) >= proc_pde && start <= end); + assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES); + +#if LU_DEBUG + printf("VM: pt_map_in_range: src = %d, dst = %d\n", + src_vmp->vm_endpoint, dst_vmp->vm_endpoint); + printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n", + start, I386_VM_PDE(start), I386_VM_PTE(start), + end, I386_VM_PDE(end), I386_VM_PTE(end)); +#endif + + /* Scan all page-table entries in the range. */ + for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) { + pde = I386_VM_PDE(viraddr); + if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) { + if(viraddr == VM_DATATOP) break; + continue; + } + pte = I386_VM_PTE(viraddr); + if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) { + if(viraddr == VM_DATATOP) break; + continue; + } + + /* Transfer the mapping. 
*/ + dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte]; + + if(viraddr == VM_DATATOP) break; + } + + return OK; +} + +/*===========================================================================* + * pt_ptmap * + *===========================================================================*/ +PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp) +{ +/* Transfer mappings to page dir and page tables from source process to + * destination process. Make sure all the mappings are above the stack, so as + * not to corrupt valid mappings in the data segment of the destination process. + */ + int pde, r; + phys_bytes physaddr; + vir_bytes viraddr; + pt_t *pt; + + assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop); + pt = &src_vmp->vm_pt; + +#if LU_DEBUG + printf("VM: pt_ptmap: src = %d, dst = %d\n", + src_vmp->vm_endpoint, dst_vmp->vm_endpoint); +#endif + + /* Transfer mapping to the page directory. */ + assert((vir_bytes) pt->pt_dir >= src_vmp->vm_stacktop); + viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_dir); + physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK; + if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE, + I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, + WMF_OVERWRITE)) != OK) { + return r; + } +#if LU_DEBUG + printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n", + viraddr, physaddr); +#endif + + /* Scan all non-reserved page-directory entries. */ + for(pde=proc_pde; pde < I386_VM_DIR_ENTRIES; pde++) { + if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) { + continue; + } + + /* Transfer mapping to the page table. 
*/ + assert((vir_bytes) pt->pt_pt[pde] >= src_vmp->vm_stacktop); + viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_pt[pde]); + physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK; + if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE, + I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, + WMF_OVERWRITE)) != OK) { + return r; + } + } +#if LU_DEBUG + printf("VM: pt_ptmap: transferred mappings to page tables, pde range %d - %d\n", + proc_pde, I386_VM_DIR_ENTRIES - 1); +#endif + + return OK; +} + /*===========================================================================* * pt_writemap * *===========================================================================*/ @@ -920,7 +1033,12 @@ PUBLIC void pt_init(phys_bytes usedlimit) /* Back to reality - this is where the stack actually is. */ vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks; - + + /* Pretend VM stack top is the same as any regular process, not to + * have discrepancies with new VM instances later on. + */ + vmprocess->vm_stacktop = VM_STACKTOP; + /* All OK. 
*/ return; } diff --git a/servers/vm/proto.h b/servers/vm/proto.h index 05ef411e6..48bf11876 100644 --- a/servers/vm/proto.h +++ b/servers/vm/proto.h @@ -44,7 +44,8 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks, _PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) ); _PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) ); _PROTOTYPE( int do_info, (message *) ); -_PROTOTYPE( int swap_proc, (endpoint_t src_e, endpoint_t dst_e) ); +_PROTOTYPE( int swap_proc_slot, (struct vmproc *src_vmp, struct vmproc *dst_vmp)); +_PROTOTYPE( int swap_proc_dyn_data, (struct vmproc *src_vmp, struct vmproc *dst_vmp)); /* exit.c */ _PROTOTYPE( void clear_proc, (struct vmproc *vmp) ); @@ -101,6 +102,9 @@ _PROTOTYPE( void pt_init_mem, (void) ); _PROTOTYPE( void pt_check, (struct vmproc *vmp) ); _PROTOTYPE( int pt_new, (pt_t *pt) ); _PROTOTYPE( void pt_free, (pt_t *pt) ); +_PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp, + vir_bytes start, vir_bytes end) ); +_PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) ); _PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end, u32_t flags, int verify)); _PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr, diff --git a/servers/vm/rs.c b/servers/vm/rs.c index 45e85e644..51134126e 100644 --- a/servers/vm/rs.c +++ b/servers/vm/rs.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "glo.h" #include "proto.h" @@ -61,11 +62,26 @@ PUBLIC int do_rs_set_priv(message *m) PUBLIC int do_rs_update(message *m_ptr) { endpoint_t src_e, dst_e, reply_e; + int src_p, dst_p; + struct vmproc *src_vmp, *dst_vmp; + struct vir_region *vr; int r; src_e = m_ptr->VM_RS_SRC_ENDPT; dst_e = m_ptr->VM_RS_DST_ENDPT; + /* Lookup slots for source and destination process. 
*/ + if(vm_isokendpt(src_e, &src_p) != OK) { + printf("do_rs_update: bad src endpoint %d\n", src_e); + return EINVAL; + } + src_vmp = &vmproc[src_p]; + if(vm_isokendpt(dst_e, &dst_p) != OK) { + printf("do_rs_update: bad dst endpoint %d\n", dst_e); + return EINVAL; + } + dst_vmp = &vmproc[dst_p]; + /* Let the kernel do the update first. */ r = sys_update(src_e, dst_e); if(r != OK) { @@ -73,15 +89,21 @@ PUBLIC int do_rs_update(message *m_ptr) } /* Do the update in VM now. */ - r = swap_proc(src_e, dst_e); + r = swap_proc_slot(src_vmp, dst_vmp); if(r != OK) { return r; } + r = swap_proc_dyn_data(src_vmp, dst_vmp); + if(r != OK) { + return r; + } + pt_bind(&src_vmp->vm_pt, src_vmp); + pt_bind(&dst_vmp->vm_pt, dst_vmp); /* Reply, update-aware. */ reply_e = m_ptr->m_source; if(reply_e == src_e) reply_e = dst_e; - if(reply_e == dst_e) reply_e = src_e; + else if(reply_e == dst_e) reply_e = src_e; m_ptr->m_type = OK; r = send(reply_e, m_ptr); if(r != OK) { @@ -91,6 +113,55 @@ PUBLIC int do_rs_update(message *m_ptr) return SUSPEND; } +/*===========================================================================* + * rs_memctl_make_vm_instance * + *===========================================================================*/ +PRIVATE int rs_memctl_make_vm_instance(struct vmproc *new_vm_vmp) +{ + int vm_p, r; + u32_t flags; + int verify; + struct vmproc *this_vm_vmp; + + this_vm_vmp = &vmproc[VM_PROC_NR]; + + /* Copy settings from current VM. */ + new_vm_vmp->vm_stacktop = this_vm_vmp->vm_stacktop; + new_vm_vmp->vm_arch.vm_data_top = this_vm_vmp->vm_arch.vm_data_top; + + /* Pin memory for the new VM instance. */ + r = map_pin_memory(new_vm_vmp); + if(r != OK) { + return r; + } + + /* Preallocate page tables for the entire address space for both + * VM and the new VM instance. 
+ */ + flags = 0; + verify = FALSE; + r = pt_ptalloc_in_range(&this_vm_vmp->vm_pt, 0, 0, flags, verify); + if(r != OK) { + return r; + } + r = pt_ptalloc_in_range(&new_vm_vmp->vm_pt, 0, 0, flags, verify); + if(r != OK) { + return r; + } + + /* Let the new VM instance map VM's page tables and its own. */ + r = pt_ptmap(this_vm_vmp, new_vm_vmp); + if(r != OK) { + return r; + } + r = pt_ptmap(new_vm_vmp, new_vm_vmp); + if(r != OK) { + return r; + } + + return OK; +} + /*===========================================================================* * do_rs_memctl * *===========================================================================*/ @@ -116,7 +187,9 @@ PUBLIC int do_rs_memctl(message *m_ptr) case VM_RS_MEM_PIN: r = map_pin_memory(vmp); return r; - + case VM_RS_MEM_MAKE_VM: + r = rs_memctl_make_vm_instance(vmp); + return r; default: printf("do_rs_memctl: bad request %d\n", req); return EINVAL; diff --git a/servers/vm/utility.c b/servers/vm/utility.c index 339a5599f..6d8500941 100644 --- a/servers/vm/utility.c +++ b/servers/vm/utility.c @@ -37,8 +37,6 @@ #include "kernel/type.h" #include "kernel/proc.h" -#define SWAP_PROC_DEBUG 0 - /*===========================================================================* * get_mem_map * *===========================================================================*/ @@ -232,36 +230,16 @@ PUBLIC int do_info(message *m) } /*===========================================================================* - * swap_proc * + * swap_proc_slot * *===========================================================================*/ -PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e) +PUBLIC int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp) { - struct vmproc *src_vmp, *dst_vmp; struct vmproc orig_src_vmproc, orig_dst_vmproc; - int src_p, dst_p, r; - struct vir_region *vr; - - /* Lookup slots for source and destination process. 
*/ - if(vm_isokendpt(src_e, &src_p) != OK) { - printf("swap_proc: bad src endpoint %d\n", src_e); - return EINVAL; - } - src_vmp = &vmproc[src_p]; - if(vm_isokendpt(dst_e, &dst_p) != OK) { - printf("swap_proc: bad dst endpoint %d\n", dst_e); - return EINVAL; - } - dst_vmp = &vmproc[dst_p]; - -#if SWAP_PROC_DEBUG - printf("swap_proc: swapping %d (%d, %d) and %d (%d, %d)\n", - src_vmp->vm_endpoint, src_p, src_vmp->vm_slot, - dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot); - printf("swap_proc: map_printmap for source before swapping:\n"); - map_printmap(src_vmp); - printf("swap_proc: map_printmap for destination before swapping:\n"); - map_printmap(dst_vmp); +#if LU_DEBUG + printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n", + src_vmp->vm_endpoint, src_vmp->vm_slot, + dst_vmp->vm_endpoint, dst_vmp->vm_slot); #endif /* Save existing data. */ @@ -278,7 +256,52 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e) dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint; dst_vmp->vm_slot = orig_dst_vmproc.vm_slot; - /* Preserve vir_region's parents. */ + /* Preserve yielded blocks. */ + src_vmp->vm_yielded_blocks = orig_src_vmproc.vm_yielded_blocks; + dst_vmp->vm_yielded_blocks = orig_dst_vmproc.vm_yielded_blocks; + +#if LU_DEBUG + printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n", + src_vmp->vm_endpoint, src_vmp->vm_slot, + dst_vmp->vm_endpoint, dst_vmp->vm_slot); +#endif + + return OK; +} + +/*===========================================================================* + * swap_proc_dyn_data * + *===========================================================================*/ +PUBLIC int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp) +{ + struct vir_region *vr; + int is_vm; + int r; + + is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR); + + /* For VM, transfer memory regions above the stack first. 
*/ + if(is_vm) { +#if LU_DEBUG + printf("VM: swap_proc_dyn_data: tranferring regions above the stack from old VM (%d) to new VM (%d)\n", + src_vmp->vm_endpoint, dst_vmp->vm_endpoint); +#endif + assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop); + r = pt_map_in_range(src_vmp, dst_vmp, + arch_vir2map(src_vmp, src_vmp->vm_stacktop), 0); + if(r != OK) { + printf("swap_proc_dyn_data: pt_map_in_range failed\n"); + return r; + } + } + +#if LU_DEBUG + printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n", + src_vmp->vm_endpoint, src_vmp->vm_slot, + dst_vmp->vm_endpoint, dst_vmp->vm_slot); +#endif + + /* Swap vir_regions' parents. */ for(vr = src_vmp->vm_regions; vr; vr = vr->next) { USE(vr, vr->parent = src_vmp;); } @@ -286,25 +309,25 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e) USE(vr, vr->parent = dst_vmp;); } - /* Adjust page tables. */ - if(src_vmp->vm_flags & VMF_HASPT) - pt_bind(&src_vmp->vm_pt, src_vmp); - if(dst_vmp->vm_flags & VMF_HASPT) - pt_bind(&dst_vmp->vm_pt, dst_vmp); - if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) { - panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r); - } - -#if SWAP_PROC_DEBUG - printf("swap_proc: swapped %d (%d, %d) and %d (%d, %d)\n", - src_vmp->vm_endpoint, src_p, src_vmp->vm_slot, - dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot); - - printf("swap_proc: map_printmap for source after swapping:\n"); - map_printmap(src_vmp); - printf("swap_proc: map_printmap for destination after swapping:\n"); - map_printmap(dst_vmp); + /* For regular processes, transfer regions above the stack now. + * In case of rollback, we need to skip this step. To sandbox the + * new instance and prevent state corruption on rollback, we share all + * the regions between the two instances as COW. 
+ */ + if(!is_vm && (dst_vmp->vm_flags & VMF_HASPT)) { + vr = map_lookup(dst_vmp, arch_vir2map(dst_vmp, dst_vmp->vm_stacktop)); + if(vr && !map_lookup(src_vmp, arch_vir2map(src_vmp, src_vmp->vm_stacktop))) { +#if LU_DEBUG + printf("VM: swap_proc_dyn_data: tranferring regions above the stack from %d to %d\n", + src_vmp->vm_endpoint, dst_vmp->vm_endpoint); #endif + assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop); + r = map_proc_copy_from(src_vmp, dst_vmp, vr); + if(r != OK) { + return r; + } + } + } return OK; } diff --git a/servers/vm/vm.h b/servers/vm/vm.h index 89af44569..7dabed241 100644 --- a/servers/vm/vm.h +++ b/servers/vm/vm.h @@ -28,6 +28,7 @@ /* How noisy are we supposed to be? */ #define VERBOSE 0 +#define LU_DEBUG 1 /* Minimum stack region size - 64MB. */ #define MINSTACKREGION (64*1024*1024)