From: Tomas Hruby Date: Wed, 15 Sep 2010 14:11:12 +0000 (+0000) Subject: SMP - Process is stopped when VM modifies the page tables X-Git-Tag: v3.2.0~843 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/openssl_button.gif?a=commitdiff_plain;h=6513d20744f1a350bfc0a81767efd32d97ce71b3;p=minix.git SMP - Process is stopped when VM modifies the page tables - RTS_VMINHIBIT flag is used to stop the process while VM is fiddling with its pagetables - more generic way of sending synchronous scheduling events among cpus - do the x-cpu smp sched calls only if the target process is runnable. If it is not, it cannot be running and it cannot become runnable while this CPU holds the BKL --- diff --git a/include/minix/com.h b/include/minix/com.h index 270e6406f..250e5e21a 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -635,6 +635,8 @@ #define VMCTL_KERN_PHYSMAP 27 #define VMCTL_KERN_MAP_REPLY 28 #define VMCTL_SETADDRSPACE 29 +#define VMCTL_VMINHIBIT_SET 30 +#define VMCTL_VMINHIBIT_CLEAR 31 /* Codes and field names for SYS_SYSCTL. 
*/ #define SYSCTL_CODE m1_i1 /* SYSCTL_CODE_* below */ diff --git a/kernel/smp.c b/kernel/smp.c index 77b6cede8..38d503457 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -1,3 +1,5 @@ +#include + #include "smp.h" #include "interrupt.h" @@ -7,7 +9,7 @@ unsigned bsp_cpu_id; PUBLIC struct cpu cpus[CONFIG_MAX_CPUS]; -/* flags passed to another cpu along with a sched ipi */ +/* info passed to another cpu along with a sched ipi */ struct sched_ipi_data { volatile u32_t flags; volatile u32_t data; @@ -16,6 +18,7 @@ struct sched_ipi_data { PRIVATE struct sched_ipi_data sched_ipi_data[CONFIG_MAX_CPUS]; #define SCHED_IPI_STOP_PROC 1 +#define SCHED_IPI_VM_INHIBIT 2 static volatile unsigned ap_cpus_booted; @@ -55,34 +58,77 @@ PUBLIC void smp_schedule(unsigned cpu) arch_send_smp_schedule_ipi(cpu); } -PUBLIC void smp_schedule_stop_proc(struct proc * p) +/* + * tell another cpu about a task to do and return only after the cpu acks that + * the task is finished. Also wait before it finishes task sent by another cpu + * to the same one. 
+ */ +PRIVATE void smp_schedule_sync(struct proc * p, unsigned task) { unsigned cpu = p->p_cpu; - sched_ipi_data[cpu].flags |= SCHED_IPI_STOP_PROC; + /* + * if some other cpu made a request to the same cpu, wait until it is + * done before proceeding + */ + if ((volatile unsigned)sched_ipi_data[cpu].flags != 0) { + BKL_UNLOCK(); + while ((volatile unsigned)sched_ipi_data[cpu].flags != 0); + BKL_LOCK(); + } + + sched_ipi_data[cpu].flags |= task; sched_ipi_data[cpu].data = (u32_t) p; arch_send_smp_schedule_ipi(cpu); + + /* wait until the destination cpu finishes its job */ BKL_UNLOCK(); while ((volatile unsigned)sched_ipi_data[cpu].flags != 0); BKL_LOCK(); } +PUBLIC void smp_schedule_stop_proc(struct proc * p) +{ + if (proc_is_runnable(p)) + smp_schedule_sync(p, SCHED_IPI_STOP_PROC); + else + RTS_SET(p, RTS_PROC_STOP); + assert(RTS_ISSET(p, RTS_PROC_STOP)); +} + +PUBLIC void smp_schedule_vminhibit(struct proc * p) +{ + if (proc_is_runnable(p)) + smp_schedule_sync(p, SCHED_IPI_VM_INHIBIT); + else + RTS_SET(p, RTS_VMINHIBIT); + assert(RTS_ISSET(p, RTS_VMINHIBIT)); +} + PUBLIC void smp_ipi_sched_handler(void) { - struct proc * p; + struct proc * curr; unsigned mycpu = cpuid; unsigned flgs; ipi_ack(); - p = get_cpu_var(mycpu, proc_ptr); + curr = get_cpu_var(mycpu, proc_ptr); flgs = sched_ipi_data[mycpu].flags; - if (flgs & SCHED_IPI_STOP_PROC) { - RTS_SET((struct proc *)sched_ipi_data[mycpu].data, RTS_PROC_STOP); + if (flgs) { + struct proc * p; + p = (struct proc *)sched_ipi_data[mycpu].data; + + if (flgs & SCHED_IPI_STOP_PROC) { + RTS_SET(p, RTS_PROC_STOP); + } + if (flgs & SCHED_IPI_VM_INHIBIT) { + RTS_SET(p, RTS_VMINHIBIT); + } } - else if (p->p_endpoint != IDLE) { - RTS_SET(p, RTS_PREEMPTED); + else if (curr->p_endpoint != IDLE) { + RTS_SET(curr, RTS_PREEMPTED); } sched_ipi_data[cpuid].flags = 0; } diff --git a/kernel/smp.h b/kernel/smp.h index 408f166bb..10855a679 100644 --- a/kernel/smp.h +++ b/kernel/smp.h @@ -59,7 +59,10 @@ _PROTOTYPE(void smp_ipi_halt_handler, 
(void)); _PROTOTYPE(void smp_ipi_sched_handler, (void)); _PROTOTYPE(void smp_schedule, (unsigned cpu)); +/* stop a processes on a different cpu */ _PROTOTYPE(void smp_schedule_stop_proc, (struct proc * p)); +/* stop a process on a different cpu because its adress space is being changed */ +_PROTOTYPE(void smp_schedule_vminhibit, (struct proc * p)); _PROTOTYPE(void arch_send_smp_schedule_ipi, (unsigned cpu)); _PROTOTYPE(void arch_smp_halt_cpu, (void)); diff --git a/kernel/system/do_runctl.c b/kernel/system/do_runctl.c index 3500f28f2..16759464c 100644 --- a/kernel/system/do_runctl.c +++ b/kernel/system/do_runctl.c @@ -56,13 +56,13 @@ PUBLIC int do_runctl(struct proc * caller, message * m_ptr) /* check if we must stop a process on a different CPU */ if (rp->p_cpu != cpuid) { smp_schedule_stop_proc(rp); - assert(RTS_ISSET(rp, RTS_PROC_STOP)); break; } #endif RTS_SET(rp, RTS_PROC_STOP); break; case RC_RESUME: + assert(RTS_ISSET(rp, RTS_PROC_STOP)); RTS_UNSET(rp, RTS_PROC_STOP); break; default: diff --git a/kernel/system/do_vmctl.c b/kernel/system/do_vmctl.c index 82673deac..37f85333d 100644 --- a/kernel/system/do_vmctl.c +++ b/kernel/system/do_vmctl.c @@ -140,6 +140,23 @@ PUBLIC int do_vmctl(struct proc * caller, message * m_ptr) return arch_phys_map_reply(m_ptr->SVMCTL_VALUE, (vir_bytes) m_ptr->SVMCTL_MAP_VIR_ADDR); } + case VMCTL_VMINHIBIT_SET: + /* check if we must stop a process on a different CPU */ +#if CONFIG_SMP + if (p->p_cpu != cpuid) { + smp_schedule_vminhibit(p); + } else +#endif + RTS_SET(p, RTS_VMINHIBIT); + return OK; + case VMCTL_VMINHIBIT_CLEAR: + assert(RTS_ISSET(p, RTS_VMINHIBIT)); + /* + * the processes is certainly not runnable, no need to tell its + * cpu + */ + RTS_UNSET(p, RTS_VMINHIBIT); + return OK; } /* Try architecture-specific vmctls. 
*/ diff --git a/servers/vm/arch/i386/pagetable.c b/servers/vm/arch/i386/pagetable.c index 367508372..b07222c44 100644 --- a/servers/vm/arch/i386/pagetable.c +++ b/servers/vm/arch/i386/pagetable.c @@ -204,7 +204,7 @@ PRIVATE void vm_freepages(vir_bytes vir, vir_bytes phys, int pages, int reason) assert(!(vir % I386_PAGE_SIZE)); assert(!(phys % I386_PAGE_SIZE)); free_mem(ABS2CLICK(phys), pages); - if(pt_writemap(&vmprocess->vm_pt, arch_vir2map(vmprocess, vir), + if(pt_writemap(vmprocess, &vmprocess->vm_pt, arch_vir2map(vmprocess, vir), MAP_NONE, pages*I386_PAGE_SIZE, 0, WMF_OVERWRITE) != OK) panic("vm_freepages: pt_writemap failed"); } else { @@ -325,7 +325,7 @@ PUBLIC void *vm_allocpage(phys_bytes *phys, int reason) *phys = CLICK2ABS(newpage); /* Map this page into our address space. */ - if((r=pt_writemap(pt, loc, *phys, I386_PAGE_SIZE, + if((r=pt_writemap(vmprocess, pt, loc, *phys, I386_PAGE_SIZE, I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, 0)) != OK) { free_mem(newpage, CLICKSPERPAGE); printf("vm_allocpage writemap failed\n"); @@ -365,7 +365,7 @@ PUBLIC void vm_pagelock(void *vir, int lockflag) flags |= I386_VM_WRITE; /* Update flags. 
*/ - if((r=pt_writemap(pt, m, 0, I386_PAGE_SIZE, + if((r=pt_writemap(vmprocess, pt, m, 0, I386_PAGE_SIZE, flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) { panic("vm_lockpage: pt_writemap failed"); } @@ -605,7 +605,7 @@ PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp) assert((vir_bytes) pt->pt_dir >= src_vmp->vm_stacktop); viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_dir); physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK; - if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE, + if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE, I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, WMF_OVERWRITE)) != OK) { return r; @@ -625,7 +625,7 @@ PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp) assert((vir_bytes) pt->pt_pt[pde] >= src_vmp->vm_stacktop); viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_pt[pde]); physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK; - if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE, + if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE, I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, WMF_OVERWRITE)) != OK) { return r; @@ -642,13 +642,28 @@ PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp) /*===========================================================================* * pt_writemap * *===========================================================================*/ -PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, - size_t bytes, u32_t flags, u32_t writemapflags) +PUBLIC int pt_writemap(struct vmproc * vmp, + pt_t *pt, + vir_bytes v, + phys_bytes physaddr, + size_t bytes, + u32_t flags, + u32_t writemapflags) { /* Write mapping into page table. Allocate a new page table if necessary. */ /* Page directory and table entries for this virtual address. 
*/ int p, r, pages; int verify = 0; + int ret = OK; + + /* FIXME + * don't do it everytime, stop the process only on the first change and + * resume the execution on the last change. Do in a wrapper of this + * function + */ + if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR && + !(vmp->vm_flags & VMF_EXITING)) + sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0); if(writemapflags & WMF_VERIFY) verify = 1; @@ -669,9 +684,9 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, * before we start writing in any of them, because it's a pain * to undo our work properly. */ - r = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify); - if(r != OK) { - return r; + ret = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify); + if(ret != OK) { + goto resume_exit; } /* Now write in them. */ @@ -729,7 +744,8 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, printf(" masked %s; ", ptestr(maskedentry)); printf(" expected %s\n", ptestr(entry)); - return EFAULT; + ret = EFAULT; + goto resume_exit; } } else { /* Write pagetable entry. */ @@ -743,7 +759,13 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, v += I386_PAGE_SIZE; } - return OK; +resume_exit: + + if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR && + !(vmp->vm_flags & VMF_EXITING)) + sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0); + + return ret; } /*===========================================================================* @@ -923,7 +945,7 @@ PUBLIC void pt_init(phys_bytes usedlimit) /* We have to write the new position in the PT, * so we can move our segments. 
*/ - if(pt_writemap(newpt, v+moveup, v, I386_PAGE_SIZE, + if(pt_writemap(vmprocess, newpt, v+moveup, v, I386_PAGE_SIZE, I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK) panic("pt_init: pt_writemap failed"); } @@ -1212,7 +1234,7 @@ PUBLIC int pt_mapkernel(pt_t *pt) } for(i = 0; i < kernmappings; i++) { - if(pt_writemap(pt, + if(pt_writemap(NULL, pt, kern_mappings[i].lin_addr, kern_mappings[i].phys_addr, kern_mappings[i].len, diff --git a/servers/vm/fork.c b/servers/vm/fork.c index bd872bccb..0466b2225 100644 --- a/servers/vm/fork.c +++ b/servers/vm/fork.c @@ -190,6 +190,9 @@ PUBLIC int do_fork(message *msg) panic("do_fork can't sys_fork: %d", r); } + if((r=pt_bind(&vmc->vm_pt, vmc)) != OK) + panic("fork can't pt_bind: %d", r); + if(fullvm) { vir_bytes vir; /* making these messages writable is an optimisation @@ -201,9 +204,6 @@ PUBLIC int do_fork(message *msg) handle_memory(vmp, vir, sizeof(message), 1); } - if((r=pt_bind(&vmc->vm_pt, vmc)) != OK) - panic("fork can't pt_bind: %d", r); - /* Inform caller of new child endpoint. 
*/ msg->VMF_CHILD_ENDPOINT = vmc->vm_endpoint; diff --git a/servers/vm/mmap.c b/servers/vm/mmap.c index 47d715b57..d0996b2bf 100644 --- a/servers/vm/mmap.c +++ b/servers/vm/mmap.c @@ -425,7 +425,7 @@ PRIVATE int munmap_lin(vir_bytes addr, size_t len) return EFAULT; } - if(pt_writemap(&vmproc[VM_PROC_NR].vm_pt, addr, MAP_NONE, len, 0, + if(pt_writemap(NULL, &vmproc[VM_PROC_NR].vm_pt, addr, MAP_NONE, len, 0, WMF_OVERWRITE | WMF_FREE) != OK) { printf("munmap_lin: pt_writemap failed\n"); return EFAULT; diff --git a/servers/vm/proto.h b/servers/vm/proto.h index 00d243f9b..db1db0880 100644 --- a/servers/vm/proto.h +++ b/servers/vm/proto.h @@ -107,8 +107,9 @@ _PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp _PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) ); _PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end, u32_t flags, int verify)); -_PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr, - size_t bytes, u32_t flags, u32_t writemapflags)); +_PROTOTYPE( int pt_writemap, (struct vmproc * vmp, pt_t *pt, vir_bytes v, + phys_bytes physaddr, size_t bytes, u32_t flags, + u32_t writemapflags)); _PROTOTYPE( int pt_checkrange, (pt_t *pt, vir_bytes v, size_t bytes, int write)); _PROTOTYPE( int pt_bind, (pt_t *pt, struct vmproc *who) ); _PROTOTYPE( void *vm_allocpage, (phys_bytes *p, int cat)); diff --git a/servers/vm/region.c b/servers/vm/region.c index 362f15626..4556e5de7 100644 --- a/servers/vm/region.c +++ b/servers/vm/region.c @@ -147,7 +147,7 @@ PRIVATE int map_sanitycheck_pt(struct vmproc *vmp, else rw = 0; - r = pt_writemap(&vmp->vm_pt, vr->vaddr + pr->offset, + r = pt_writemap(vmp, &vmp->vm_pt, vr->vaddr + pr->offset, pb->phys, pb->length, PTF_PRESENT | PTF_USER | rw, WMF_VERIFY); if(r != OK) { @@ -319,7 +319,7 @@ PRIVATE int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, else rw = 0; - if(pt_writemap(&vmp->vm_pt, vr->vaddr + pr->offset, + 
if(pt_writemap(vmp, &vmp->vm_pt, vr->vaddr + pr->offset, pb->phys, pb->length, PTF_PRESENT | PTF_USER | rw, #if SANITYCHECKS !pr->written ? 0 : @@ -1683,7 +1683,7 @@ PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region, SANITYCHECK(SCL_DETAIL); - if(pt_writemap(&vmp->vm_pt, regionstart, + if(pt_writemap(vmp, &vmp->vm_pt, regionstart, MAP_NONE, len, 0, WMF_OVERWRITE) != OK) { printf("VM: map_unmap_region: pt_writemap failed\n"); return ENOMEM; @@ -2007,7 +2007,7 @@ PRIVATE int do_map_memory(struct vmproc *vms, struct vmproc *vmd, if(flag < 0) { /* COW share */ pb->share_flag = PBSH_COW; /* Update the page table for the src process. */ - pt_writemap(&vms->vm_pt, offset_s + vrs->vaddr, + pt_writemap(vms, &vms->vm_pt, offset_s + vrs->vaddr, pb->phys, pb->length, pt_flag, WMF_OVERWRITE); } @@ -2015,7 +2015,7 @@ PRIVATE int do_map_memory(struct vmproc *vms, struct vmproc *vmd, pb->share_flag = PBSH_SMAP; } /* Update the page table for the destination process. */ - pt_writemap(&vmd->vm_pt, offset_d + vrd->vaddr, + pt_writemap(vmd, &vmd->vm_pt, offset_d + vrd->vaddr, pb->phys, pb->length, pt_flag, WMF_OVERWRITE); }