]> Zhao Yanbai Git Server - minix.git/commitdiff
SMP - Process is stopped when VM modifies the page tables
author Tomas Hruby <tom@minix3.org>
Wed, 15 Sep 2010 14:11:12 +0000 (14:11 +0000)
committer Tomas Hruby <tom@minix3.org>
Wed, 15 Sep 2010 14:11:12 +0000 (14:11 +0000)
- RTS_VMINHIBIT flag is used to stop a process while VM is modifying
  its page tables

- more generic way of sending synchronous scheduling events among cpus

- do the x-cpu smp sched calls only if the target process is runnable.
  If it is not, it cannot be running and it cannot become runnable
  while this CPU holds the BKL

include/minix/com.h
kernel/smp.c
kernel/smp.h
kernel/system/do_runctl.c
kernel/system/do_vmctl.c
servers/vm/arch/i386/pagetable.c
servers/vm/fork.c
servers/vm/mmap.c
servers/vm/proto.h
servers/vm/region.c

index 270e6406f89479f21f6cf451dd535248690437a5..250e5e21ae5d3791dd38ce3043dbf81e03691bb0 100644 (file)
 #define VMCTL_KERN_PHYSMAP     27
 #define VMCTL_KERN_MAP_REPLY   28
 #define VMCTL_SETADDRSPACE     29
+#define VMCTL_VMINHIBIT_SET    30
+#define VMCTL_VMINHIBIT_CLEAR  31
 
 /* Codes and field names for SYS_SYSCTL. */
 #define SYSCTL_CODE            m1_i1   /* SYSCTL_CODE_* below */
index 77b6cede8e22e925cec3a40aca1d645672fed6ba..38d503457d1982605fa66c44bb970d54cac27582 100644 (file)
@@ -1,3 +1,5 @@
+#include <assert.h>
+
 #include "smp.h"
 #include "interrupt.h"
 
@@ -7,7 +9,7 @@ unsigned bsp_cpu_id;
 
 PUBLIC struct cpu cpus[CONFIG_MAX_CPUS];
 
-/* flags passed to another cpu along with a sched ipi */
+/* info passed to another cpu along with a sched ipi */
 struct sched_ipi_data {
        volatile u32_t  flags;
        volatile u32_t  data;
@@ -16,6 +18,7 @@ struct sched_ipi_data {
 PRIVATE struct sched_ipi_data  sched_ipi_data[CONFIG_MAX_CPUS];
 
 #define SCHED_IPI_STOP_PROC    1
+#define SCHED_IPI_VM_INHIBIT   2
 
 static volatile unsigned ap_cpus_booted;
 
@@ -55,34 +58,77 @@ PUBLIC void smp_schedule(unsigned cpu)
        arch_send_smp_schedule_ipi(cpu);
 }
 
-PUBLIC void smp_schedule_stop_proc(struct proc * p)
+/*
+ * tell another cpu about a task to do and return only after the cpu acks that
+ * the task is finished. If another cpu has already sent a task to the same
+ * cpu, first wait until that task is finished.
+ */
+PRIVATE void smp_schedule_sync(struct proc * p, unsigned task)
 {
        unsigned cpu = p->p_cpu;
 
-       sched_ipi_data[cpu].flags |= SCHED_IPI_STOP_PROC;
+       /* 
+        * if some other cpu made a request to the same cpu, wait until it is
+        * done before proceeding
+        */
+       if ((volatile unsigned)sched_ipi_data[cpu].flags != 0) {
+               BKL_UNLOCK();
+               while ((volatile unsigned)sched_ipi_data[cpu].flags != 0);
+               BKL_LOCK();
+       }
+
+       sched_ipi_data[cpu].flags |= task;
        sched_ipi_data[cpu].data = (u32_t) p;
        arch_send_smp_schedule_ipi(cpu);
+
+       /* wait until the destination cpu finishes its job */
        BKL_UNLOCK();
        while ((volatile unsigned)sched_ipi_data[cpu].flags != 0);
        BKL_LOCK();
 }
 
+PUBLIC void smp_schedule_stop_proc(struct proc * p)
+{
+       if (proc_is_runnable(p))
+               smp_schedule_sync(p, SCHED_IPI_STOP_PROC);
+       else
+               RTS_SET(p, RTS_PROC_STOP);
+       assert(RTS_ISSET(p, RTS_PROC_STOP));
+}
+
+PUBLIC void smp_schedule_vminhibit(struct proc * p)
+{
+       if (proc_is_runnable(p))
+               smp_schedule_sync(p, SCHED_IPI_VM_INHIBIT);
+       else
+               RTS_SET(p, RTS_VMINHIBIT);
+       assert(RTS_ISSET(p, RTS_VMINHIBIT));
+}
+
 PUBLIC void smp_ipi_sched_handler(void)
 {
-       struct proc * p;
+       struct proc * curr;
        unsigned mycpu = cpuid;
        unsigned flgs;
        
        ipi_ack();
        
-       p = get_cpu_var(mycpu, proc_ptr);
+       curr = get_cpu_var(mycpu, proc_ptr);
        flgs = sched_ipi_data[mycpu].flags;
 
-       if (flgs & SCHED_IPI_STOP_PROC) {
-               RTS_SET((struct proc *)sched_ipi_data[mycpu].data, RTS_PROC_STOP);
+       if (flgs) {
+               struct proc * p;
+               p = (struct proc *)sched_ipi_data[mycpu].data;
+
+               if (flgs & SCHED_IPI_STOP_PROC) {
+                       RTS_SET(p, RTS_PROC_STOP);
+               }
+               if (flgs & SCHED_IPI_VM_INHIBIT) {
+                       RTS_SET(p, RTS_VMINHIBIT);
+               }
        }
-       else if (p->p_endpoint != IDLE) {
-               RTS_SET(p, RTS_PREEMPTED);
+       else if (curr->p_endpoint != IDLE) {
+               RTS_SET(curr, RTS_PREEMPTED);
        }
        sched_ipi_data[cpuid].flags = 0;
 }
index 408f166bb4c7ca3e444fc0888095cf408d8566d6..10855a67977551330a036eee6319233689a8e137 100644 (file)
@@ -59,7 +59,10 @@ _PROTOTYPE(void smp_ipi_halt_handler, (void));
 _PROTOTYPE(void smp_ipi_sched_handler, (void));
 
 _PROTOTYPE(void smp_schedule, (unsigned cpu));
+/* stop a process on a different cpu */
 _PROTOTYPE(void smp_schedule_stop_proc, (struct proc * p));
+/* stop a process on a different cpu because its address space is being changed */
+_PROTOTYPE(void smp_schedule_vminhibit, (struct proc * p));
 
 _PROTOTYPE(void arch_send_smp_schedule_ipi, (unsigned cpu));
 _PROTOTYPE(void arch_smp_halt_cpu, (void));
index 3500f28f26e7b33e31bc83f7d86f0ca699b8f50b..16759464c7bf46fb13222971495d6b1798712eb3 100644 (file)
@@ -56,13 +56,13 @@ PUBLIC int do_runctl(struct proc * caller, message * m_ptr)
          /* check if we must stop a process on a different CPU */
          if (rp->p_cpu != cpuid) {
                  smp_schedule_stop_proc(rp);
-                 assert(RTS_ISSET(rp, RTS_PROC_STOP));
                  break;
          }
 #endif
          RTS_SET(rp, RTS_PROC_STOP);
        break;
   case RC_RESUME:
+       assert(RTS_ISSET(rp, RTS_PROC_STOP));
        RTS_UNSET(rp, RTS_PROC_STOP);
        break;
   default:
index 82673deaca01c48b6fc28f9d8861c2a58318d1d1..37f85333d2867a27e9e841c7e10dfcf4b1baea32 100644 (file)
@@ -140,6 +140,23 @@ PUBLIC int do_vmctl(struct proc * caller, message * m_ptr)
                return arch_phys_map_reply(m_ptr->SVMCTL_VALUE,
                        (vir_bytes) m_ptr->SVMCTL_MAP_VIR_ADDR);
        }
+       case VMCTL_VMINHIBIT_SET:
+               /* check if we must stop a process on a different CPU */
+#if CONFIG_SMP
+               if (p->p_cpu != cpuid) {
+                       smp_schedule_vminhibit(p);
+               } else
+#endif
+                       RTS_SET(p, RTS_VMINHIBIT);
+               return OK;
+       case VMCTL_VMINHIBIT_CLEAR:
+               assert(RTS_ISSET(p, RTS_VMINHIBIT));
+               /*
+                * the process is certainly not runnable, no need to tell its
+                * cpu
+                */
+               RTS_UNSET(p, RTS_VMINHIBIT);
+               return OK;
   }
 
   /* Try architecture-specific vmctls. */
index 3675083726032742d50e2e1e53d5f159c6065eee..b07222c44992d84f3a87bce0ce02beccb152d275 100644 (file)
@@ -204,7 +204,7 @@ PRIVATE void vm_freepages(vir_bytes vir, vir_bytes phys, int pages, int reason)
                assert(!(vir % I386_PAGE_SIZE)); 
                assert(!(phys % I386_PAGE_SIZE)); 
                free_mem(ABS2CLICK(phys), pages);
-               if(pt_writemap(&vmprocess->vm_pt, arch_vir2map(vmprocess, vir),
+               if(pt_writemap(vmprocess, &vmprocess->vm_pt, arch_vir2map(vmprocess, vir),
                        MAP_NONE, pages*I386_PAGE_SIZE, 0, WMF_OVERWRITE) != OK)
                        panic("vm_freepages: pt_writemap failed");
        } else {
@@ -325,7 +325,7 @@ PUBLIC void *vm_allocpage(phys_bytes *phys, int reason)
        *phys = CLICK2ABS(newpage);
 
        /* Map this page into our address space. */
-       if((r=pt_writemap(pt, loc, *phys, I386_PAGE_SIZE,
+       if((r=pt_writemap(vmprocess, pt, loc, *phys, I386_PAGE_SIZE,
                I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, 0)) != OK) {
                free_mem(newpage, CLICKSPERPAGE);
                printf("vm_allocpage writemap failed\n");
@@ -365,7 +365,7 @@ PUBLIC void vm_pagelock(void *vir, int lockflag)
                flags |= I386_VM_WRITE;
 
        /* Update flags. */
-       if((r=pt_writemap(pt, m, 0, I386_PAGE_SIZE,
+       if((r=pt_writemap(vmprocess, pt, m, 0, I386_PAGE_SIZE,
                flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) {
                panic("vm_lockpage: pt_writemap failed");
        }
@@ -605,7 +605,7 @@ PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
        assert((vir_bytes) pt->pt_dir >= src_vmp->vm_stacktop);
        viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_dir);
        physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK;
-       if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
+       if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
                I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
                WMF_OVERWRITE)) != OK) {
                return r;
@@ -625,7 +625,7 @@ PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
                assert((vir_bytes) pt->pt_pt[pde] >= src_vmp->vm_stacktop);
                viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_pt[pde]);
                physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK;
-               if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
+               if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
                        I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
                        WMF_OVERWRITE)) != OK) {
                        return r;
@@ -642,13 +642,28 @@ PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
 /*===========================================================================*
  *                             pt_writemap                                  *
  *===========================================================================*/
-PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
-       size_t bytes, u32_t flags, u32_t writemapflags)
+PUBLIC int pt_writemap(struct vmproc * vmp,
+                       pt_t *pt,
+                       vir_bytes v,
+                       phys_bytes physaddr,
+                       size_t bytes,
+                       u32_t flags,
+                       u32_t writemapflags)
 {
 /* Write mapping into page table. Allocate a new page table if necessary. */
 /* Page directory and table entries for this virtual address. */
        int p, r, pages;
        int verify = 0;
+       int ret = OK;
+
+       /* FIXME
+        * don't do it every time; stop the process only on the first change
+        * and resume the execution on the last change. Do it in a wrapper of
+        * this function
+        */
+       if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
+                       !(vmp->vm_flags & VMF_EXITING))
+               sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0);
 
        if(writemapflags & WMF_VERIFY)
                verify = 1;
@@ -669,9 +684,9 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
         * before we start writing in any of them, because it's a pain
         * to undo our work properly.
         */
-       r = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify);
-       if(r != OK) {
-               return r;
+       ret = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify);
+       if(ret != OK) {
+               goto resume_exit;
        }
 
        /* Now write in them. */
@@ -729,7 +744,8 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
                                printf(" masked %s; ",
                                        ptestr(maskedentry));
                                printf(" expected %s\n", ptestr(entry));
-                               return EFAULT;
+                               ret = EFAULT;
+                               goto resume_exit;
                        }
                } else {
                        /* Write pagetable entry. */
@@ -743,7 +759,13 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
                v += I386_PAGE_SIZE;
        }
 
-       return OK;
+resume_exit:
+
+       if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
+                       !(vmp->vm_flags & VMF_EXITING))
+               sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0);
+
+       return ret;
 }
 
 /*===========================================================================*
@@ -923,7 +945,7 @@ PUBLIC void pt_init(phys_bytes usedlimit)
                 /* We have to write the new position in the PT,
                  * so we can move our segments.
                  */ 
-                if(pt_writemap(newpt, v+moveup, v, I386_PAGE_SIZE,
+                if(pt_writemap(vmprocess, newpt, v+moveup, v, I386_PAGE_SIZE,
                         I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK)
                         panic("pt_init: pt_writemap failed");
         }
@@ -1212,7 +1234,7 @@ PUBLIC int pt_mapkernel(pt_t *pt)
        }
 
        for(i = 0; i < kernmappings; i++) {
-               if(pt_writemap(pt,
+               if(pt_writemap(NULL, pt,
                        kern_mappings[i].lin_addr,
                        kern_mappings[i].phys_addr,
                        kern_mappings[i].len,
index bd872bccbb7f2979c9807f6832fa43d51f4c2d90..0466b2225661ccaf0e731ce140b15474c8503e3b 100644 (file)
@@ -190,6 +190,9 @@ PUBLIC int do_fork(message *msg)
         panic("do_fork can't sys_fork: %d", r);
   }
 
+  if((r=pt_bind(&vmc->vm_pt, vmc)) != OK)
+       panic("fork can't pt_bind: %d", r);
+
   if(fullvm) {
        vir_bytes vir;
        /* making these messages writable is an optimisation
@@ -201,9 +204,6 @@ PUBLIC int do_fork(message *msg)
        handle_memory(vmp, vir, sizeof(message), 1);
   }
 
-  if((r=pt_bind(&vmc->vm_pt, vmc)) != OK)
-       panic("fork can't pt_bind: %d", r);
-
   /* Inform caller of new child endpoint. */
   msg->VMF_CHILD_ENDPOINT = vmc->vm_endpoint;
 
index 47d715b5796368b3092f2ec5eec9dfaabaa51b7d..d0996b2bfcdfdf2a0fae150132c404c186458874 100644 (file)
@@ -425,7 +425,7 @@ PRIVATE int munmap_lin(vir_bytes addr, size_t len)
                return EFAULT;
        }
 
-       if(pt_writemap(&vmproc[VM_PROC_NR].vm_pt, addr, MAP_NONE, len, 0,
+       if(pt_writemap(NULL, &vmproc[VM_PROC_NR].vm_pt, addr, MAP_NONE, len, 0,
                WMF_OVERWRITE | WMF_FREE) != OK) {
                printf("munmap_lin: pt_writemap failed\n");
                return EFAULT;
index 00d243f9bb2abc5ece76ae60abdd4ba4e00c855a..db1db0880389cb42a97bd657f6fe98352d863127 100644 (file)
@@ -107,8 +107,9 @@ _PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp
 _PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) );
 _PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end,
         u32_t flags, int verify));
-_PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr, 
-        size_t bytes, u32_t flags, u32_t writemapflags));
+_PROTOTYPE( int pt_writemap, (struct vmproc * vmp, pt_t *pt, vir_bytes v,
+                       phys_bytes physaddr, size_t bytes, u32_t flags,
+                       u32_t writemapflags));
 _PROTOTYPE( int pt_checkrange, (pt_t *pt, vir_bytes v,  size_t bytes, int write));
 _PROTOTYPE( int pt_bind, (pt_t *pt, struct vmproc *who)                        );
 _PROTOTYPE( void *vm_allocpage, (phys_bytes *p, int cat));
index 362f156262738aa15116ab04b6e1f5062c233be5..4556e5de736d1a400356555ffebd839b6b22ef2b 100644 (file)
@@ -147,7 +147,7 @@ PRIVATE int map_sanitycheck_pt(struct vmproc *vmp,
        else
                rw = 0;
 
-       r = pt_writemap(&vmp->vm_pt, vr->vaddr + pr->offset,
+       r = pt_writemap(vmp, &vmp->vm_pt, vr->vaddr + pr->offset,
          pb->phys, pb->length, PTF_PRESENT | PTF_USER | rw, WMF_VERIFY);
 
        if(r != OK) {
@@ -319,7 +319,7 @@ PRIVATE int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
        else
                rw = 0;
 
-       if(pt_writemap(&vmp->vm_pt, vr->vaddr + pr->offset,
+       if(pt_writemap(vmp, &vmp->vm_pt, vr->vaddr + pr->offset,
          pb->phys, pb->length, PTF_PRESENT | PTF_USER | rw,
 #if SANITYCHECKS
                !pr->written ? 0 :
@@ -1683,7 +1683,7 @@ PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region,
 
        SANITYCHECK(SCL_DETAIL);
 
-       if(pt_writemap(&vmp->vm_pt, regionstart,
+       if(pt_writemap(vmp, &vmp->vm_pt, regionstart,
          MAP_NONE, len, 0, WMF_OVERWRITE) != OK) {
            printf("VM: map_unmap_region: pt_writemap failed\n");
            return ENOMEM;
@@ -2007,7 +2007,7 @@ PRIVATE int do_map_memory(struct vmproc *vms, struct vmproc *vmd,
                         if(flag < 0) {                  /* COW share */
                                 pb->share_flag = PBSH_COW;
                                 /* Update the page table for the src process. */
-                                pt_writemap(&vms->vm_pt, offset_s + vrs->vaddr,
+                                pt_writemap(vms, &vms->vm_pt, offset_s + vrs->vaddr,
                                         pb->phys, pb->length,
                                         pt_flag, WMF_OVERWRITE);
                         }
@@ -2015,7 +2015,7 @@ PRIVATE int do_map_memory(struct vmproc *vms, struct vmproc *vmd,
                                 pb->share_flag = PBSH_SMAP;
                         }
                         /* Update the page table for the destination process. */
-                        pt_writemap(&vmd->vm_pt, offset_d + vrd->vaddr,
+                        pt_writemap(vmd, &vmd->vm_pt, offset_d + vrd->vaddr,
                                 pb->phys, pb->length, pt_flag, WMF_OVERWRITE);
                 }