Zhao Yanbai Git Server - minix.git/commitdiff
Primary goal for these changes is:
author Ben Gras <ben@minix3.org>
Mon, 21 Sep 2009 14:31:52 +0000 (14:31 +0000)
committer Ben Gras <ben@minix3.org>
Mon, 21 Sep 2009 14:31:52 +0000 (14:31 +0000)
  - the kernel no longer has its own page table that is loaded
    on every kernel entry (trap, interrupt, exception). The primary
    purpose is to reduce the number of required page table reloads.
Result:
  - kernel can only access memory of the process that was running when
    the kernel was entered
  - kernel must be mapped into every process page table, so traps to
    kernel keep working
Problem:
  - kernel must often access memory of arbitrary processes (e.g. send
    arbitrary processes messages); this can't happen directly any more;
    usually because that process' page table isn't loaded at all, sometimes
    because that memory isn't mapped in at all, sometimes because it isn't
    mapped in read-write.
So:
  - kernel must be able to map in memory of any process, in its own
    address space.
Implementation:
  - VM and kernel share a range of memory in which addresses of
    all page tables of all processes are available. This has two purposes:
      . Kernel has to know what data to copy in order to map in a range
      . Kernel has to know where to write the data in order to map it in
    That last point is because kernel has to write in the currently loaded
    page table.
  - Processes and kernel are separated through segments; kernel segments
    haven't changed.
  - The kernel keeps the process whose page table is currently loaded
    in 'ptproc.'
  - If it wants to map in a range of memory, it writes the value of the
    page directory entry for that range into the page directory entry
    in the currently loaded map. There is a slot reserved for such
    purposes. The kernel can then access this memory directly.
  - In order to do this, its segment has been increased (and the
    segments of processes start where it ends).
  - In the pagefault handler, detect if the kernel is doing
    'trappable' memory access (i.e. a pagefault isn't a fatal
    error) and if so,
       - set the saved instruction pointer to phys_copy_fault,
         breaking out of phys_copy
       - set the saved eax register to the address of the page
         fault, both for sanity checking and for checking in
         which of the two ranges that phys_copy was called
         with the fault occurred
  - Some boot-time processes do not have their own page table,
    and are mapped in with the kernel, and separated with
    segments. The kernel detects this using HASPT. If such a
    process has to be scheduled, any page table will work and
    no page table switch is done.

Major changes in kernel are
  - When accessing user processes' memory, the kernel no longer
    explicitly checks beforehand whether that memory is OK.
    It simply makes the mapping (if necessary), tries to do the
    operation, and traps the pagefault if that memory isn't present;
    if that happens, the copy function returns EFAULT.
    So all of the CHECKRANGE_OR_SUSPEND macros are gone.
  - Kernel no longer has to copy/read and parse page tables.
  - A message copying optimisation: when messages are copied, and
    the recipient isn't mapped in, they are copied into a buffer
    in the kernel. This is done in QueueMess. The next time
    the recipient is scheduled, this message is copied into
    its memory. This happens in schedcheck().
    This eliminates the mapping/copying step for messages, and makes
    it easier to deliver messages. This eliminates soft_notify.
  - Kernel no longer creates a page table at all, so the vm_setbuf
    and pagetable writing in memory.c is gone.

Minor changes in kernel are
  - ipc_stats thrown out, wasn't used
  - misc flags all renamed to MF_*
  - NOREC_* macros to enter and leave functions that should not
    be called recursively; just sanity checks really
  - code to fully decode segment selectors and descriptors
    to print on exceptions
  - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1

39 files changed:
kernel/arch/i386/arch_do_vmctl.c
kernel/arch/i386/do_sdevio.c
kernel/arch/i386/exception.c
kernel/arch/i386/include/archconst.h
kernel/arch/i386/include/archtypes.h
kernel/arch/i386/klib386.s
kernel/arch/i386/memory.c
kernel/arch/i386/mpx386.s
kernel/arch/i386/protect.c
kernel/arch/i386/proto.h
kernel/arch/i386/sha1.h [new file with mode: 0644]
kernel/arch/i386/system.c
kernel/arch/i386/vm.h [deleted file]
kernel/clock.c
kernel/debug.c
kernel/debug.h
kernel/glo.h
kernel/main.c
kernel/proc.c
kernel/proc.h
kernel/proto.h
kernel/system.c
kernel/system.h
kernel/system/Makefile
kernel/system/do_devio.c
kernel/system/do_exec.c
kernel/system/do_fork.c
kernel/system/do_getinfo.c
kernel/system/do_irqctl.c
kernel/system/do_memset.c
kernel/system/do_safecopy.c
kernel/system/do_sigsend.c
kernel/system/do_sysctl.c
kernel/system/do_umap.c
kernel/system/do_vm_setbuf.c [deleted file]
kernel/system/do_vmctl.c
kernel/system/do_vtimer.c
kernel/table.c
kernel/vm.h

index 523165d82a19f4c43bde6b1b9808c89a19f68ebb..2a8d526a21e423958f84d27519af6865edf4e1d2 100644 (file)
@@ -10,7 +10,9 @@
 #include "../../system.h"
 #include <minix/type.h>
 
-extern u32_t kernel_cr3;
+#include "proto.h"
+
+extern u32_t *vm_pagedirs;
 
 /*===========================================================================*
  *                             arch_do_vmctl                                *
@@ -30,7 +32,7 @@ struct proc *p;
                        p->p_seg.p_cr3 = m_ptr->SVMCTL_VALUE;
                        p->p_misc_flags |= MF_FULLVM;
                } else {
-                       p->p_seg.p_cr3 = kernel_cr3;
+                       p->p_seg.p_cr3 = 0;
                        p->p_misc_flags &= ~MF_FULLVM;
                }
                RTS_LOCK_UNSET(p, VMINHIBIT);
@@ -53,8 +55,33 @@ struct proc *p;
                m_ptr->SVMCTL_PF_I386_ERR = rp->p_pagefault.pf_flags;
                return OK;
        }
+       case VMCTL_I386_KERNELLIMIT:
+       {
+               int r;
+               /* VM wants kernel to increase its segment. */
+               r = prot_set_kern_seg_limit(m_ptr->SVMCTL_VALUE);
+               return r;
+       }
+       case VMCTL_I386_PAGEDIRS:
+       {
+               int pde;
+               vm_pagedirs = (u32_t *) m_ptr->SVMCTL_VALUE;
+               return OK;
+       }
+       case VMCTL_I386_FREEPDE:
+       {
+               i386_freepde(m_ptr->SVMCTL_VALUE);
+               return OK;
+       }
+       case VMCTL_FLUSHTLB:
+       {
+               level0(reload_cr3);
+               return OK;
+       }
   }
 
+
+
   kprintf("arch_do_vmctl: strange param %d\n", m_ptr->SVMCTL_PARAM);
   return EINVAL;
 }
index 7443c132fadbac9d18084dd2f750befa77fed9d7..63b152aff9ed1ed5b6045703b161211b8accd522 100644 (file)
@@ -24,6 +24,8 @@
 PUBLIC int do_sdevio(m_ptr)
 register message *m_ptr;       /* pointer to request message */
 {
+  vir_bytes newoffset;
+  endpoint_t newep;
   int proc_nr, proc_nr_e = m_ptr->DIO_VEC_ENDPT;
   int count = m_ptr->DIO_VEC_SIZE;
   long port = m_ptr->DIO_PORT;
@@ -32,6 +34,9 @@ register message *m_ptr;      /* pointer to request message */
   struct proc *rp;
   struct priv *privp;
   struct io_range *iorp;
+  int rem;
+  vir_bytes addr;
+  struct proc *destproc;
 
   /* Allow safe copies and accesses to SELF */
   if ((m_ptr->DIO_REQUEST & _DIO_SAFEMASK) != _DIO_SAFE &&
@@ -64,11 +69,23 @@ register message *m_ptr;    /* pointer to request message */
   /* Check for 'safe' variants. */
   if((m_ptr->DIO_REQUEST & _DIO_SAFEMASK) == _DIO_SAFE) {
      /* Map grant address to physical address. */
-     if ((phys_buf = umap_verify_grant(proc_addr(proc_nr), who_e,
+     if(verify_grant(proc_nr_e, who_e, 
        (vir_bytes) m_ptr->DIO_VEC_ADDR,
-       (vir_bytes) m_ptr->DIO_OFFSET, count,
-       req_dir == _DIO_INPUT ? CPF_WRITE : CPF_READ)) == 0)
-         return(EPERM);
+       count,
+       req_dir == _DIO_INPUT ? CPF_WRITE : CPF_READ,
+       (vir_bytes) m_ptr->DIO_OFFSET, 
+       &newoffset, &newep) != OK) {
+       printf("do_sdevio: verify_grant failed\n");
+       return EPERM;
+    }
+       if(!isokendpt(newep, &proc_nr))
+               return(EINVAL);
+     destproc = proc_addr(proc_nr);
+     if ((phys_buf = umap_local(destproc, D,
+        (vir_bytes) newoffset, count)) == 0) {
+       printf("do_sdevio: umap_local failed\n");
+         return(EFAULT);
+     }
   } else {
      if(proc_nr != who_p)
      {
@@ -77,10 +94,14 @@ register message *m_ptr;    /* pointer to request message */
        return EPERM;
      }
      /* Get and check physical address. */
-     if ((phys_buf = umap_virtual(proc_addr(proc_nr), D,
+     if ((phys_buf = umap_local(proc_addr(proc_nr), D,
         (vir_bytes) m_ptr->DIO_VEC_ADDR, count)) == 0)
          return(EFAULT);
+     destproc = proc_addr(proc_nr);
   }
+     /* current process must be target for phys_* to be OK */
+
+  vm_set_cr3(destproc);
 
        switch (io_type)
        {
index 35fb5544090e648b41e11e1504178bc088926a5e..7e54f2745a533cb8d96d03e99eb027e2062e2f96 100755 (executable)
 #include <string.h>
 #include <minix/sysutil.h>
 #include "../../proc.h"
+#include "../../proto.h"
+#include "../../vm.h"
 
-extern int vm_copy_in_progress;
+extern int vm_copy_in_progress, catch_pagefaults;
 extern struct proc *vm_copy_from, *vm_copy_to;
-extern u32_t vm_copy_from_v, vm_copy_to_v;
-extern u32_t vm_copy_from_p, vm_copy_to_p, vm_copy_cr3;
 
-u32_t pagefault_cr2, pagefault_count = 0;
-
-void pagefault(struct proc *pr, int trap_errno)
+void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno,
+       u32_t *old_eipptr, u32_t *old_eaxptr, u32_t pagefaultcr2)
 {
        int s;
        vir_bytes ph;
        u32_t pte;
+       int procok = 0, pcok = 0, rangeok = 0;
+       int in_physcopy = 0;
+       vir_bytes test_eip;
 
-       if(pagefault_count != 1)
-               minix_panic("recursive pagefault", pagefault_count);
+       vmassert(old_eipptr);
+       vmassert(old_eaxptr);
 
-       /* Don't schedule this process until pagefault is handled. */
-       if(RTS_ISSET(pr, PAGEFAULT))
-               minix_panic("PAGEFAULT set", pr->p_endpoint);
-       RTS_LOCK_SET(pr, PAGEFAULT);
+       vmassert(*old_eipptr == old_eip);
+       vmassert(old_eipptr != &old_eip);
+
+#if 0
+       printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n",
+               pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3());
+#endif
+
+       if(pr->p_seg.p_cr3) {
+               vmassert(pr->p_seg.p_cr3 == read_cr3());
+       }
+
+       test_eip = k_reenter ? old_eip : pr->p_reg.pc;
+
+       in_physcopy = (test_eip > (vir_bytes) phys_copy) &&
+          (test_eip < (vir_bytes) phys_copy_fault);
+
+       if((k_reenter || iskernelp(pr)) &&
+               catch_pagefaults && in_physcopy) {
+#if 0
+               printf("pf caught! addr 0x%lx\n", pagefaultcr2);
+#endif
+               *old_eipptr = (u32_t) phys_copy_fault;
+               *old_eaxptr = pagefaultcr2;
+
+               return;
+       }
 
-       if(pr->p_endpoint <= INIT_PROC_NR && !(pr->p_misc_flags & MF_FULLVM)) {
+       /* System processes that don't have their own page table can't
+        * have page faults. VM does have its own page table but also
+        * can't have page faults (because VM has to handle them).
+        */
+       if(k_reenter || (pr->p_endpoint <= INIT_PROC_NR &&
+        !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) {
                /* Page fault we can't / don't want to
                 * handle.
                 */
-               kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x\n",
+               kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, k_reenter %d\n",
                        pr->p_endpoint, pr->p_name, pr->p_reg.pc,
-                       pagefault_cr2, trap_errno);
+                       pagefaultcr2, trap_errno, k_reenter);
                proc_stacktrace(pr);
                minix_panic("page fault in system process", pr->p_endpoint);
 
                return;
        }
 
+       /* Don't schedule this process until pagefault is handled. */
+       vmassert(pr->p_seg.p_cr3 == read_cr3());
+       vmassert(!RTS_ISSET(pr, PAGEFAULT));
+       RTS_LOCK_SET(pr, PAGEFAULT);
+
        /* Save pagefault details, suspend process,
         * add process to pagefault chain,
         * and tell VM there is a pagefault to be
         * handled.
         */
-       pr->p_pagefault.pf_virtual = pagefault_cr2;
+       pr->p_pagefault.pf_virtual = pagefaultcr2;
        pr->p_pagefault.pf_flags = trap_errno;
        pr->p_nextpagefault = pagefaults;
        pagefaults = pr;
-       lock_notify(HARDWARE, VM_PROC_NR);
-
-       pagefault_count = 0;
-
-#if 0
-       kprintf("pagefault for process %d ('%s'), pc = 0x%x\n",
-                       pr->p_endpoint, pr->p_name, pr->p_reg.pc);
-       proc_stacktrace(pr);
-#endif
+               
+       mini_notify(proc_addr(HARDWARE), VM_PROC_NR);
 
        return;
 }
@@ -70,12 +98,16 @@ void pagefault(struct proc *pr, int trap_errno)
 /*===========================================================================*
  *                             exception                                    *
  *===========================================================================*/
-PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags)
+PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags,
+       old_eipptr, old_eaxptr, pagefaultcr2)
 unsigned vec_nr;
 u32_t trap_errno;
 u32_t old_eip;
 U16_t old_cs;
 u32_t old_eflags;
+u32_t *old_eipptr;
+u32_t *old_eaxptr;
+u32_t pagefaultcr2;
 {
 /* An exception or unexpected interrupt has occurred. */
 
@@ -108,16 +140,14 @@ struct proc *t;
   register struct ex_s *ep;
   struct proc *saved_proc;
 
-#if DEBUG_SCHED_CHECK
-  for (t = BEG_PROC_ADDR; t < END_PROC_ADDR; ++t) {
-       if(t->p_magic != PMAGIC)
-               kprintf("entry %d broken\n", t->p_nr);
+  if(k_reenter > 2) {
+       /* This can't end well. */
+       minix_panic("exception: k_reenter too high", k_reenter);
   }
-#endif
 
   /* Save proc_ptr, because it may be changed by debug statements. */
   saved_proc = proc_ptr;       
-
+  
   ep = &ex_data[vec_nr];
 
   if (vec_nr == 2) {           /* spurious NMI on some machines */
@@ -126,8 +156,9 @@ struct proc *t;
   }
 
   if(vec_nr == PAGE_FAULT_VECTOR) {
-               pagefault(saved_proc, trap_errno);
-               return;
+       pagefault(old_eip, saved_proc, trap_errno,
+               old_eipptr, old_eaxptr, pagefaultcr2);
+       return;
   }
 
   /* If an exception occurs while running a process, the k_reenter variable 
@@ -137,22 +168,19 @@ struct proc *t;
   if (k_reenter == 0 && ! iskernelp(saved_proc)) {
        {
 
-               kprintf(
-"exception for process %d, endpoint %d ('%s'), pc = 0x%x:0x%x, sp = 0x%x:0x%x\n",
-                       proc_nr(saved_proc), saved_proc->p_endpoint,
-                       saved_proc->p_name,
-                       saved_proc->p_reg.cs, saved_proc->p_reg.pc,
-                       saved_proc->p_reg.ss, saved_proc->p_reg.sp);
                kprintf(
   "vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n",
                        vec_nr, (unsigned long)trap_errno,
                        (unsigned long)old_eip, old_cs,
                        (unsigned long)old_eflags);
+               printseg("cs: ", 1, saved_proc, old_cs);
+               printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds);
+               if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) {
+                       printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss);
+               }
                proc_stacktrace(saved_proc);
        }
 
-       kprintf("kernel: cause_sig %d for %d\n",
-               ep->signum, saved_proc->p_endpoint);
        cause_sig(proc_nr(saved_proc), ep->signum);
        return;
   }
@@ -168,7 +196,7 @@ struct proc *t;
        vec_nr, trap_errno, old_eip, old_cs, old_eflags);
   /* TODO should we enable this only when compiled for some debug mode? */
   if (saved_proc) {
-         kprintf("process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name);
+         kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name);
          kprintf("pc = %u:0x%x\n", (unsigned) saved_proc->p_reg.cs,
                          (unsigned) saved_proc->p_reg.pc);
          proc_stacktrace(saved_proc);
@@ -184,24 +212,30 @@ struct proc *t;
 /*===========================================================================*
  *                             stacktrace                                   *
  *===========================================================================*/
-PUBLIC void proc_stacktrace(struct proc *proc)
+PUBLIC void proc_stacktrace(struct proc *whichproc)
 {
        reg_t bp, v_bp, v_pc, v_hbp;
+       int iskernel;
+
+       v_bp = whichproc->p_reg.fp;
 
-       v_bp = proc->p_reg.fp;
+       iskernel = iskernelp(whichproc);
 
-       kprintf("%8.8s %6d 0x%lx ",
-               proc->p_name, proc->p_endpoint, proc->p_reg.pc);
+       kprintf("%-8.8s %6d 0x%lx ",
+               whichproc->p_name, whichproc->p_endpoint, whichproc->p_reg.pc);
 
        while(v_bp) {
-               if(data_copy(proc->p_endpoint, v_bp,
-                       SYSTEM, (vir_bytes) &v_hbp, sizeof(v_hbp)) != OK) {
+
+#define PRCOPY(pr, pv, v, n) \
+  (iskernel ? (memcpy((char *) v, (char *) pv, n), OK) : \
+     data_copy(pr->p_endpoint, pv, SYSTEM, (vir_bytes) (v), n))
+
+               if(PRCOPY(whichproc, v_bp, &v_hbp, sizeof(v_hbp)) != OK) {
                        kprintf("(v_bp 0x%lx ?)", v_bp);
                        break;
                }
-               if(data_copy(proc->p_endpoint, v_bp + sizeof(v_pc),
-                       SYSTEM, (vir_bytes) &v_pc, sizeof(v_pc)) != OK) {
-                       kprintf("(v_pc 0x%lx ?)", v_pc);
+               if(PRCOPY(whichproc, v_bp + sizeof(v_pc), &v_pc, sizeof(v_pc)) != OK) {
+                       kprintf("(v_pc 0x%lx ?)", v_bp + sizeof(v_pc));
                        break;
                }
                kprintf("0x%lx ", (unsigned long) v_pc);
index d968b7b15e507b08a110d0264a6a8a257554067e..0bf7a3cbd825a19e13daac9af25d94babfeb028f 100644 (file)
@@ -54,6 +54,7 @@
 #define INTR_PRIVILEGE       0 /* kernel and interrupt handlers */
 #define TASK_PRIVILEGE       1 /* kernel tasks */
 #define USER_PRIVILEGE       3 /* servers and user processes */
+#define RPL_MASK             0x03      /* bits in selector RPL */
 
 /* 286 hardware constants. */
 
 #define IOPL_MASK 0x003000
 
 #define vir2phys(vir)   (kinfo.data_base + (vir_bytes) (vir))
+#define phys2vir(ph)   ((vir_bytes) (ph) - kinfo.data_base)
 
 #endif /* _I386_ACONST_H */
index ac17eeb7f2125bffa2e3201e9d004dcfcdb1af4b..bdd8f8246e6cf27e23fc23f2a5509dd79614ffa1 100644 (file)
@@ -56,7 +56,7 @@ struct segdesc_s {            /* segment descriptor for protected mode */
 typedef struct segframe {
        reg_t p_ldt_sel;    /* selector in gdt with ldt base and limit */
        reg_t   p_cr3;          /* page table root */
-       struct segdesc_s p_ldt[2+NR_REMOTE_SEGS]; /* CS, DS and remote */
+       struct segdesc_s p_ldt[LDT_SIZE]; /* CS, DS and remote */
 } segframe_t;
 
 /* Page fault event. Stored in process table. Only valid if PAGEFAULT
@@ -68,5 +68,7 @@ struct pagefault
        u32_t   pf_flags;       /* Pagefault flags on stack. */
 };
 
+#define INMEMORY(p) (!p->p_seg.p_cr3 || ptproc == p)
+
 #endif /* #ifndef _I386_TYPES_H */
 
index 0aff6c0bd8a353c2b447b8b435684a6b2e2f46a6..cc6cd748ef47e215a42f4d98637e5f3db22fa85a 100755 (executable)
@@ -8,7 +8,6 @@
 #include <ibm/interrupt.h>
 #include <archconst.h>
 #include "../../const.h"
-#include "vm.h"
 #include "sconst.h"
 
 ! This file contains a number of assembly code utility routines needed by the
@@ -28,6 +27,7 @@
 .define        _intr_unmask    ! enable an irq at the 8259 controller
 .define        _intr_mask      ! disable an irq
 .define        _phys_copy      ! copy data from anywhere to anywhere in memory
+.define        _phys_copy_fault! phys_copy pagefault
 .define        _phys_memset    ! write pattern anywhere in memory
 .define        _mem_rdw        ! copy one word from [segment:offset]
 .define        _reset          ! reset the system
 .define        _level0         ! call a function at level 0
 .define        _read_cpu_flags ! read the cpu flags
 .define        _read_cr0       ! read cr0
-.define        _write_cr3      ! write cr3
-.define _last_cr3
+.define        _getcr3val
 .define        _write_cr0      ! write a value in cr0
 .define        _read_cr4
+.define        _thecr3
 .define        _write_cr4
-
-.define        _kernel_cr3     
+.define        _catch_pagefaults
 
 ! The routines only guarantee to preserve the registers the C compiler
 ! expects to be preserved (ebx, esi, edi, ebp, esp, segment registers, and
@@ -156,55 +155,6 @@ csinit:    mov     eax, DS_SELECTOR
        ret
 
 
-!*===========================================================================*
-!*                             cp_mess                                      *
-!*===========================================================================*
-! PUBLIC void cp_mess(int src, phys_clicks src_clicks, vir_bytes src_offset,
-!                    phys_clicks dst_clicks, vir_bytes dst_offset);
-! This routine makes a fast copy of a message from anywhere in the address
-! space to anywhere else.  It also copies the source address provided as a
-! parameter to the call into the first word of the destination message.
-!
-! Note that the message size, "Msize" is in DWORDS (not bytes) and must be set
-! correctly.  Changing the definition of message in the type file and not
-! changing it here will lead to total disaster.
-!
-!CM_ARGS       =       4 + 4 + 4 + 4 + 4       ! 4 + 4 + 4 + 4 + 4
-!!             es  ds edi esi eip      proc scl sof dcl dof
-!
-!      .align  16
-!_cp_mess:
-!      cld
-!      push    esi
-!      push    edi
-!      push    ds
-!      push    es
-!
-!      mov     eax, FLAT_DS_SELECTOR
-!      mov     ds, ax
-!      mov     es, ax
-!
-!      mov     esi, CM_ARGS+4(esp)             ! src clicks
-!      shl     esi, CLICK_SHIFT
-!      add     esi, CM_ARGS+4+4(esp)           ! src offset
-!      mov     edi, CM_ARGS+4+4+4(esp)         ! dst clicks
-!      shl     edi, CLICK_SHIFT
-!      add     edi, CM_ARGS+4+4+4+4(esp)       ! dst offset
-!
-!      mov     eax, CM_ARGS(esp)       ! process number of sender
-!      stos                            ! copy number of sender to dest message
-!      add     esi, 4                  ! do not copy first word
-!      mov     ecx, Msize - 1          ! remember, first word does not count
-!      rep
-!      movs                            ! copy the message
-!
-!      pop     es
-!      pop     ds
-!      pop     edi
-!      pop     esi
-!      ret                             ! that is all folks!
-!
-
 !*===========================================================================*
 !*                             exit                                         *
 !*===========================================================================*
@@ -236,8 +186,6 @@ _phys_insw:
        push    edi
        push    es
 
-       LOADKERNELCR3
-
        mov     ecx, FLAT_DS_SELECTOR
        mov     es, cx
        mov     edx, 8(ebp)             ! port to read from
@@ -264,8 +212,6 @@ _phys_insb:
        push    edi
        push    es
 
-       LOADKERNELCR3
-
        mov     ecx, FLAT_DS_SELECTOR
        mov     es, cx
        mov     edx, 8(ebp)             ! port to read from
@@ -293,8 +239,6 @@ _phys_outsw:
        push    esi
        push    ds
 
-       LOADKERNELCR3
-
        mov     ecx, FLAT_DS_SELECTOR
        mov     ds, cx
        mov     edx, 8(ebp)             ! port to write to
@@ -322,8 +266,6 @@ _phys_outsb:
        push    esi
        push    ds
 
-       LOADKERNELCR3
-
        mov     ecx, FLAT_DS_SELECTOR
        mov     ds, cx
        mov     edx, 8(ebp)             ! port to write to
@@ -416,7 +358,7 @@ dis_already:
 !*===========================================================================*
 !*                             phys_copy                                    *
 !*===========================================================================*
-! PUBLIC void phys_copy(phys_bytes source, phys_bytes destination,
+! PUBLIC phys_bytes phys_copy(phys_bytes source, phys_bytes destination,
 !                      phys_bytes bytecount);
 ! Copy a block of physical memory.
 
@@ -430,8 +372,6 @@ _phys_copy:
        push    edi
        push    es
 
-       LOADKERNELCR3
-
        mov     eax, FLAT_DS_SELECTOR
        mov     es, ax
 
@@ -457,6 +397,8 @@ pc_small:
        rep
    eseg        movsb
 
+       mov     eax, 0                  ! 0 means: no fault
+_phys_copy_fault:                      ! kernel can send us here
        pop     es
        pop     edi
        pop     esi
@@ -477,8 +419,6 @@ _phys_memset:
        push    ebx
        push    ds
 
-       LOADKERNELCR3
-
        mov     esi, 8(ebp)
        mov     eax, 16(ebp)
        mov     ebx, FLAT_DS_SELECTOR
@@ -633,14 +573,13 @@ _write_cr4:
        pop     ebp
        ret
 
+
 !*===========================================================================*
-!*                             write_cr3                               *
+!*                             getcr3val                               *
 !*===========================================================================*
-! PUBLIC void write_cr3(unsigned long value);
-_write_cr3:
-       push    ebp
-       mov     ebp, esp
-       LOADCR3WITHEAX(0x22, 8(ebp))
-       pop     ebp
+! PUBLIC unsigned long getcr3val(void);
+_getcr3val:
+       mov     eax, cr3
+       mov     (_thecr3), eax
        ret
 
index a04b0e94c120f611c26e8eb5668ffb0ff151c41b..11f6732852786ecb9b79980170801e84a8bdab9b 100644 (file)
@@ -1,4 +1,5 @@
 
+
 #include "../../kernel.h"
 #include "../../proc.h"
 #include "../../vm.h"
 
 #include "proto.h"
 #include "../../proto.h"
+#include "../../proto.h"
 #include "../../debug.h"
 
-/* VM functions and data. */
-PRIVATE u32_t vm_cr3;
-PUBLIC u32_t kernel_cr3;
-extern u32_t cswitch;
-u32_t last_cr3 = 0;
+#include "sha1.h"
+
+PRIVATE int psok = 0;
+
+/* Address of page directory entry 'pi' of process 'pr' inside the
+ * vm_pagedirs area. The area is indexed by process number (p_nr) in
+ * steps of I386_PAGE_SIZE, and by entry in steps of I386_VM_PT_ENT_SIZE.
+ * Arguments are parenthesized so that any expression may be passed.
+ */
+#define PROCPDEPTR(pr, pi) ((u32_t *) ((u8_t *) vm_pagedirs +\
+                               I386_PAGE_SIZE * (pr)->p_nr +   \
+                               I386_VM_PT_ENT_SIZE * (pi)))
+
+u8_t *vm_pagedirs = NULL;
+
+/* 'No page directory entry' marker for the PDE bookkeeping below. */
+#define NOPDE (-1)
+/* Bit for PDE slot n in dirtypde. The shift is done on an unsigned
+ * 32-bit value so that n == 31 does not overflow a signed long.
+ */
+#define PDEMASK(n) ((u32_t) 1 << (n))
+PUBLIC u32_t dirtypde;
+#define WANT_FREEPDES (sizeof(dirtypde)*8-5)
+PRIVATE int nfreepdes = 0, freepdes[WANT_FREEPDES], inusepde = NOPDE;
 
 #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
 
-FORWARD _PROTOTYPE( void phys_put32, (phys_bytes addr, u32_t value)    );
-FORWARD _PROTOTYPE( u32_t phys_get32, (phys_bytes addr)                        );
-FORWARD _PROTOTYPE( void vm_set_cr3, (u32_t value)                     );
+FORWARD _PROTOTYPE( u32_t phys_get32, (vir_bytes v)                    );
 FORWARD _PROTOTYPE( void set_cr3, (void)                               );
 FORWARD _PROTOTYPE( void vm_enable_paging, (void)                      );
 
-#if DEBUG_VMASSERT
-#define vmassert(t) { \
-       if(!(t)) { minix_panic("vm: assert " #t " failed\n", __LINE__); } }
-#else
-#define vmassert(t) { }
-#endif
-
+       
 /* *** Internal VM Functions *** */
 
-PUBLIC void vm_init(void)
+PUBLIC void vm_init(struct proc *newptproc)
 {
-       int o;
-       phys_bytes p, pt_size;
-       phys_bytes vm_dir_base, vm_pt_base, phys_mem;
-       u32_t entry;
-       unsigned pages;
-       struct proc* rp;
-       struct proc *sys = proc_addr(SYSTEM);
-       static int init_done = 0;
-
-       if (!vm_size)
-               minix_panic("i386_vm_init: no space for page tables", NO_NUM);
-
-       if(init_done)
-               return;
+       int i;
+       if(vm_running)
+               minix_panic("vm_init: vm_running", NO_NUM);
+       vm_set_cr3(newptproc);
+       level0(vm_enable_paging);
+       vm_running = 1;
 
-       /* Align page directory */
-       o= (vm_base % I386_PAGE_SIZE);
-       if (o != 0)
-               o= I386_PAGE_SIZE-o;
-       vm_dir_base= vm_base+o;
-
-       /* Page tables start after the page directory */
-       vm_pt_base= vm_dir_base+I386_PAGE_SIZE;
-
-       pt_size= (vm_base+vm_size)-vm_pt_base;
-       pt_size -= (pt_size % I386_PAGE_SIZE);
-
-       /* Compute the number of pages based on vm_mem_high */
-       pages= (vm_mem_high-1)/I386_PAGE_SIZE + 1;
-
-       if (pages * I386_VM_PT_ENT_SIZE > pt_size)
-               minix_panic("i386_vm_init: page table too small", NO_NUM);
-
-       for (p= 0; p*I386_VM_PT_ENT_SIZE < pt_size; p++)
-       {
-               phys_mem= p*I386_PAGE_SIZE;
-               entry= phys_mem | I386_VM_USER | I386_VM_WRITE |
-                       I386_VM_PRESENT;
-               if (phys_mem >= vm_mem_high)
-                       entry= 0;
-#if VM_KERN_NOPAGEZERO
-               if (phys_mem == (sys->p_memmap[T].mem_phys << CLICK_SHIFT) ||
-                   phys_mem == (sys->p_memmap[D].mem_phys << CLICK_SHIFT)) {
-                       entry = 0;
-               }
-#endif
-               phys_put32(vm_pt_base + p*I386_VM_PT_ENT_SIZE, entry);
-       }
+}
 
-       for (p= 0; p < I386_VM_DIR_ENTRIES; p++)
-       {
-               phys_mem= vm_pt_base + p*I386_PAGE_SIZE;
-               entry= phys_mem | I386_VM_USER | I386_VM_WRITE |
-                       I386_VM_PRESENT;
-               if (phys_mem >= vm_pt_base + pt_size)
-                       entry= 0;
-               phys_put32(vm_dir_base + p*I386_VM_PT_ENT_SIZE, entry);
-       }
 
+#define TYPEDIRECT     0
+#define TYPEPROCMAP    1
+#define TYPEPHYS       2
+
+/* This macro sets up a mapping from within the kernel's address
+ * space to any other area of memory, either straight physical
+ * memory (PROC == NULL) or a process view of memory, in 4MB chunks.
+ * If PROC is ptproc (the process whose page table is loaded) or has
+ * no page table of its own, no mapping is made and the address is
+ * used directly.
+ *
+ * Arguments:
+ *   PROC     process whose memory is wanted, or NULL for physical memory
+ *   PTR      (out) address through which LINADDR can now be accessed;
+ *            it already includes the offset within the 4MB chunk
+ *   LINADDR  linear address (physical address if PROC == NULL)
+ *   REMAIN   (in/out) byte count, clipped so that it does not run past
+ *            the end of the mapped 4MB chunk; unchanged in the direct case
+ *   BYTES    not used by the macro
+ *   PDE      (out) index of the free PDE slot that was used, or NOPDE
+ *            in the direct case
+ *   TYPE     (out) TYPEDIRECT, TYPEPROCMAP or TYPEPHYS
+ *
+ * The slot recorded in inusepde is skipped, so that a second CREATEPDE
+ * does not take the slot of the first; callers reset inusepde to NOPDE
+ * before the first of a pair. A slot that is not marked in dirtypde is
+ * preferred; if only dirty slots are available, the last one is used
+ * and cr3 is reloaded after the entry has been written.
+ */
+#define CREATEPDE(PROC, PTR, LINADDR, REMAIN, BYTES, PDE, TYPE) { \
+       u32_t *pdeptr = NULL;                                   \
+       int proc_pde_index;                                     \
+       proc_pde_index = I386_VM_PDE(LINADDR);                  \
+       PDE = NOPDE;                                            \
+       if((PROC) && (((PROC) == ptproc) || !HASPT(PROC))) {    \
+               PTR = (LINADDR);                                \
+               TYPE = TYPEDIRECT;                              \
+       } else {                                                \
+               int fp;                                         \
+               int mustinvl;                                   \
+               /* mask starts at 0 so that vmassert(mask) below is \
+                * well-defined even if no free slot was examined. */ \
+               u32_t pdeval, mask = 0;                         \
+               phys_bytes offset;                              \
+               vmassert(psok);                                 \
+               if(PROC) {                                      \
+                       TYPE = TYPEPROCMAP;                     \
+                       vmassert(!iskernelp(PROC));             \
+                       vmassert(HASPT(PROC));                  \
+                       pdeptr = PROCPDEPTR((PROC), proc_pde_index);    \
+                       pdeval = *pdeptr;                       \
+               } else {                                        \
+                       TYPE = TYPEPHYS;                        \
+                       pdeval = ((LINADDR) & I386_VM_ADDR_MASK_4MB) |  \
+                               I386_VM_BIGPAGE | I386_VM_PRESENT |     \
+                               I386_VM_WRITE | I386_VM_USER;           \
+               }                                                       \
+               for(fp = 0; fp < nfreepdes; fp++) {                     \
+                       int k = freepdes[fp];                           \
+                       if(inusepde == k)                               \
+                               continue;                               \
+                       *PROCPDEPTR(ptproc, k) = 0;                     \
+                       PDE = k;                                        \
+                       vmassert(k >= 0);                               \
+                       vmassert(k < sizeof(dirtypde)*8);               \
+                       mask = PDEMASK(PDE);                            \
+                       if(dirtypde & mask)                             \
+                               continue;                               \
+                       break;                                          \
+               }                                                       \
+               vmassert(PDE != NOPDE);                                 \
+               vmassert(mask);                                         \
+               if(dirtypde & mask) {                                   \
+                       mustinvl = 1;                                   \
+               } else {                                                \
+                       mustinvl = 0;                                   \
+               }                                                       \
+               inusepde = PDE;                                         \
+               *PROCPDEPTR(ptproc, PDE) = pdeval;                      \
+               offset = (LINADDR) & I386_VM_OFFSET_MASK_4MB;           \
+               PTR = I386_BIG_PAGE_SIZE*PDE + offset;                  \
+               REMAIN = MIN((REMAIN), I386_BIG_PAGE_SIZE - offset);    \
+               if(mustinvl) {  \
+                       level0(reload_cr3);                     \
+               }               \
+       }                                                               \
+}
 
-       /* Set this cr3 in all currently running processes for
-        * future context switches.
-        */
-       for (rp=BEG_PROC_ADDR; rp<END_PROC_ADDR; rp++) {
-               u32_t mycr3;
-               if(isemptyp(rp)) continue;
-               rp->p_seg.p_cr3 = vm_dir_base;
-       }
+/* Mark PDE slot 'PDE' (as returned by CREATEPDE) in dirtypde after it
+ * has been used. Does nothing if PDE is NOPDE, i.e. if CREATEPDE used
+ * the address directly.
+ */
+#define DONEPDE(PDE)   {                               \
+       if((PDE) != NOPDE) {                            \
+               vmassert((PDE) > 0);                    \
+               vmassert((PDE) < sizeof(dirtypde)*8);   \
+               dirtypde |= PDEMASK(PDE);               \
+       }                                               \
+}
 
-       kernel_cr3 = vm_dir_base;
+/* Clear PDE slot 'PDE' (as returned by CREATEPDE) in the page directory
+ * of ptproc. Does nothing if PDE is NOPDE.
+ */
+#define WIPEPDE(PDE)   {                               \
+       if((PDE) != NOPDE) {                            \
+               vmassert((PDE) > 0);                    \
+               vmassert((PDE) < sizeof(dirtypde)*8);   \
+               *PROCPDEPTR(ptproc, (PDE)) = 0;         \
+       }                                               \
+}
 
-       /* Set this cr3 now (not active until paging enabled). */
-       vm_set_cr3(vm_dir_base);
+/*===========================================================================*
+ *                             lin_lin_copy                                 *
+ *===========================================================================*/
+int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr, 
+       struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
+{
+       u32_t addr;
+       int o1, o2;
+       int procslot;
+       int firstloop = 1;
 
-       /* Actually enable paging (activating cr3 load above). */
-       level0(vm_enable_paging);
+       NOREC_ENTER(linlincopy);
 
-       /* Don't do this init in the future. */
-       init_done = 1;
-       vm_running = 1;
-}
+       vmassert(vm_running);
+       vmassert(nfreepdes >= 3);
 
-PRIVATE void phys_put32(addr, value)
-phys_bytes addr;
-u32_t value;
-{
-       phys_copy(vir2phys((vir_bytes)&value), addr, sizeof(value));
+       vmassert(ptproc);
+       vmassert(proc_ptr);
+       vmassert(read_cr3() == ptproc->p_seg.p_cr3);
+
+       procslot = ptproc->p_nr;
+
+       vmassert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
+
+       while(bytes > 0) {
+               phys_bytes srcptr, dstptr;
+               vir_bytes chunk = bytes;
+               int srcpde, dstpde;
+               int srctype, dsttype;
+
+               /* Set up 4MB ranges. */
+               inusepde = NOPDE;
+               CREATEPDE(srcproc, srcptr, srclinaddr, chunk, bytes, srcpde, srctype);
+               CREATEPDE(dstproc, dstptr, dstlinaddr, chunk, bytes, dstpde, dsttype);
+
+               /* Copy pages. */
+               PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
+
+               DONEPDE(srcpde);
+               DONEPDE(dstpde);
+
+               if(addr) {
+                       /* If addr is nonzero, a page fault was caught. */
+
+                       if(addr >= srcptr && addr < (srcptr + chunk)) {
+                               WIPEPDE(srcpde);
+                               WIPEPDE(dstpde);
+                               NOREC_RETURN(linlincopy, EFAULT_SRC);
+                       }
+                       if(addr >= dstptr && addr < (dstptr + chunk)) {
+                               WIPEPDE(srcpde);
+                               WIPEPDE(dstpde);
+                               NOREC_RETURN(linlincopy, EFAULT_DST);
+                       }
+
+                       minix_panic("lin_lin_copy fault out of range", NO_NUM);
+
+                       /* Not reached. */
+                       NOREC_RETURN(linlincopy, EFAULT);
+               }
+
+               WIPEPDE(srcpde);
+               WIPEPDE(dstpde);
+
+               /* Update counter and addresses for next iteration, if any. */
+               bytes -= chunk;
+               srclinaddr += chunk;
+               dstlinaddr += chunk;
+
+               firstloop = 0;
+       }
+
+       NOREC_RETURN(linlincopy, OK);
 }
 
+
 PRIVATE u32_t phys_get32(addr)         /* return the 32-bit value at physical address addr */
 phys_bytes addr;                       /* NOTE(review): the forward prototype names this 'vir_bytes v' - confirm the types agree */
 {
-       u32_t value;
+       u32_t v;
+       int r;
+
+       if(!vm_running) {               /* VM not running yet: plain physical copy */
+               phys_copy(addr, vir2phys(&v), sizeof(v));
+               return v;
+       }
 
-       phys_copy(addr, vir2phys((vir_bytes)&value), sizeof(value));
+       if((r=lin_lin_copy(NULL, addr,  /* NULL source process: addr is treated as physical */
+               proc_addr(SYSTEM), vir2phys(&v), sizeof(v))) != OK) {
+               minix_panic("lin_lin_copy for phys_get32 failed", r);
+       }
 
-       return value;
+       return v;
 }
 
-PRIVATE void vm_set_cr3(value)
-u32_t value;
+PRIVATE u32_t vm_cr3;  /* temp arg to level0() func */
+
+PUBLIC void vm_set_cr3(struct proc *newptproc) /* load newptproc's page table, if it has one, and record it in ptproc */
 {
-       vm_cr3= value;
-       level0(set_cr3);
+       int u = 0;                              /* set to 1 if we took the lock here and must release it */
+       if(!intr_disabled()) { lock; u = 1; }
+       vm_cr3= newptproc->p_seg.p_cr3;         /* passed to set_cr3() through this file-scope variable */
+       if(vm_cr3) {                            /* a zero p_cr3 leaves cr3 and ptproc unchanged */
+               vmassert(intr_disabled());
+               level0(set_cr3);
+               vmassert(intr_disabled());
+               ptproc = newptproc;
+               vmassert(intr_disabled());
+       }
+       if(u) { unlock; }
 }
 
 PRIVATE void set_cr3()
@@ -153,10 +263,42 @@ PRIVATE void set_cr3()
        write_cr3(vm_cr3);
 }
 
+/* Return a string naming the cr0 flags that are set in 'e'; " (++)" is
+ * appended if bits remain that are not among the names listed here.
+ * The result lives in a static buffer that is overwritten by the next call.
+ */
+char *cr0_str(u32_t e)
+{
+       /* All seven names plus " (++)" need 90 bytes including the
+        * terminator, so 80 was too small.
+        */
+       static char str[128];
+       strcpy(str, "");
+       /* FLAG appends the name of v if it is set in e, and clears it from e.
+        * It is also used by the functions that follow.
+        */
+#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~(v); } } while(0)
+       FLAG(I386_CR0_PE);
+       FLAG(I386_CR0_MP);
+       FLAG(I386_CR0_EM);
+       FLAG(I386_CR0_TS);
+       FLAG(I386_CR0_ET);
+       FLAG(I386_CR0_PG);
+       FLAG(I386_CR0_WP);
+       if(e) { strcat(str, " (++)"); }
+       return str;
+}
+
+/* Return a string naming the cr4 flags that are set in 'e'; " (++)" is
+ * appended if bits remain that are not among the names listed here.
+ * The result lives in a static buffer that is overwritten by the next call.
+ */
+char *cr4_str(u32_t e)
+{
+       /* All eight names plus " (++)" need 109 bytes including the
+        * terminator, so 80 was too small.
+        */
+       static char str[128];
+       strcpy(str, "");
+       FLAG(I386_CR4_VME);
+       FLAG(I386_CR4_PVI);
+       FLAG(I386_CR4_TSD);
+       FLAG(I386_CR4_DE);
+       FLAG(I386_CR4_PSE);
+       FLAG(I386_CR4_PAE);
+       FLAG(I386_CR4_MCE);
+       FLAG(I386_CR4_PGE);
+       if(e) { strcat(str, " (++)"); }
+       return str;
+}
+
 PRIVATE void vm_enable_paging(void)
 {
        u32_t cr0, cr4;
-       int psok, pgeok;
+       int pgeok;
 
        psok = _cpufeature(_CPUF_I386_PSE);
        pgeok = _cpufeature(_CPUF_I386_PGE);
@@ -166,19 +308,26 @@ PRIVATE void vm_enable_paging(void)
 
        /* First clear PG and PGE flag, as PGE must be enabled after PG. */
        write_cr0(cr0 & ~I386_CR0_PG);
-       write_cr4(cr4 & ~I386_CR4_PGE);
+       write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE));
 
        cr0= read_cr0();
        cr4= read_cr4();
 
+       /* Our first page table contains 4MB entries. */
+       if(psok)
+               cr4 |= I386_CR4_PSE;
+
+       write_cr4(cr4);
+
        /* First enable paging, then enable global page flag. */
-       write_cr0(cr0 | I386_CR0_PG);
+       cr0 |= I386_CR0_PG;
+       write_cr0(cr0 );
+       cr0 |= I386_CR0_WP;
+       write_cr0(cr0);
 
        /* May we enable these features? */
        if(pgeok)
                cr4 |= I386_CR4_PGE;
-       if(psok)
-               cr4 |= I386_CR4_PSE;
 
        write_cr4(cr4);
 }
@@ -315,6 +464,7 @@ vir_bytes bytes;                /* # of bytes to be copied */
        return phys;
 }
 
+
 /*===========================================================================*
  *                              vm_lookup                                    *
  *===========================================================================*/
@@ -323,6 +473,7 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical,
        u32_t *root, *pt;
        int pde, pte;
        u32_t pde_v, pte_v;
+       NOREC_ENTER(vmlookup);
 
        vmassert(proc);
        vmassert(physical);
@@ -330,7 +481,7 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical,
 
        if(!HASPT(proc)) {
                *physical = virtual;
-               return OK;
+               NOREC_RETURN(vmlookup, OK);
        }
 
        /* Retrieve page directory entry. */
@@ -339,39 +490,35 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical,
        pde = I386_VM_PDE(virtual);
        vmassert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
        pde_v = phys_get32((u32_t) (root + pde));
+
        if(!(pde_v & I386_VM_PRESENT)) {
-#if 0
-               kprintf("vm_lookup: %d:%s:0x%lx: cr3 0x%lx: pde %d not present\n",
-                       proc->p_endpoint, proc->p_name, virtual, root, pde);
-               kprintf("kernel stack: ");
-               util_stacktrace();
-#endif
-               return EFAULT;
+               NOREC_RETURN(vmlookup, EFAULT);
        }
 
-       /* Retrieve page table entry. */
-       pt = (u32_t *) I386_VM_PFA(pde_v);
-       vmassert(!((u32_t) pt % I386_PAGE_SIZE));
-       pte = I386_VM_PTE(virtual);
-       vmassert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
-       pte_v = phys_get32((u32_t) (pt + pte));
-       if(!(pte_v & I386_VM_PRESENT)) {
-#if 0
-               kprintf("vm_lookup: %d:%s:0x%lx: cr3 %lx: pde %d: pte %d not present\n",
-                       proc->p_endpoint, proc->p_name, virtual, root, pde, pte);
-               kprintf("kernel stack: ");
-               util_stacktrace();
-#endif
-               return EFAULT;
-       }
+       /* We don't expect to ever see this. */
+       if(pde_v & I386_VM_BIGPAGE) {
+               *physical = pde_v & I386_VM_ADDR_MASK_4MB;
+               if(ptent) *ptent = pde_v;
+               *physical += virtual & I386_VM_OFFSET_MASK_4MB;
+       } else {
+               /* Retrieve page table entry. */
+               pt = (u32_t *) I386_VM_PFA(pde_v);
+               vmassert(!((u32_t) pt % I386_PAGE_SIZE));
+               pte = I386_VM_PTE(virtual);
+               vmassert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
+               pte_v = phys_get32((u32_t) (pt + pte));
+               if(!(pte_v & I386_VM_PRESENT)) {
+                       NOREC_RETURN(vmlookup, EFAULT);
+               }
 
-       if(ptent) *ptent = pte_v;
+               if(ptent) *ptent = pte_v;
 
-       /* Actual address now known; retrieve it and add page offset. */
-       *physical = I386_VM_PFA(pte_v);
-       *physical += virtual % I386_PAGE_SIZE;
+               /* Actual address now known; retrieve it and add page offset. */
+               *physical = I386_VM_PFA(pte_v);
+               *physical += virtual % I386_PAGE_SIZE;
+       }
 
-       return OK;
+       NOREC_RETURN(vmlookup, OK);
 }
 
 /* From virtual address v in process p,
@@ -390,54 +537,6 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical,
                        return r;                               \
                } } }
 
-/*===========================================================================*
- *                              vm_copy                                      *
- *===========================================================================*/
-int vm_copy(vir_bytes src, struct proc *srcproc,
-        vir_bytes dst, struct proc *dstproc, phys_bytes bytes)
-{
-#define WRAPS(v) (ULONG_MAX - (v) <= bytes)
-
-       if(WRAPS(src) || WRAPS(dst))
-               minix_panic("vm_copy: linear address wraps", NO_NUM);
-
-       while(bytes > 0) {
-               u32_t n, flags;
-               phys_bytes p_src, p_dst;
-#define PAGEREMAIN(v) (I386_PAGE_SIZE - ((v) % I386_PAGE_SIZE))
-
-               /* We can copy this number of bytes without
-                * crossing a page boundary, but don't copy more
-                * than asked.
-                */
-               n = MIN(PAGEREMAIN(src), PAGEREMAIN(dst));
-               n = MIN(n, bytes);
-               vmassert(n > 0);
-               vmassert(n <= I386_PAGE_SIZE);
-
-               /* Convert both virtual addresses to physical and do
-                * copy.
-                */
-               LOOKUP(p_src, srcproc, src, NULL);
-               LOOKUP(p_dst, dstproc, dst, &flags);
-               if(!(flags & I386_VM_WRITE)) {
-                       kprintf("vm_copy: copying to nonwritable page\n");
-                       kprintf("kernel stack: ");
-                       util_stacktrace();
-                       return EFAULT;
-               }
-               phys_copy(p_src, p_dst, n);
-
-               /* Book number of bytes copied. */
-               vmassert(bytes >= n);
-               bytes -= n;
-               src += n;
-               dst += n;
-       }
-
-       return OK;
-}
-
 /*===========================================================================*
  *                              vm_contiguous                                *
  *===========================================================================*/
@@ -493,155 +592,99 @@ PUBLIC int vm_contiguous(struct proc *targetproc, u32_t vir_buf, size_t bytes)
                boundaries++;
        }
 
-       if(verbose_vm)
-               kprintf("vm_contiguous: yes (%d boundaries tested)\n",
-                       boundaries);
-
        return 1;
 }
 
-int vm_checkrange_verbose = 0;
-
 /*===========================================================================*
- *                              vm_checkrange                                *
+ *                              vm_suspend                                *
  *===========================================================================*/
-PUBLIC int vm_checkrange(struct proc *caller, struct proc *target,
-       vir_bytes vir, vir_bytes bytes, int wrfl, int checkonly)
+PUBLIC int vm_suspend(struct proc *caller, struct proc *target,
+       vir_bytes linaddr, vir_bytes len, int wrflag, int type)
 {
-       u32_t flags, po, v;
-       int r;
+       /* This range is not OK for this process. Set parameters  
+        * of the request and notify VM about the pending request. 
+        *
+        * The caller gets VMREQUEST set and is put at the head of the
+        * vmrequest list; VM is notified only if that list was empty.
+        *
+        * NOTE(review): wrflag is not read here; writeflag is always
+        * set to 1 - confirm this is intended.
+        * NOTE(review): the function is declared int but has no return
+        * statement - confirm that no caller uses the result.
+        */                                                             
+       vmassert(!RTS_ISSET(caller, VMREQUEST));
+       vmassert(!RTS_ISSET(target, VMREQUEST));
 
-       if(!HASPT(target))
-               return OK;
+       RTS_LOCK_SET(caller, VMREQUEST);
 
-       /* If caller has had a reply to this request, return it. */
-       if(RTS_ISSET(caller, VMREQUEST)) {
-               if(caller->p_vmrequest.who == target->p_endpoint) {
-                       if(caller->p_vmrequest.vmresult == VMSUSPEND)
-                               minix_panic("check sees VMSUSPEND?", NO_NUM);
-                       RTS_LOCK_UNSET(caller, VMREQUEST);
-#if 0
-                       kprintf("SYSTEM: vm_checkrange: returning vmresult %d\n",
-                               caller->p_vmrequest.vmresult);
-#endif
-                       return caller->p_vmrequest.vmresult;
-               } else {
-#if 0
-                       kprintf("SYSTEM: vm_checkrange: caller has a request for %d, "
-                               "but our target is %d\n",
-                               caller->p_vmrequest.who, target->p_endpoint);
+#if DEBUG_VMASSERT
+       caller->p_vmrequest.stacktrace[0] = '\0';
+       util_stacktrace_strcat(caller->p_vmrequest.stacktrace);
 #endif
-               }
-       }
-
-       po = vir % I386_PAGE_SIZE;
-       if(po > 0) {
-               vir -= po;
-               bytes += po;
-       }
 
-       vmassert(target);
-       vmassert(bytes > 0);
-
-       for(v = vir; v < vir + bytes;  v+= I386_PAGE_SIZE) {
-               u32_t phys;
+       caller->p_vmrequest.writeflag = 1;
+       caller->p_vmrequest.start = linaddr;
+       caller->p_vmrequest.length = len;
+       caller->p_vmrequest.who = target->p_endpoint;
+       caller->p_vmrequest.type = type;
+                                                       
+       /* Connect caller on vmrequest wait queue. */   
+       if(!(caller->p_vmrequest.nextrequestor = vmrequest))
+               mini_notify(proc_addr(SYSTEM), VM_PROC_NR);
+       vmrequest = caller;
+}
 
-               /* If page exists and it's writable if desired, we're OK
-                * for this page.
-                */
-               if(vm_lookup(target, v, &phys, &flags) == OK &&
-                       !(wrfl && !(flags & I386_VM_WRITE))) {
-                       if(vm_checkrange_verbose) {
-#if 0
-                               kprintf("SYSTEM: checkrange:%s:%d: 0x%lx: write 0x%lx, flags 0x%lx, phys 0x%lx, OK\n",
-                               target->p_name, target->p_endpoint, v, wrfl, flags, phys);
-#endif
-                       }
-                       continue;
-               }
+/*===========================================================================*
+ *                              delivermsg                                *
+ *===========================================================================*
+ * Copy the message held in rp->p_delivermsg to the linear address
+ * rp->p_delivermsg_lin, after loading rp's page table with vm_set_cr3().
+ * If the copy reports a fault address, a VM request is filed with
+ * vm_suspend() and VMSUSPEND is returned with MF_DELIVERMSG left set.
+ * Otherwise MF_DELIVERMSG is cleared and OK is returned.
+ */
+int delivermsg(struct proc *rp)
+{
+       phys_bytes addr;  
+       int r;
+       NOREC_ENTER(deliver);
 
-               if(vm_checkrange_verbose) {
-                       kprintf("SYSTEM: checkrange:%s:%d: 0x%lx: write 0x%lx, flags 0x%lx, phys 0x%lx, NOT OK\n",
-                       target->p_name, target->p_endpoint, v, wrfl, flags, phys);
-               }
+       vmassert(rp->p_misc_flags & MF_DELIVERMSG);
+       vmassert(rp->p_delivermsg.m_source != NONE);
 
-               if(checkonly) {
-                       return VMSUSPEND;
-               }
+       vmassert(rp->p_delivermsg_lin);
+#if DEBUG_VMASSERT
+       if(rp->p_delivermsg_lin !=
+               umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message))) {
+               printf("vir: 0x%lx lin was: 0x%lx umap now: 0x%lx\n",
+               rp->p_delivermsg_vir, rp->p_delivermsg_lin,
+               umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message)));
+               minix_panic("that's wrong", NO_NUM);
+       }
 
-               /* This range is not OK for this process. Set parameters
-                * of the request and notify VM about the pending request.
-                */
-               if(RTS_ISSET(caller, VMREQUEST))
-                       minix_panic("VMREQUEST already set", caller->p_endpoint);
-               RTS_LOCK_SET(caller, VMREQUEST);
-
-               /* Set parameters in caller. */
-               caller->p_vmrequest.writeflag = wrfl;
-               caller->p_vmrequest.start = vir;
-               caller->p_vmrequest.length = bytes;
-               caller->p_vmrequest.who = target->p_endpoint;
-
-               /* Set caller in target. */
-               target->p_vmrequest.requestor = caller;
-
-               /* Connect caller on vmrequest wait queue. */
-               caller->p_vmrequest.nextrequestor = vmrequest;
-               vmrequest = caller;
-               if(!caller->p_vmrequest.nextrequestor) {
-                       int n = 0;
-                       struct proc *vmr;
-                       for(vmr = vmrequest; vmr; vmr = vmr->p_vmrequest.nextrequestor)
-                               n++;
-                       soft_notify(VM_PROC_NR);
-#if 0
-                       kprintf("(%d) ", n);
-                       kprintf("%d/%d ",
-                               caller->p_endpoint, target->p_endpoint);
-                       util_stacktrace();
 #endif
-               }
 
-#if 0
-               kprintf("SYSTEM: vm_checkrange: range bad for "
-                       "target %s:0x%lx-0x%lx, caller %s\n",
-                               target->p_name, vir, vir+bytes, caller->p_name);
-
-               kprintf("vm_checkrange kernel trace: ");
-               util_stacktrace();
-               kprintf("target trace: ");
-               proc_stacktrace(target);
-#endif
+       vm_set_cr3(rp);                 /* so that p_delivermsg_lin refers to rp's own memory */
 
-               if(target->p_endpoint == VM_PROC_NR) {
-                       kprintf("caller trace: ");
-                       proc_stacktrace(caller);
-                       kprintf("target trace: ");
-                       proc_stacktrace(target);
-                       minix_panic("VM ranges should be OK", NO_NUM);
-               }
+       PHYS_COPY_CATCH(vir2phys(&rp->p_delivermsg),
+               rp->p_delivermsg_lin, sizeof(message), addr);
 
-               return VMSUSPEND;
+       if(addr) {                      /* nonzero addr: the copy faulted */
+               vm_suspend(rp, rp, rp->p_delivermsg_lin, sizeof(message), 1,
+                       VMSTYPE_DELIVERMSG);
+               r = VMSUSPEND;
+       } else {
+#if DEBUG_VMASSERT
+               rp->p_delivermsg.m_source = NONE;
+               rp->p_delivermsg_lin = 0;
+#endif
+               rp->p_misc_flags &= ~MF_DELIVERMSG;
+               r = OK;
        }
 
-       return OK;
+       NOREC_RETURN(deliver, r);
 }
 
 char *flagstr(u32_t e, int dir)
 {
        static char str[80];
        strcpy(str, "");
-#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); } } while(0)
        FLAG(I386_VM_PRESENT);
        FLAG(I386_VM_WRITE);
        FLAG(I386_VM_USER);
        FLAG(I386_VM_PWT);
        FLAG(I386_VM_PCD);
+       FLAG(I386_VM_GLOBAL);
        if(dir)
                FLAG(I386_VM_BIGPAGE);  /* Page directory entry only */
        else
                FLAG(I386_VM_DIRTY);    /* Page table entry only */
-
        return str;
 }
 
@@ -658,8 +701,9 @@ void vm_pt_print(u32_t *pagetable, u32_t v)
                if(!(pte_v & I386_VM_PRESENT))
                        continue;
                pfa = I386_VM_PFA(pte_v);
-               kprintf("%4d:%08lx:%08lx ",
-                       pte, v + I386_PAGE_SIZE*pte, pfa);
+               kprintf("%4d:%08lx:%08lx %2s ",
+                       pte, v + I386_PAGE_SIZE*pte, pfa,
+                       (pte_v & I386_VM_WRITE) ? "rw":"RO");
                col++;
                if(col == 3) { kprintf("\n"); col = 0; }
        }
@@ -668,31 +712,85 @@ void vm_pt_print(u32_t *pagetable, u32_t v)
        return;
 }
 
-/*===========================================================================*
- *                              vm_print                                     *
- *===========================================================================*/
 void vm_print(u32_t *root)             /* print the present entries of the page directory at 'root' */
 {
        int pde;
 
        vmassert(!((u32_t) root % I386_PAGE_SIZE));
 
-       for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
+       printf("page table 0x%lx:\n", root);    /* NOTE(review): a pointer is passed for %lx, and printf is used where the rest uses kprintf - confirm */
+
+       for(pde = 10; pde < I386_VM_DIR_ENTRIES; pde++) {       /* NOTE(review): starts at entry 10, not 0 - confirm intended */
                u32_t pde_v;
                u32_t *pte_a;
                pde_v = phys_get32((u32_t) (root + pde));
                if(!(pde_v & I386_VM_PRESENT))
                        continue;
-               pte_a = (u32_t *) I386_VM_PFA(pde_v);
-               kprintf("%4d: pt %08lx %s\n",
-                       pde, pte_a, flagstr(pde_v, 1));
-               vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
+               if(pde_v & I386_VM_BIGPAGE) {   /* big-page entry: no page table to descend into */
+                       kprintf("%4d: 0x%lx, flags %s\n",
+                               pde, I386_VM_PFA(pde_v), flagstr(pde_v, 1));
+               } else {
+                       pte_a = (u32_t *) I386_VM_PFA(pde_v);
+                       kprintf("%4d: pt %08lx %s\n",
+                               pde, pte_a, flagstr(pde_v, 1));
+                       vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
+                       kprintf("\n");
+               }
        }
 
 
        return;
 }
 
+u32_t thecr3;                          /* written by the assembly routine getcr3val */
+
+u32_t read_cr3(void)                   /* return the current value of cr3 */
+{
+       level0(getcr3val);              /* getcr3val stores cr3 in thecr3 */
+       return thecr3;
+}
+
+
+/*===========================================================================*
+ *                             vm_phys_memset                               *
+ *===========================================================================*/
+/* Fill 'bytes' bytes of physical memory starting at 'ph' with byte 'c'.
+ * Before VM is running this is a plain phys_memset(); afterwards the
+ * physical range is mapped in with CREATEPDE, at most 4MB at a time.
+ * Always returns OK.
+ */
+int vm_phys_memset(phys_bytes ph, u8_t c, phys_bytes bytes)
+{
+       u32_t p, b = c;
+       NOREC_ENTER(physmemset);
+
+       /* Replicate the byte into all four bytes of the pattern. The shifts
+        * are done on an unsigned 32-bit value: shifting the promoted u8_t
+        * left by 24 would overflow a signed int for c >= 0x80.
+        */
+       p = b | (b << 8) | (b << 16) | (b << 24);
+
+       if(!vm_running) {
+               phys_memset(ph, p, bytes);
+               NOREC_RETURN(physmemset, OK);
+       }
+
+       vmassert(nfreepdes >= 3);
+
+       /* With VM, we have to map in the physical memory. 
+        * We can do this 4MB at a time.
+        */
+       while(bytes > 0) {
+               int pde, t;
+               vir_bytes chunk = bytes;
+               phys_bytes ptr;
+               inusepde = NOPDE;
+               CREATEPDE(((struct proc *) NULL), ptr, ph, chunk, bytes, pde, t);
+               /* We can memset as many bytes as we have remaining,
+                * or as many as remain in the 4MB chunk we mapped in.
+                */
+               phys_memset(ptr, p, chunk);
+               DONEPDE(pde);
+               bytes -= chunk;
+               ph += chunk;
+       }
+
+
+       NOREC_RETURN(physmemset, OK);
+}
+
 /*===========================================================================*
  *                             virtual_copy_f                               *
  *===========================================================================*/
@@ -710,6 +808,7 @@ int vmcheck;                        /* if nonzero, can return VMSUSPEND */
   int seg_index;
   int i, r;
   struct proc *procs[2];
+  NOREC_ENTER(virtualcopy);
 
   /* Check copy count. */
   if (bytes <= 0) return(EDOM);
@@ -735,7 +834,9 @@ int vmcheck;                        /* if nonzero, can return VMSUSPEND */
       switch(type) {
       case LOCAL_SEG:
       case LOCAL_VM_SEG:
-         if(!p) return EDEADSRCDST;
+         if(!p) {
+               NOREC_RETURN(virtualcopy, EDEADSRCDST);
+         }
           seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
          if(type == LOCAL_SEG)
                  phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset,
@@ -751,7 +852,9 @@ int vmcheck;                        /* if nonzero, can return VMSUSPEND */
          }
           break;
       case REMOTE_SEG:
-         if(!p) return EDEADSRCDST;
+         if(!p) {
+               NOREC_RETURN(virtualcopy, EDEADSRCDST);
+         }
           seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
           phys_addr[i] = umap_remote(p, seg_index, vir_addr[i]->offset, bytes);
           break;
@@ -763,43 +866,96 @@ int vmcheck;                      /* if nonzero, can return VMSUSPEND */
       case PHYS_SEG:
           phys_addr[i] = vir_addr[i]->offset;
           break;
-      case GRANT_SEG:
-         phys_addr[i] = umap_grant(p, vir_addr[i]->offset, bytes);
-         break;
       default:
          kprintf("virtual_copy: strange type 0x%x\n", type);
-          return(EINVAL);
+         NOREC_RETURN(virtualcopy, EINVAL);
       }
 
       /* Check if mapping succeeded. */
       if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG)  {
       kprintf("virtual_copy EFAULT\n");
-          return(EFAULT);
+         NOREC_RETURN(virtualcopy, EFAULT);
       }
   }
 
-  if(vmcheck && procs[_SRC_])
-       CHECKRANGE_OR_SUSPEND(procs[_SRC_], phys_addr[_SRC_], bytes, 0);
-  if(vmcheck && procs[_DST_])
-       CHECKRANGE_OR_SUSPEND(procs[_DST_], phys_addr[_DST_], bytes, 1);
+  if(vm_running) {
+       int r;
+       struct proc *caller;
 
-#define NOPT(p) (!(p) || !HASPT(p))
-  /* Now copy bytes between physical addresseses. */
-  if(NOPT(procs[_SRC_]) && NOPT(procs[_DST_])) {
-       /* Without vm, address ranges actually are physical. */
-       phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes);
-       r = OK;
-  } else {
-       /* With vm, addresses need further interpretation. */
-       r = vm_copy(phys_addr[_SRC_], procs[_SRC_], 
-               phys_addr[_DST_], procs[_DST_], (phys_bytes) bytes);
-       if(r != OK) {
-               kprintf("vm_copy: %lx to %lx failed\n",
-                       phys_addr[_SRC_],phys_addr[_DST_]);
+       caller = proc_addr(who_p);
+
+       if(RTS_ISSET(caller, VMREQUEST)) {
+               struct proc *target;
+               int pn;
+               vmassert(caller->p_vmrequest.vmresult != VMSUSPEND);
+               RTS_LOCK_UNSET(caller, VMREQUEST);
+               if(caller->p_vmrequest.vmresult != OK) {
+                       printf("virtual_copy: returning VM error %d\n",
+                               caller->p_vmrequest.vmresult);
+                       NOREC_RETURN(virtualcopy, caller->p_vmrequest.vmresult);
+               }
+       }
+
+       if((r=lin_lin_copy(procs[_SRC_], phys_addr[_SRC_],
+               procs[_DST_], phys_addr[_DST_], bytes)) != OK) {
+               struct proc *target;
+               int wr;
+               phys_bytes lin;
+               if(r != EFAULT_SRC && r != EFAULT_DST)
+                       minix_panic("lin_lin_copy failed", r);
+               if(!vmcheck) {
+                       NOREC_RETURN(virtualcopy, r);
+               }
+
+               vmassert(procs[_SRC_] && procs[_DST_]);
+
+               if(r == EFAULT_SRC) {
+                       lin = phys_addr[_SRC_];
+                       target = procs[_SRC_];
+                       wr = 0;
+               } else if(r == EFAULT_DST) {
+                       lin = phys_addr[_DST_];
+                       target = procs[_DST_];
+                       wr = 1;
+               } else {
+                       minix_panic("r strange", r);
+               }
+
+#if 0
+               printf("virtual_copy: suspending caller %d / %s, target %d / %s\n",
+                       caller->p_endpoint, caller->p_name,
+                       target->p_endpoint, target->p_name);
+#endif
+
+               vmassert(k_reenter == -1);
+               vmassert(proc_ptr->p_endpoint == SYSTEM);
+               vm_suspend(caller, target, lin, bytes, wr, VMSTYPE_KERNELCALL);
+
+               NOREC_RETURN(virtualcopy, VMSUSPEND);
        }
+
+       NOREC_RETURN(virtualcopy, OK);
   }
 
-  return(r);
+  vmassert(!vm_running);
+
+  /* can't copy to/from process with PT without VM */
+#define NOPT(p) (!(p) || !HASPT(p))
+  if(!NOPT(procs[_SRC_])) {
+       kprintf("ignoring page table src: %s / %d at 0x%lx\n",
+               procs[_SRC_]->p_name, procs[_SRC_]->p_endpoint, procs[_SRC_]->p_seg.p_cr3);
+}
+  if(!NOPT(procs[_DST_])) {
+       kprintf("ignoring page table dst: %s / %d at 0x%lx\n",
+               procs[_DST_]->p_name, procs[_DST_]->p_endpoint,
+               procs[_DST_]->p_seg.p_cr3);
+  }
+
+  /* Now copy bytes between physical addresses. */
+  if(phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes))
+       NOREC_RETURN(virtualcopy, EFAULT);
+  NOREC_RETURN(virtualcopy, OK);
 }
 
 /*===========================================================================*
@@ -821,6 +977,25 @@ PUBLIC int data_copy(
   return virtual_copy(&src, &dst, bytes);
 }
 
+/*===========================================================================*
+ *                             data_copy_vmcheck                            *
+ *===========================================================================*/
+PUBLIC int data_copy_vmcheck(
+       endpoint_t from_proc, vir_bytes from_addr,
+       endpoint_t to_proc, vir_bytes to_addr,
+       size_t bytes)
+{
+/* Copy 'bytes' bytes from offset 'from_addr' in the D segment of endpoint
+ * 'from_proc' to offset 'to_addr' in the D segment of endpoint 'to_proc'.
+ * The work is done by virtual_copy_vmcheck(), whose result is returned
+ * unchanged.
+ */
+  struct vir_addr src, dst;
+
+  /* Source address. */
+  src.proc_nr_e = from_proc;
+  src.segment = D;
+  src.offset = from_addr;
+
+  /* Destination address. */
+  dst.proc_nr_e = to_proc;
+  dst.segment = D;
+  dst.offset = to_addr;
+
+  return virtual_copy_vmcheck(&src, &dst, bytes);
+}
+
 /*===========================================================================*
  *                             arch_pre_exec                                *
  *===========================================================================*/
@@ -852,4 +1027,10 @@ PUBLIC int arch_umap(struct proc *pr, vir_bytes offset, vir_bytes count,
        return EINVAL;
 }
 
-
+/* VM tells us about a page directory slot that we may use freely. The slot
+ * is appended to freepdes[]; once WANT_FREEPDES slots have been recorded,
+ * further reports are ignored.
+ */
+void i386_freepde(int pde)
+{
+       if(nfreepdes < WANT_FREEPDES) {
+               freepdes[nfreepdes] = pde;
+               nfreepdes++;
+       }
+}
index 066df8573d21baab5577f5ef93ad0dd3f089c89d..3c2248c48ed0f96705eee3123ccecc84c790fab3 100755 (executable)
@@ -60,7 +60,6 @@ begbss:
 #include <ibm/interrupt.h>
 #include <archconst.h>
 #include "../../const.h"
-#include "vm.h"
 #include "sconst.h"
 
 /* Selected 386 tss offsets. */
@@ -74,9 +73,8 @@ begbss:
 
 .define        _restart
 .define        save
-.define        _kernel_cr3
-.define        _pagefault_cr2
-.define _pagefault_count
+.define _reload_cr3
+.define        _write_cr3      ! write cr3
 
 .define errexception
 .define exception1
@@ -101,6 +99,8 @@ begbss:
 .define        _params_size
 .define _params_offset
 .define _mon_ds
+.define _schedcheck
+.define _dirtypde
 
 .define        _hwint00        ! handlers for hardware interrupts
 .define        _hwint01
@@ -218,12 +218,6 @@ csinit:
        ltr     ax
        push    0                       ! set flags to known good state
        popf                            ! esp, clear nested task and int enable
-#if VM_KERN_NOPAGEZERO
-       jmp     laststep
-
-.align I386_PAGE_SIZE
-laststep:
-#endif
        jmp     _main                   ! main()
 
 
@@ -239,7 +233,6 @@ laststep:
 #define hwint_master(irq)      \
        call    save                    /* save interrupted process state */;\
        push    (_irq_handlers+4*irq)   /* irq_handlers[irq]              */;\
-       LOADCR3WITHEAX(irq, (_kernel_cr3))      /* switch to kernel page table    */;\
        call    _intr_handle            /* intr_handle(irq_handlers[irq]) */;\
        pop     ecx                                                         ;\
        cmp     (_irq_actids+4*irq), 0  /* interrupt still active?        */;\
@@ -291,7 +284,6 @@ _hwint07:           ! Interrupt routine for irq 7 (printer)
 #define hwint_slave(irq)       \
        call    save                    /* save interrupted process state */;\
        push    (_irq_handlers+4*irq)   /* irq_handlers[irq]              */;\
-       LOADCR3WITHEAX(irq, (_kernel_cr3))      /* switch to kernel page table    */;\
        call    _intr_handle            /* intr_handle(irq_handlers[irq]) */;\
        pop     ecx                                                         ;\
        cmp     (_irq_actids+4*irq), 0  /* interrupt still active?        */;\
@@ -398,11 +390,9 @@ _p_s_call:
        push    eax             ! source / destination
        push    ecx             ! call number (ipc primitive to use)
 
-!      LOADCR3WITHEAX(0x20, (_kernel_cr3))
-
        call    _sys_call       ! sys_call(call_nr, src_dst, m_ptr, bit_map)
                                ! caller is now explicitly in proc_ptr
-       mov     AXREG(esi), eax ! sys_call MUST PRESERVE si
+       mov     AXREG(esi), eax
 
 ! Fall into code to restart proc/task running.
 
@@ -413,14 +403,21 @@ _restart:
 
 ! Restart the current process or the next process if it is set. 
 
-       cmp     (_next_ptr), 0          ! see if another process is scheduled
-       jz      0f
-       mov     eax, (_next_ptr)
-       mov     (_proc_ptr), eax        ! schedule new process 
-       mov     (_next_ptr), 0
-0:     mov     esp, (_proc_ptr)        ! will assume P_STACKBASE == 0
+       cli
+       call    _schedcheck             ! ask C function who we're running
+       mov     esp, (_proc_ptr)        ! will assume P_STACKBASE == 0
        lldt    P_LDT_SEL(esp)          ! enable process' segment descriptors 
-       LOADCR3WITHEAX(0x21, P_CR3(esp))        ! switch to process page table
+       cmp     P_CR3(esp), 0           ! process does not have its own PT
+       jz      0f      
+       mov     eax, P_CR3(esp)
+       cmp     eax, (loadedcr3)
+       jz      0f
+       mov     cr3, eax
+       mov     (loadedcr3), eax
+       mov     eax, (_proc_ptr)
+       mov     (_ptproc), eax
+       mov     (_dirtypde), 0
+0:
        lea     eax, P_STACKTOP(esp)    ! arrange for next interrupt
        mov     (_tss+TSS3_S_SP0), eax  ! to save state in process table
 restart1:
@@ -496,8 +493,7 @@ _page_fault:
        push    PAGE_FAULT_VECTOR
        push    eax
        mov     eax, cr2
-sseg   mov     (_pagefault_cr2), eax
-sseg   inc     (_pagefault_count)
+sseg   mov     (pagefaultcr2), eax
        pop     eax
        jmp     errexception
 
@@ -526,19 +522,26 @@ errexception:
  sseg  pop     (ex_number)
  sseg  pop     (trap_errno)
 exception1:                            ! Common for all exceptions.
+ sseg  mov     (old_eax_ptr), esp      ! where will eax be saved?
+ sseg  sub     (old_eax_ptr), PCREG-AXREG      ! here
+
        push    eax                     ! eax is scratch register
 
        mov     eax, 0+4(esp)           ! old eip
  sseg  mov     (old_eip), eax
+       mov     eax, esp
+       add     eax, 4
+ sseg  mov     (old_eip_ptr), eax
        movzx   eax, 4+4(esp)           ! old cs
  sseg  mov     (old_cs), eax
        mov     eax, 8+4(esp)           ! old eflags
  sseg  mov     (old_eflags), eax
 
-       LOADCR3WITHEAX(0x24, (_kernel_cr3))
-
        pop     eax
        call    save
+       push    (pagefaultcr2)
+       push    (old_eax_ptr)
+       push    (old_eip_ptr)
        push    (old_eflags)
        push    (old_cs)
        push    (old_eip)
@@ -546,34 +549,53 @@ exception1:                               ! Common for all exceptions.
        push    (ex_number)
        call    _exception              ! (ex_number, trap_errno, old_eip,
                                        !       old_cs, old_eflags)
-       add     esp, 5*4
+       add     esp, 8*4
        ret
 
+
 !*===========================================================================*
-!*                             level0_call                                  *
+!*                             write_cr3                               *
 !*===========================================================================*
-_level0_call:
-       call    save
-       jmp     (_level0_func)
+! PUBLIC void write_cr3(unsigned long value);
+! Load 'value' into cr3 unless it equals the value remembered in loadedcr3.
+! When cr3 is actually written, loadedcr3 is updated and _dirtypde is cleared.
+_write_cr3:
+       push    ebp
+       mov     ebp, esp
+       mov     eax, 8(ebp)             ! eax = value (first argument)
+       cmp     eax, (loadedcr3)        ! same as the value recorded last time?
+       jz      0f                      ! yes: skip the cr3 write
+       mov     cr3, eax
+       mov     (loadedcr3), eax        ! remember what is loaded now
+       mov     (_dirtypde), 0
+0:
+       pop     ebp
+       ret
 
 !*===========================================================================*
-!*                             load_kernel_cr3                              *
+!*                             reload_cr3                              *
 !*===========================================================================*
-.align 16
-_load_kernel_cr3:
-       mov     eax, (_kernel_cr3)
-       mov     cr3, eax
+! PUBLIC void reload_cr3(void);
+! Clear _dirtypde and write cr3 back with the value it already holds.
+! On x86 a write to cr3 invalidates the non-global TLB entries, which is
+! presumably the purpose of this routine.
+_reload_cr3:
+       push    ebp
+       mov     ebp, esp
+       mov     (_dirtypde), 0
+       mov     eax, cr3                ! read the current value ...
+       mov     cr3, eax                ! ... and store it straight back
+       pop     ebp
        ret
 
+!*===========================================================================*
+!*                             level0_call                                  *
+!*===========================================================================*
+_level0_call:
+       call    save
+       jmp     (_level0_func)
+
 !*===========================================================================*
 !*                             data                                         *
 !*===========================================================================*
 
 .sect .rom     ! Before the string table please
        .data2  0x526F          ! this must be the first data entry (magic #)
-#if VM_KERN_NOPAGEZERO
-.align I386_PAGE_SIZE
-#endif
 
 .sect .bss
 k_stack:
@@ -581,7 +603,11 @@ k_stack:
 k_stktop:                      ! top of kernel stack
        .comm   ex_number, 4
        .comm   trap_errno, 4
+       .comm   old_eip_ptr, 4
+       .comm   old_eax_ptr, 4
        .comm   old_eip, 4
        .comm   old_cs, 4
        .comm   old_eflags, 4
+       .comm   pagefaultcr2, 4
+       .comm   loadedcr3, 4
 
index 398f8d19c60ed5679602e5b6e6096bd3a3277100..b2ae9eaff8959879ac264529957a47637437480a 100755 (executable)
@@ -167,6 +167,11 @@ PUBLIC void prot_init(void)
   unsigned ldt_index;
   register struct proc *rp;
 
+  /* Click-round kernel. */
+  if(kinfo.data_base % CLICK_SIZE)
+       minix_panic("kinfo.data_base not aligned", NO_NUM);
+  kinfo.data_size = ((kinfo.data_size+CLICK_SIZE-1)/CLICK_SIZE) * CLICK_SIZE;
+
   /* Build gdt and idt pointers in GDT where the BIOS expects them. */
   dtp= (struct desctableptr_s *) &gdt[GDT_INDEX];
   * (u16_t *) dtp->limit = (sizeof gdt) - 1;
@@ -334,3 +339,118 @@ PUBLIC void alloc_segments(register struct proc *rp)
       rp->p_reg.ds = (DS_LDT_INDEX*DESC_SIZE) | TI | privilege;
 }
 
+/*===========================================================================*
+ *                             printseg                             *
+ *===========================================================================*/
+PUBLIC void printseg(char *banner, int iscs, struct proc *pr, u32_t selector)
+{
+/* Print a human-readable decoding of the segment descriptor that 'selector'
+ * refers to: RPL, table index, base, size, noteworthy granularity/access
+ * bits and DPL.
+ *
+ *   banner    printed first when non-NULL
+ *   iscs      nonzero: describe a clear BIG bit as "16bit ", otherwise as
+ *             "not big "
+ *   pr        process whose LDT is consulted; only dereferenced when the
+ *             selector has the TI bit set
+ *   selector  segment selector to decode
+ *
+ * Returns early (after a message) when the index is outside the selected
+ * table, and panics when the reserved granularity bit 0x20 is set.
+ */
+       u32_t base, limit, index, dpl;
+       struct segdesc_s *desc;
+
+       if(banner) { kprintf("%s", banner); }
+
+       index = selector >> 3;
+
+       kprintf("RPL %d, ind %d of ",
+               (selector & RPL_MASK), index);
+
+       /* 'index' is unsigned, so only the upper bound needs checking. */
+       if(selector & TI) {
+               kprintf("LDT");
+               if(index >= LDT_SIZE) {
+                       kprintf("invalid index in ldt\n");
+                       return;
+               }
+               desc = &pr->p_seg.p_ldt[index];
+       } else {
+               kprintf("GDT");
+               if(index >= GDT_SIZE) {
+                       kprintf("invalid index in gdt\n");
+                       return;
+               }
+               desc = &gdt[index];
+       }
+
+       /* Reassemble the limit from its two descriptor fields. */
+       limit = desc->limit_low |
+               (((u32_t) desc->granularity & LIMIT_HIGH) << GRANULARITY_SHIFT);
+
+       if(desc->granularity & GRANULAR) {
+               /* Page-granular limit: convert to a byte limit. */
+               limit = (limit << PAGE_GRAN_SHIFT) + 0xfff;
+       }
+
+       /* Reassemble the base from its three descriptor fields. */
+       base = desc->base_low |
+               ((u32_t) desc->base_middle << BASE_MIDDLE_SHIFT) |
+               ((u32_t) desc->base_high << BASE_HIGH_SHIFT);
+
+       kprintf(" -> base 0x%08lx size 0x%08lx ", base, limit+1);
+
+       if(iscs) {
+               if(!(desc->granularity & BIG))
+                       kprintf("16bit ");
+       } else {
+               if(!(desc->granularity & BIG))
+                       kprintf("not big ");
+       }
+
+       if(desc->granularity & 0x20) {  /* reserved */
+               minix_panic("granularity reserved field set", NO_NUM);
+       }
+
+       if(!(desc->access & PRESENT))
+               kprintf("notpresent ");
+
+       if(!(desc->access & SEGMENT))
+               kprintf("system ");
+
+       if(desc->access & EXECUTABLE) {
+               kprintf("   exec ");
+               if(desc->access & CONFORMING) kprintf("conforming ");
+               if(!(desc->access & READABLE)) kprintf("non-readable ");
+       } else {
+               kprintf("nonexec ");
+               /* The bit being set means the segment IS expand-down; the
+                * message used to claim the opposite.
+                */
+               if(desc->access & EXPAND_DOWN) kprintf("expand-down ");
+               if(!(desc->access & WRITEABLE)) kprintf("non-writable ");
+       }
+
+       if(!(desc->access & ACCESSED)) {
+               kprintf("nonacc ");
+       }
+
+       dpl = ((u32_t) desc->access & DPL) >> DPL_SHIFT;
+
+       kprintf("DPL %d\n", dpl);
+
+       return;
+}
+
+/*===========================================================================*
+ *                             prot_set_kern_seg_limit                      *
+ *===========================================================================*/
+PUBLIC int prot_set_kern_seg_limit(vir_bytes limit)
+{
+/* Make the kernel data segment end at 'limit': kinfo.data_size becomes
+ * limit - kinfo.data_base, prot_init() is re-run, and every in-use kernel
+ * process gets the S entry of its memory map lengthened by the number of
+ * added clicks before its segments are rebuilt with alloc_segments().
+ *
+ * Returns EINVAL when limit does not lie above kinfo.data_base, OK otherwise.
+ */
+       struct proc *rp;
+       int orig_click;
+       int incr_clicks;
+
+       if(limit <= kinfo.data_base) {
+               kprintf("prot_set_kern_seg_limit: limit bogus\n");
+               return EINVAL;
+       }
+
+       /* Do actual increase.
+        * NOTE(review): incr_clicks is computed with truncating division
+        * before prot_init() rounds kinfo.data_size up to a whole click, so
+        * a limit that is not click-aligned yields one click less here than
+        * the segment ends up with - confirm callers pass aligned limits.
+        */
+       orig_click = kinfo.data_size / CLICK_SIZE;
+       kinfo.data_size = limit - kinfo.data_base;
+       incr_clicks = kinfo.data_size / CLICK_SIZE - orig_click;
+
+       prot_init();
+
+       /* Increase kernel processes too. */
+       for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) {
+               if (RTS_ISSET(rp, SLOT_FREE) || !iskernelp(rp))
+                       continue;
+               rp->p_memmap[S].mem_len += incr_clicks;
+               alloc_segments(rp);
+       }
+
+       return OK;
+}
index 8b18f0c9166ef8468a5d25607635bc8cd30eb585..5ab71ea578394ecf3b2a6ee23c85b4db8529394c 100644 (file)
@@ -49,11 +49,17 @@ _PROTOTYPE( void vir_insb, (u16_t port, struct proc *proc, u32_t vir, size_t cou
 _PROTOTYPE( void vir_outsb, (u16_t port, struct proc *proc, u32_t vir, size_t count));
 _PROTOTYPE( void vir_insw, (u16_t port, struct proc *proc, u32_t vir, size_t count));
 _PROTOTYPE( void vir_outsw, (u16_t port, struct proc *proc, u32_t vir, size_t count));
+_PROTOTYPE( void i386_updatepde, (int pde, u32_t val));
+_PROTOTYPE( void i386_freepde, (int pde));
+_PROTOTYPE( void getcr3val, (void));
+_PROTOTYPE( void switchedcr3, (void));
+_PROTOTYPE( void vm_set_cr3, (struct proc *));
 
 
 /* exception.c */
 _PROTOTYPE( void exception, (unsigned vec_nr, u32_t trap_errno,
-       u32_t old_eip, U16_t old_cs, u32_t old_eflags)                  );
+       u32_t old_eip, U16_t old_cs, u32_t old_eflags,
+       u32_t *old_eip_ptr, u32_t *old_eax_ptr, u32_t pagefaultcr2)     );
 
 /* klib386.s */
 _PROTOTYPE( void level0, (void (*func)(void))                           );
@@ -70,7 +76,12 @@ _PROTOTYPE( void phys_insb, (U16_t port, phys_bytes buf, size_t count)  );
 _PROTOTYPE( void phys_insw, (U16_t port, phys_bytes buf, size_t count)  );
 _PROTOTYPE( void phys_outsb, (U16_t port, phys_bytes buf, size_t count) );
 _PROTOTYPE( void phys_outsw, (U16_t port, phys_bytes buf, size_t count) );
-_PROTOTYPE( void i386_invlpg, (U32_t addr) );
+_PROTOTYPE( void i386_invlpg_level0, (void) );
+_PROTOTYPE( int _memcpy_k, (void *dst, void *src, size_t n) );
+_PROTOTYPE( int _memcpy_k_fault, (void) );
+_PROTOTYPE( u32_t read_cr3, (void) );
+_PROTOTYPE( void reload_cr3, (void) );
+_PROTOTYPE( void phys_memset, (phys_bytes ph, u32_t c, phys_bytes bytes)       );
 
 /* protect.c */
 _PROTOTYPE( void prot_init, (void)                                             );
@@ -79,6 +90,8 @@ _PROTOTYPE( void init_codeseg, (struct segdesc_s *segdp, phys_bytes base,
 _PROTOTYPE( void init_dataseg, (struct segdesc_s *segdp, phys_bytes base,
                 vir_bytes size, int privilege)                          );
 _PROTOTYPE( void enable_iop, (struct proc *pp)                          );
+_PROTOTYPE( int prot_set_kern_seg_limit, (vir_bytes limit)             );
+_PROTOTYPE( void printseg, (char *banner, int iscs, struct proc *pr, u32_t selector)             );
 
 /* prototype of an interrupt vector table entry */
 struct gate_table_s {
diff --git a/kernel/arch/i386/sha1.h b/kernel/arch/i386/sha1.h
new file mode 100644 (file)
index 0000000..dbfdff3
--- /dev/null
@@ -0,0 +1,551 @@
+/* sha1.c : Implementation of the Secure Hash Algorithm */
+
+/* SHA: NIST's Secure Hash Algorithm */
+
+/*     This version written November 2000 by David Ireland of 
+       DI Management Services Pty Limited <code@di-mgt.com.au>
+
+       Adapted from code in the Python Cryptography Toolkit, 
+       version 1.0.0 by A.M. Kuchling 1995.
+*/
+
+/* AM Kuchling's posting:- 
+   Based on SHA code originally posted to sci.crypt by Peter Gutmann
+   in message <30ajo5$oe8@ccu2.auckland.ac.nz>.
+   Modified to test for endianness on creation of SHA objects by AMK.
+   Also, the original specification of SHA was found to have a weakness
+   by NSA/NIST.  This code implements the fixed version of SHA.
+*/
+
+/* Here's the first paragraph of Peter Gutmann's posting:
+   
+The following is my SHA (FIPS 180) code updated to allow use of the "fixed"
+SHA, thanks to Jim Gillogly and an anonymous contributor for the information on
+what's changed in the new version.  The fix is a simple change which involves
+adding a single rotate in the initial expansion function.  It is unknown
+whether this is an optimal solution to the problem which was discovered in the
+SHA or whether it's simply a bandaid which fixes the problem with a minimum of
+effort (for example the reengineering of a great many Capstone chips).
+*/
+
+/* h files included here to make this just one file ... */
+
+/* global.h */
+
+#ifndef _GLOBAL_H_
+#define _GLOBAL_H_ 1
+
+/* POINTER defines a generic pointer type */
+typedef unsigned char *POINTER;
+
+/* UINT4 defines a four byte word */
+typedef unsigned long int UINT4;
+
+/* SHA1BYTE defines a unsigned character */
+typedef unsigned char SHA1BYTE;
+
+#endif /* end _GLOBAL_H_ */
+
+/* sha.h */
+
+#ifndef _SHA_H_
+#define _SHA_H_ 1
+
+/* #include "global.h" */
+
+/* The structure for storing SHS info */
+
+typedef struct 
+{
+       UINT4 digest[ 5 ];            /* Message digest */
+       UINT4 countLo, countHi;       /* 64-bit bit count */
+       UINT4 data[ 16 ];             /* SHS data buffer */
+       int Endianness;
+} SHA_CTX;
+
+/* Message digest functions */
+
+void SHAInit(SHA_CTX *);
+void SHAUpdate(SHA_CTX *, SHA1BYTE *buffer, int count);
+void SHAFinal(SHA1BYTE *output, SHA_CTX *);
+
+#endif /* end _SHA_H_ */
+
+/* endian.h */
+
+#ifndef _ENDIAN_H_
+#define _ENDIAN_H_ 1
+
+void endianTest(int *endianness);
+
+#endif /* end _ENDIAN_H_ */
+
+
+/* sha.c */
+
+#include <stdio.h>
+#include <string.h>
+
+static void SHAtoByte(SHA1BYTE *output, UINT4 *input, unsigned int len);
+
+/* The SHS block size and message digest sizes, in bytes */
+
+#define SHS_DATASIZE    64
+#define SHS_DIGESTSIZE  20
+
+
+/* The SHS f()-functions.  The f1 and f3 functions can be optimized to
+   save one boolean operation each - thanks to Rich Schroeppel,
+   rcs@cs.arizona.edu for discovering this */
+
+/*#define f1(x,y,z) ( ( x & y ) | ( ~x & z ) )          // Rounds  0-19 */
+#define f1(x,y,z)   ( z ^ ( x & ( y ^ z ) ) )           /* Rounds  0-19 */
+#define f2(x,y,z)   ( x ^ y ^ z )                       /* Rounds 20-39 */
+/*#define f3(x,y,z) ( ( x & y ) | ( x & z ) | ( y & z ) )   // Rounds 40-59 */
+#define f3(x,y,z)   ( ( x & y ) | ( z & ( x | y ) ) )   /* Rounds 40-59 */
+#define f4(x,y,z)   ( x ^ y ^ z )                       /* Rounds 60-79 */
+
+/* The SHS Mysterious Constants */
+
+#define K1  0x5A827999L                                 /* Rounds  0-19 */
+#define K2  0x6ED9EBA1L                                 /* Rounds 20-39 */
+#define K3  0x8F1BBCDCL                                 /* Rounds 40-59 */
+#define K4  0xCA62C1D6L                                 /* Rounds 60-79 */
+
+/* SHS initial values */
+
+#define h0init  0x67452301L
+#define h1init  0xEFCDAB89L
+#define h2init  0x98BADCFEL
+#define h3init  0x10325476L
+#define h4init  0xC3D2E1F0L
+
+/* Note that it may be necessary to add parentheses to these macros if they
+   are to be called with expressions as arguments */
+/* 32-bit rotate left - kludged with shifts */
+
+#define ROTL(n,X)  ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) )
+
+/* The initial expanding function.  The hash function is defined over an
+   80-UINT4 expanded input array W, where the first 16 are copies of the input
+   data, and the remaining 64 are defined by
+
+        W[ i ] = W[ i - 16 ] ^ W[ i - 14 ] ^ W[ i - 8 ] ^ W[ i - 3 ]
+
+   This implementation generates these values on the fly in a circular
+   buffer - thanks to Colin Plumb, colin@nyx10.cs.du.edu for this
+   optimization.
+
+   The updated SHS changes the expanding function by adding a rotate of 1
+   bit.  Thanks to Jim Gillogly, jim@rand.org, and an anonymous contributor
+   for this information */
+
+#define expand(W,i) ( W[ i & 15 ] = ROTL( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \
+                                                 W[ (i - 8) & 15 ] ^ W[ (i - 3) & 15 ] ) ) )
+
+
+/* The prototype SHS sub-round.  The fundamental sub-round is:
+
+        a' = e + ROTL( 5, a ) + f( b, c, d ) + k + data;
+        b' = a;
+        c' = ROTL( 30, b );
+        d' = c;
+        e' = d;
+
+   but this is implemented by unrolling the loop 5 times and renaming the
+   variables ( e, a, b, c, d ) = ( a', b', c', d', e' ) each iteration.
+   This code is then replicated 20 times for each of the 4 functions, using
+   the next 20 values from the W[] array each time */
+
+#define subRound(a, b, c, d, e, f, k, data) \
+    ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) )
+
+/* Prepare an SHA context for a new message: let endianTest() fill in the
+   Endianness field, load the five initial chaining values and zero the
+   bit count. */
+
+void SHAInit(SHA_CTX *shsInfo)
+{
+    UINT4 *state = shsInfo->digest;
+
+    endianTest(&shsInfo->Endianness);
+
+    /* Initial values of the h-vars */
+    state[ 0 ] = h0init;
+    state[ 1 ] = h1init;
+    state[ 2 ] = h2init;
+    state[ 3 ] = h3init;
+    state[ 4 ] = h4init;
+
+    /* No bits hashed yet */
+    shsInfo->countHi = 0;
+    shsInfo->countLo = 0;
+}
+
+/* Perform the SHS transformation.  Note that this code, like MD5, seems to
+   break some optimizing compilers due to the complexity of the expressions
+   and the size of the basic block.  It may be necessary to split it into
+   sections, e.g. based on the four subrounds
+
+   digest: five-word hash state; read on entry and updated in place.
+   data:   one input block of SHS_DATASIZE bytes.  It is copied into the
+           local eData buffer and all expansion is done on that copy, so
+           this function does not write to data. */
+
+static void SHSTransform( UINT4 *digest, UINT4 *data )
+    {
+    UINT4 A, B, C, Dv, E;     /* Local vars */
+    UINT4 eData[ 16 ];       /* Expanded data */
+
+    /* Set up first buffer and local data buffer */
+    A = digest[ 0 ];
+    B = digest[ 1 ];
+    C = digest[ 2 ];
+    Dv = digest[ 3 ];
+    E = digest[ 4 ];
+    memcpy( (POINTER)eData, (POINTER)data, SHS_DATASIZE );
+
+    /* Heavy mangling, in 4 sub-rounds of 20 iterations each.  The five
+       working variables rotate one position per call, so the same pattern
+       of arguments repeats every five lines. */
+    subRound( A, B, C, Dv, E, f1, K1, eData[  0 ] );
+    subRound( E, A, B, C, Dv, f1, K1, eData[  1 ] );
+    subRound( Dv, E, A, B, C, f1, K1, eData[  2 ] );
+    subRound( C, Dv, E, A, B, f1, K1, eData[  3 ] );
+    subRound( B, C, Dv, E, A, f1, K1, eData[  4 ] );
+    subRound( A, B, C, Dv, E, f1, K1, eData[  5 ] );
+    subRound( E, A, B, C, Dv, f1, K1, eData[  6 ] );
+    subRound( Dv, E, A, B, C, f1, K1, eData[  7 ] );
+    subRound( C, Dv, E, A, B, f1, K1, eData[  8 ] );
+    subRound( B, C, Dv, E, A, f1, K1, eData[  9 ] );
+    subRound( A, B, C, Dv, E, f1, K1, eData[ 10 ] );
+    subRound( E, A, B, C, Dv, f1, K1, eData[ 11 ] );
+    subRound( Dv, E, A, B, C, f1, K1, eData[ 12 ] );
+    subRound( C, Dv, E, A, B, f1, K1, eData[ 13 ] );
+    subRound( B, C, Dv, E, A, f1, K1, eData[ 14 ] );
+    subRound( A, B, C, Dv, E, f1, K1, eData[ 15 ] );
+    subRound( E, A, B, C, Dv, f1, K1, expand( eData, 16 ) );
+    subRound( Dv, E, A, B, C, f1, K1, expand( eData, 17 ) );
+    subRound( C, Dv, E, A, B, f1, K1, expand( eData, 18 ) );
+    subRound( B, C, Dv, E, A, f1, K1, expand( eData, 19 ) );
+
+    subRound( A, B, C, Dv, E, f2, K2, expand( eData, 20 ) );
+    subRound( E, A, B, C, Dv, f2, K2, expand( eData, 21 ) );
+    subRound( Dv, E, A, B, C, f2, K2, expand( eData, 22 ) );
+    subRound( C, Dv, E, A, B, f2, K2, expand( eData, 23 ) );
+    subRound( B, C, Dv, E, A, f2, K2, expand( eData, 24 ) );
+    subRound( A, B, C, Dv, E, f2, K2, expand( eData, 25 ) );
+    subRound( E, A, B, C, Dv, f2, K2, expand( eData, 26 ) );
+    subRound( Dv, E, A, B, C, f2, K2, expand( eData, 27 ) );
+    subRound( C, Dv, E, A, B, f2, K2, expand( eData, 28 ) );
+    subRound( B, C, Dv, E, A, f2, K2, expand( eData, 29 ) );
+    subRound( A, B, C, Dv, E, f2, K2, expand( eData, 30 ) );
+    subRound( E, A, B, C, Dv, f2, K2, expand( eData, 31 ) );
+    subRound( Dv, E, A, B, C, f2, K2, expand( eData, 32 ) );
+    subRound( C, Dv, E, A, B, f2, K2, expand( eData, 33 ) );
+    subRound( B, C, Dv, E, A, f2, K2, expand( eData, 34 ) );
+    subRound( A, B, C, Dv, E, f2, K2, expand( eData, 35 ) );
+    subRound( E, A, B, C, Dv, f2, K2, expand( eData, 36 ) );
+    subRound( Dv, E, A, B, C, f2, K2, expand( eData, 37 ) );
+    subRound( C, Dv, E, A, B, f2, K2, expand( eData, 38 ) );
+    subRound( B, C, Dv, E, A, f2, K2, expand( eData, 39 ) );
+
+    subRound( A, B, C, Dv, E, f3, K3, expand( eData, 40 ) );
+    subRound( E, A, B, C, Dv, f3, K3, expand( eData, 41 ) );
+    subRound( Dv, E, A, B, C, f3, K3, expand( eData, 42 ) );
+    subRound( C, Dv, E, A, B, f3, K3, expand( eData, 43 ) );
+    subRound( B, C, Dv, E, A, f3, K3, expand( eData, 44 ) );
+    subRound( A, B, C, Dv, E, f3, K3, expand( eData, 45 ) );
+    subRound( E, A, B, C, Dv, f3, K3, expand( eData, 46 ) );
+    subRound( Dv, E, A, B, C, f3, K3, expand( eData, 47 ) );
+    subRound( C, Dv, E, A, B, f3, K3, expand( eData, 48 ) );
+    subRound( B, C, Dv, E, A, f3, K3, expand( eData, 49 ) );
+    subRound( A, B, C, Dv, E, f3, K3, expand( eData, 50 ) );
+    subRound( E, A, B, C, Dv, f3, K3, expand( eData, 51 ) );
+    subRound( Dv, E, A, B, C, f3, K3, expand( eData, 52 ) );
+    subRound( C, Dv, E, A, B, f3, K3, expand( eData, 53 ) );
+    subRound( B, C, Dv, E, A, f3, K3, expand( eData, 54 ) );
+    subRound( A, B, C, Dv, E, f3, K3, expand( eData, 55 ) );
+    subRound( E, A, B, C, Dv, f3, K3, expand( eData, 56 ) );
+    subRound( Dv, E, A, B, C, f3, K3, expand( eData, 57 ) );
+    subRound( C, Dv, E, A, B, f3, K3, expand( eData, 58 ) );
+    subRound( B, C, Dv, E, A, f3, K3, expand( eData, 59 ) );
+
+    subRound( A, B, C, Dv, E, f4, K4, expand( eData, 60 ) );
+    subRound( E, A, B, C, Dv, f4, K4, expand( eData, 61 ) );
+    subRound( Dv, E, A, B, C, f4, K4, expand( eData, 62 ) );
+    subRound( C, Dv, E, A, B, f4, K4, expand( eData, 63 ) );
+    subRound( B, C, Dv, E, A, f4, K4, expand( eData, 64 ) );
+    subRound( A, B, C, Dv, E, f4, K4, expand( eData, 65 ) );
+    subRound( E, A, B, C, Dv, f4, K4, expand( eData, 66 ) );
+    subRound( Dv, E, A, B, C, f4, K4, expand( eData, 67 ) );
+    subRound( C, Dv, E, A, B, f4, K4, expand( eData, 68 ) );
+    subRound( B, C, Dv, E, A, f4, K4, expand( eData, 69 ) );
+    subRound( A, B, C, Dv, E, f4, K4, expand( eData, 70 ) );
+    subRound( E, A, B, C, Dv, f4, K4, expand( eData, 71 ) );
+    subRound( Dv, E, A, B, C, f4, K4, expand( eData, 72 ) );
+    subRound( C, Dv, E, A, B, f4, K4, expand( eData, 73 ) );
+    subRound( B, C, Dv, E, A, f4, K4, expand( eData, 74 ) );
+    subRound( A, B, C, Dv, E, f4, K4, expand( eData, 75 ) );
+    subRound( E, A, B, C, Dv, f4, K4, expand( eData, 76 ) );
+    subRound( Dv, E, A, B, C, f4, K4, expand( eData, 77 ) );
+    subRound( C, Dv, E, A, B, f4, K4, expand( eData, 78 ) );
+    subRound( B, C, Dv, E, A, f4, K4, expand( eData, 79 ) );
+
+    /* Build message digest */
+    digest[ 0 ] += A;
+    digest[ 1 ] += B;
+    digest[ 2 ] += C;
+    digest[ 3 ] += Dv;
+    digest[ 4 ] += E;
+    }
+
+/* On a little-endian CPU the long words of the buffer have to be
+   byte-reversed; this does so in place for byteCount bytes of data. */
+
+static void longReverse(UINT4 *buffer, int byteCount, int Endianness )
+{
+    UINT4 word;
+
+    /* A non-zero Endianness flag means no reversal is wanted. */
+    if (Endianness)
+        return;
+
+    /* Turn the byte count into a count of whole words and visit each one. */
+    byteCount /= sizeof( UINT4 );
+    for( ; byteCount != 0; byteCount-- )
+        {
+        word = *buffer;
+        /* First swap the two bytes inside each 16-bit half ... */
+        word = ( ( word & 0xFF00FF00L ) >> 8 ) | ( ( word & 0x00FF00FFL ) << 8 );
+        /* ... then swap the two halves themselves. */
+        *buffer = ( word << 16 ) | ( word >> 16 );
+        buffer++;
+        }
+}
+
+/* Update SHS for a block of data: feed 'count' bytes from 'buffer' into the
+   hash state 'shsInfo'. Input is gathered in shsInfo->data and passed to
+   SHSTransform() every time SHS_DATASIZE bytes are available; bytes left
+   over at the end stay in shsInfo->data for the next call. */
+
+void SHAUpdate(SHA_CTX *shsInfo, SHA1BYTE *buffer, int count)
+{
+    UINT4 tmp;
+    int dataCount;
+
+    /* Update bitcount: countHi:countLo holds the number of bits hashed so
+       far, so 'count' bytes add count << 3 bits. 'tmp' keeps the old low
+       word, both to detect wrap-around and to find the old fill level. */
+    tmp = shsInfo->countLo;
+    if ( ( shsInfo->countLo = tmp + ( ( UINT4 ) count << 3 ) ) < tmp )
+        shsInfo->countHi++;             /* Carry from low to high */
+    shsInfo->countHi += count >> 29;    /* bits of count << 3 above bit 31 */
+
+    /* Get count of bytes already in data: (old bit count / 8) mod 64 */
+    dataCount = ( int ) ( tmp >> 3 ) & 0x3F;
+
+    /* Handle any leading odd-sized chunks: first top up the partly filled
+       block left behind by an earlier call */
+    if( dataCount )
+        {
+        SHA1BYTE *p = ( SHA1BYTE * ) shsInfo->data + dataCount;
+
+        dataCount = SHS_DATASIZE - dataCount;   /* now: free bytes in block */
+        if( count < dataCount )
+            {
+            /* Still not a full block: just store the bytes and wait */
+            memcpy( p, buffer, count );
+            return;
+            }
+        memcpy( p, buffer, dataCount );
+        longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness);
+        SHSTransform( shsInfo->digest, shsInfo->data );
+        buffer += dataCount;
+        count -= dataCount;
+        }
+
+    /* Process data in SHS_DATASIZE chunks */
+    while( count >= SHS_DATASIZE )
+        {
+        memcpy( (POINTER)shsInfo->data, (POINTER)buffer, SHS_DATASIZE );
+        longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness );
+        SHSTransform( shsInfo->digest, shsInfo->data );
+        buffer += SHS_DATASIZE;
+        count -= SHS_DATASIZE;
+        }
+
+    /* Handle any remaining bytes of data: keep them (fewer than
+       SHS_DATASIZE) at the start of the block for the next call. */
+    memcpy( (POINTER)shsInfo->data, (POINTER)buffer, count );
+    }
+
+/* Final wrapup - pad to SHS_DATASIZE-byte boundary with the bit pattern
+   1 0* (64-bit count of bits processed, MSB-first), run the last
+   transform(s), write the SHS_DIGESTSIZE-byte digest to 'output' and wipe
+   the whole context. After this call the context holds only zeroes, so it
+   has to be initialised again before it is reused. */
+
+void SHAFinal(SHA1BYTE *output, SHA_CTX *shsInfo)
+{
+    int count;
+    SHA1BYTE *dataPtr;
+
+    /* Compute number of bytes mod 64 */
+    count = ( int ) shsInfo->countLo;
+    count = ( count >> 3 ) & 0x3F;
+
+    /* Set the first char of padding to 0x80.  This is safe since there is
+       always at least one byte free */
+    dataPtr = ( SHA1BYTE * ) shsInfo->data + count;
+    *dataPtr++ = 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = SHS_DATASIZE - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if( count < 8 )
+        {
+        /* Two lots of padding:  Pad the first block to 64 bytes */
+        memset( dataPtr, 0, count );
+        longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness );
+        SHSTransform( shsInfo->digest, shsInfo->data );
+
+        /* Now fill the next block with 56 bytes */
+        memset( (POINTER)shsInfo->data, 0, SHS_DATASIZE - 8 );
+        }
+    else
+        /* Pad block to 56 bytes */
+        memset( dataPtr, 0, count - 8 );
+
+    /* Append length in bits and transform */
+    shsInfo->data[ 14 ] = shsInfo->countHi;
+    shsInfo->data[ 15 ] = shsInfo->countLo;
+
+    /* The two length words above were stored as native values, so only the
+       first SHS_DATASIZE - 8 bytes are byte-reversed. */
+    longReverse( shsInfo->data, SHS_DATASIZE - 8, shsInfo->Endianness );
+    SHSTransform( shsInfo->digest, shsInfo->data );
+
+    /* Output to an array of bytes */
+    SHAtoByte(output, shsInfo->digest, SHS_DIGESTSIZE);
+
+    /* Zeroise sensitive stuff. The size must be that of the context itself
+       (*shsInfo); the size of the pointer would clear only its first few
+       bytes and leave the rest of the state in memory. */
+    memset((POINTER)shsInfo, 0, sizeof(*shsInfo));
+}
+
+static void SHAtoByte(SHA1BYTE *output, UINT4 *input, unsigned int len)
+{
+       /* Store the digest words in 'output' as bytes, most significant byte
+        * of each word first. 'len' is the number of output bytes to produce.
+        */
+       unsigned int word = 0, off = 0;
+
+       while(off < len)
+       {
+        output[off+3] = (SHA1BYTE)( input[word]        & 0xff);
+        output[off+2] = (SHA1BYTE)((input[word] >> 8 ) & 0xff);
+        output[off+1] = (SHA1BYTE)((input[word] >> 16) & 0xff);
+        output[off  ] = (SHA1BYTE)((input[word] >> 24) & 0xff);
+        word++;
+        off += 4;
+       }
+}
+
+
+unsigned char digest[SHS_DIGESTSIZE];
+unsigned char testmessage[3] = {'a', 'b', 'c' };
+unsigned char *mess56 = (unsigned char *)
+       "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+
+/* Correct solutions from FIPS PUB 180-1 */
+char *dig1 = "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D";
+char *dig2 = "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1";
+char *dig3 = "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F";
+
+/* Output should look like:-
+ a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d
+ A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D <= correct
+ 84983e44 1c3bd26e baae4aa1 f95129e5 e54670f1
+ 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 <= correct
+ 34aa973c d4c4daa4 f61eeb2b dbad2731 6534016f
+ 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F <= correct
+*/
+
+/* Print the global 'digest' in hex, a space before every group of four
+ * bytes, followed on the next line by the expected value for comparison.
+ */
+static void sha1test_report(char *expected)
+{
+       int i;
+
+       for (i = 0; i < SHS_DIGESTSIZE; i++)
+       {
+               if ((i % 4) == 0) printf(" ");
+               printf("%02x", digest[i]);
+       }
+       printf("\n");
+       printf(" %s <= correct\n", expected);
+}
+
+/* Self-test: hash the three FIPS PUB 180-1 sample messages and print each
+ * computed digest next to the correct one.
+ */
+void sha1test(void)
+{
+       SHA_CTX sha;
+       int i;
+       SHA1BYTE big[1000];
+
+       /* Sample 1: the three bytes "abc". */
+       SHAInit(&sha);
+       SHAUpdate(&sha, testmessage, 3);
+       SHAFinal(digest, &sha);
+       sha1test_report(dig1);
+
+       /* Sample 2: the 56-byte message. */
+       SHAInit(&sha);
+       SHAUpdate(&sha, mess56, 56);
+       SHAFinal(digest, &sha);
+       sha1test_report(dig2);
+
+       /* Sample 3: one million times 'a', fed as 1000 blocks of 1000. */
+       for (i = 0; i < 1000; i++)
+               big[i] = 'a';
+
+       SHAInit(&sha);
+       for (i = 0; i < 1000; i++)
+               SHAUpdate(&sha, big, 1000);
+       SHAFinal(digest, &sha);
+       sha1test_report(dig3);
+}
+
+/* endian.c */
+
+/* Detect the byte order of the CPU and store the result in *endian_ness:
+ * non-zero on a big-endian machine (no byte swapping needed), 0 on a
+ * little-endian one (data must be swapped).
+ */
+void endianTest(int *endian_ness)
+{
+       /* Look at the first byte in memory of a 16-bit value whose high byte
+        * is '#'. Inspecting an unsigned short object through an unsigned
+        * char pointer is always a valid, aligned access, whereas reading a
+        * string literal through an unsigned short pointer is not.
+        */
+       unsigned short probe = (unsigned short) (('#' << 8) | 'S');
+
+       if(*(unsigned char *) &probe == '#')
+       {
+               /* High byte comes first: big endian = no change. */
+               *endian_ness = !(0);
+       }
+       else
+       {
+               /* Low byte comes first: little endian = swap. */
+               *endian_ness = 0;
+       }
+}
+
+/* Print the SHS_DIGESTSIZE bytes at 'digest' as two-digit lowercase hex
+ * numbers followed by a newline. Returns 'digest'.
+ */
+static char *
+sha1print(char *digest)
+{
+       int i;
+       for(i = 0; i < SHS_DIGESTSIZE; i++) {
+               /* Cast so that bytes >= 0x80 are not sign-extended. */
+               printf("%02x", (unsigned char) digest[i]);
+       }
+       printf("\n");
+
+       /* The function is declared to return char *; without an explicit
+        * return the result would be undefined for any caller that uses it.
+        */
+       return digest;
+}
+
+/* Hash 'bytes' bytes starting at address 'ptr' and store the result in
+ * 'digest'. The data is fetched with PHYS_COPY_CATCH() through a local
+ * bounce buffer, one piece at a time. Returns OK, or EFAULT as soon as a
+ * copy reports a non-zero 'addr' (in which case 'digest' is not written).
+ */
+static int
+phys_sha1(unsigned long ptr, unsigned long bytes, unsigned char *digest)
+{
+       static unsigned char block[1024];
+       unsigned long addr = 0;
+       unsigned long len;
+       SHA_CTX sha;
+
+       SHAInit(&sha);
+
+       for( ; bytes > 0; ptr += len, bytes -= len) {
+               /* Take a full buffer, or whatever is left if that is less. */
+               len = sizeof(block);
+               if(bytes < len)
+                       len = bytes;
+
+               PHYS_COPY_CATCH(ptr, vir2phys(block), len, addr);
+               if(addr)
+                       return EFAULT;
+
+               SHAUpdate(&sha, block, len);
+       }
+
+       SHAFinal(digest, &sha);
+       return OK;
+}
+
+/* Compute the digest of the 'bytes' bytes at 'ptr' and store it in
+ * 'digest'.
+ */
+static void
+sha1(unsigned char *ptr, unsigned long bytes, unsigned char *digest)
+{
+       /* SHAUpdate() takes its length as an int, so a large unsigned long
+        * length must not be passed in one go: it would be truncated or turn
+        * negative. Feed the data in pieces that are sure to fit instead.
+        */
+       const unsigned long maxchunk = 0x40000000UL;
+       unsigned long chunk;
+       SHA_CTX sha;
+
+       SHAInit(&sha);
+       while(bytes > 0) {
+               chunk = bytes > maxchunk ? maxchunk : bytes;
+               SHAUpdate(&sha, ptr, (int) chunk);
+               ptr += chunk;
+               bytes -= chunk;
+       }
+       SHAFinal(digest, &sha);
+}
+
index 805e4d45175cd3d20452682464de246bb4b912b6..80a7fb9a3227cd538bfea067a3b86738871c664c 100644 (file)
 
 #include "proto.h"
 #include "../../proc.h"
+#include "../../debug.h"
 
 #define CR0_EM 0x0004          /* set to enable trap on any FP instruction */
 
 FORWARD _PROTOTYPE( void ser_debug, (int c));
-FORWARD _PROTOTYPE( void ser_dump_stats, (void));
 
 PUBLIC void arch_shutdown(int how)
 {
@@ -137,82 +137,143 @@ PUBLIC void do_ser_debug()
        ser_debug(c);
 }
 
+PRIVATE void ser_dump_queues(void)
+{
+       /* Walk all scheduling queues and print name and endpoint of every
+        * process linked on each of them.
+        */
+       int prio;
+       struct proc *rp;
+
+       for(prio = 0; prio < NR_SCHED_QUEUES; prio++) {
+               rp = rdy_head[prio];
+
+               /* The queue number is only printed for non-empty queues; the
+                * newline below is printed for every queue.
+                */
+               if(rp)
+                       printf("%2d: ", prio);
+
+               while(rp) {
+                       printf("%s / %d  ", rp->p_name, rp->p_endpoint);
+                       rp = rp->p_nextready;
+               }
+               printf("\n");
+       }
+}
+
+PRIVATE void ser_dump_segs(void)
+{
+       /* For every process slot in use, print a header line and then the
+        * cs and ds segments via printseg(); ss is shown only when it
+        * differs from ds.
+        */
+       struct proc *rp;
+
+       for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++)
+       {
+               if (!(rp->p_rts_flags & SLOT_FREE))
+               {
+                       kprintf("%d: %s ep %d\n", proc_nr(rp), rp->p_name, rp->p_endpoint);
+                       printseg("cs: ", 1, rp, rp->p_reg.cs);
+                       printseg("ds: ", 0, rp, rp->p_reg.ds);
+                       if(rp->p_reg.ss != rp->p_reg.ds)
+                               printseg("ss: ", 0, rp, rp->p_reg.ss);
+               }
+       }
+}
+
 PRIVATE void ser_debug(int c)
 {
+       int u = 0;
+
        do_serial_debug++;
-       kprintf("ser_debug: %d\n", c);
+       /* Disable interrupts so that we get a consistent state. */
+       if(!intr_disabled()) { lock; u = 1; };
+
        switch(c)
        {
        case '1':
                ser_dump_proc();
                break;
        case '2':
-               ser_dump_stats();
+               ser_dump_queues();
                break;
+       case '3':
+               ser_dump_segs();
+               break;
+#if DEBUG_TRACE
+#define TOGGLECASE(ch, flag)                           \
+       case ch: {                                      \
+               if(verboseflags & flag) {               \
+                       verboseflags &= ~flag;          \
+                       printf("%s disabled\n", #flag); \
+               } else {                                \
+                       verboseflags |= flag;           \
+                       printf("%s enabled\n", #flag);  \
+               }                                       \
+               break;                                  \
+               }
+       TOGGLECASE('8', VF_SCHEDULING)
+       TOGGLECASE('9', VF_PICKPROC)
+#endif
        }
        do_serial_debug--;
+       if(u) { unlock; }
 }
 
-PUBLIC void ser_dump_proc()
+PRIVATE void printslot(struct proc *pp, int level)
 {
-       struct proc *pp;
-       int u = 0;
+       struct proc *depproc = NULL;
+       int dep = NONE;
+#define COL { int i; for(i = 0; i < level; i++) printf("> "); }
 
-       /* Disable interrupts so that we get a consistent state. */
-       if(!intr_disabled()) { lock; u = 1; };
+       if(level >= NR_PROCS) {
+               kprintf("loop??\n");
+               return;
+       }
 
-       for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
-       {
-               if (pp->p_rts_flags & SLOT_FREE)
-                       continue;
-               kprintf(
-       "%d: 0x%02x %s e %d src %d dst %d prio %d/%d time %d/%d EIP 0x%x\n",
-                       proc_nr(pp),
-                       pp->p_rts_flags, pp->p_name,
-                       pp->p_endpoint, pp->p_getfrom_e, pp->p_sendto_e,
-                       pp->p_priority, pp->p_max_priority,
-                       pp->p_user_time, pp->p_sys_time, 
-                       pp->p_reg.pc);
-               proc_stacktrace(pp);
+       COL
+
+       kprintf("%d: %s %d prio %d/%d time %d/%d cr3 0x%lx rts %s misc %s",
+               proc_nr(pp), pp->p_name, pp->p_endpoint, 
+               pp->p_priority, pp->p_max_priority, pp->p_user_time,
+               pp->p_sys_time, pp->p_seg.p_cr3,
+               rtsflagstr(pp->p_rts_flags), miscflagstr(pp->p_misc_flags));
+
+       if(pp->p_rts_flags & SENDING) {
+               dep = pp->p_sendto_e;
+               kprintf(" to: ");
+       } else if(pp->p_rts_flags & RECEIVING) {
+               dep = pp->p_getfrom_e;
+               kprintf(" from: ");
        }
 
-       if(u) { unlock; }
+       if(dep != NONE) {
+               if(dep == ANY) {
+                       kprintf(" ANY\n");
+               } else {
+                       int procno;
+                       if(!isokendpt(dep, &procno)) {
+                               kprintf(" ??? %d\n", dep);
+                       } else {
+                               depproc = proc_addr(procno);
+                               if(depproc->p_rts_flags & SLOT_FREE) {
+                                       kprintf(" empty slot %d???\n", procno);
+                                       depproc = NULL;
+                               } else {
+                                       kprintf(" %s\n", depproc->p_name);
+                               }
+                       }
+               }
+       } else {
+               kprintf("\n");
+       }
+
+       COL
+       proc_stacktrace(pp);
+
+
+       if(depproc)
+               printslot(depproc, level+1);
 }
 
-PRIVATE void ser_dump_stats()
+
+PUBLIC void ser_dump_proc()
 {
-       kprintf("ipc_stats:\n");
-       kprintf("deadproc: %d\n", ipc_stats.deadproc);
-       kprintf("bad_endpoint: %d\n", ipc_stats.bad_endpoint);
-       kprintf("dst_not_allowed: %d\n", ipc_stats.dst_not_allowed);
-       kprintf("bad_call: %d\n", ipc_stats.bad_call);
-       kprintf("call_not_allowed: %d\n", ipc_stats.call_not_allowed);
-       kprintf("bad_buffer: %d\n", ipc_stats.bad_buffer);
-       kprintf("deadlock: %d\n", ipc_stats.deadlock);
-       kprintf("not_ready: %d\n", ipc_stats.not_ready);
-       kprintf("src_died: %d\n", ipc_stats.src_died);
-       kprintf("dst_died: %d\n", ipc_stats.dst_died);
-       kprintf("no_priv: %d\n", ipc_stats.no_priv);
-       kprintf("bad_size: %d\n", ipc_stats.bad_size);
-       kprintf("bad_senda: %d\n", ipc_stats.bad_senda);
-       if (ex64hi(ipc_stats.total))
-       {
-               kprintf("total: %x:%08x\n", ex64hi(ipc_stats.total),
-                       ex64lo(ipc_stats.total));
-       }
-       else
-               kprintf("total: %u\n", ex64lo(ipc_stats.total));
+       struct proc *pp;
 
-       kprintf("sys_stats:\n");
-       kprintf("bad_req: %d\n", sys_stats.bad_req);
-       kprintf("not_allowed: %d\n", sys_stats.not_allowed);
-       if (ex64hi(sys_stats.total))
+       for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
        {
-               kprintf("total: %x:%08x\n", ex64hi(sys_stats.total),
-                       ex64lo(sys_stats.total));
+               if (pp->p_rts_flags & SLOT_FREE)
+                       continue;
+               printslot(pp, 0);
        }
-       else
-               kprintf("total: %u\n", ex64lo(sys_stats.total));
 }
 
 #if SPROFILE
diff --git a/kernel/arch/i386/vm.h b/kernel/arch/i386/vm.h
deleted file mode 100644 (file)
index 1707ac9..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-
-.define _load_kernel_cr3
-.define _last_cr3
-
-#define LOADKERNELCR3                  ;\
-       inc     (_cr3switch)            ;\
-       mov     eax,    (_kernel_cr3)   ;\
-       cmp     (_last_cr3), eax        ;\
-       jz      9f                      ;\
-       push    _load_kernel_cr3        ;\
-       call    _level0                 ;\
-       pop     eax                     ;\
-       mov     eax,    (_kernel_cr3)   ;\
-       mov     (_last_cr3), eax        ;\
-       inc     (_cr3reload)            ;\
-9:
-
-#define LOADCR3WITHEAX(type, newcr3)   ;\
-sseg   inc     (_cr3switch)            ;\
-sseg   mov     eax,    newcr3          ;\
-sseg   cmp     (_last_cr3), eax        ;\
-       jz      8f                      ;\
-       mov     cr3, eax                ;\
-sseg   inc     (_cr3reload)            ;\
-sseg   mov     (_last_cr3), eax        ;\
-8:
-
index d5eb1ddfc381deffd716faad6566f3292ef37cc4..0be366e4b18f9d37f32f798e5dbcae8f236ba175 100755 (executable)
@@ -230,25 +230,23 @@ irq_hook_t *hook;
    * If any of the timers expire, do_clocktick() will send out signals.
    */
   expired = 0;
-  if ((proc_ptr->p_misc_flags & VIRT_TIMER) &&
+  if ((proc_ptr->p_misc_flags & MF_VIRT_TIMER) &&
        (proc_ptr->p_virt_left -= ticks) <= 0) expired = 1;
-  if ((proc_ptr->p_misc_flags & PROF_TIMER) &&
+  if ((proc_ptr->p_misc_flags & MF_PROF_TIMER) &&
        (proc_ptr->p_prof_left -= ticks) <= 0) expired = 1;
   if (! (priv(proc_ptr)->s_flags & BILLABLE) &&
-       (bill_ptr->p_misc_flags & PROF_TIMER) &&
+       (bill_ptr->p_misc_flags & MF_PROF_TIMER) &&
        (bill_ptr->p_prof_left -= ticks) <= 0) expired = 1;
 
-#if 0
   /* Update load average. */
   load_update();
-#endif
   
   /* Check if do_clocktick() must be called. Done for alarms and scheduling.
    * Some processes, such as the kernel tasks, cannot be preempted. 
    */ 
   if ((next_timeout <= realtime) || (proc_ptr->p_ticks_left <= 0) || expired) {
       prev_ptr = proc_ptr;                     /* store running process */
-      lock_notify(HARDWARE, CLOCK);            /* send notification */
+      mini_notify(proc_addr(HARDWARE), CLOCK);         /* send notification */
   } 
 
   if (do_serial_debug)
index 12cd8aa64242d6aa6d6220033db7eccfc16be93d..7324d4857aaa95438677a7e1b970fb0baaf53ced 100644 (file)
@@ -25,6 +25,8 @@ check_runqueues_f(char *file, int line)
        minix_panic("check_runqueues called with interrupts enabled", NO_NUM);
   }
 
+  FIXME("check_runqueues being done");
+
 #define MYPANIC(msg) {         \
        kprintf("check_runqueues:%s:%d: %s\n", file, line, msg); \
        minix_panic("check_runqueues failed", NO_NUM);  \
@@ -94,7 +96,9 @@ check_runqueues_f(char *file, int line)
   for (xp = BEG_PROC_ADDR; xp < END_PROC_ADDR; ++xp) {
        if(xp->p_magic != PMAGIC) 
                MYPANIC("p_magic wrong in proc table");
-       if (! isemptyp(xp) && xp->p_ready && ! xp->p_found) {
+       if (isemptyp(xp))
+               continue;
+       if(xp->p_ready && ! xp->p_found) {
                kprintf("sched error: ready proc %d not on queue\n", xp->p_nr);
                MYPANIC("ready proc not on scheduling queue");
                if (l++ > MAX_LOOP) { MYPANIC("loop in debug.c?"); }
@@ -103,3 +107,43 @@ check_runqueues_f(char *file, int line)
 }
 
 #endif /* DEBUG_SCHED_CHECK */
+
+/* Return a space-separated list of the names of the run-time-state flag
+ * bits that are set in 'flags'. The string lives in a static buffer, so it
+ * is overwritten by the next call.
+ */
+PUBLIC char *
+rtsflagstr(int flags)
+{
+       /* The 13 names below, each followed by a space, add up to 130
+        * characters. The buffer has to hold all of them plus the
+        * terminator, so that even a slot with every bit set (e.g. a
+        * corrupted one) cannot make strcat() write past the end.
+        */
+       static char str[160];
+       str[0] = '\0';
+
+/* Append the name of flag n to str if that bit is set in flags. This macro
+ * is also used by miscflagstr().
+ */
+#define FLAG(n) if(flags & (n)) { strcat(str, #n " "); }
+
+       FLAG(SLOT_FREE);
+       FLAG(NO_PRIORITY);
+       FLAG(SENDING);
+       FLAG(RECEIVING);
+       FLAG(SIGNALED);
+       FLAG(SIG_PENDING);
+       FLAG(P_STOP);
+       FLAG(NO_PRIV);
+       FLAG(NO_ENDPOINT);
+       FLAG(VMINHIBIT);
+       FLAG(PAGEFAULT);
+       FLAG(VMREQUEST);
+       FLAG(VMREQTARGET);
+
+       return str;
+}
+
+PUBLIC char *
+miscflagstr(int flags)
+{      /* Return the names of the MF_* bits set in 'flags' as one
+        * space-separated string, built with the FLAG() macro that is
+        * defined inside rtsflagstr(). The string is kept in a static
+        * buffer, so each call overwrites the previous result.
+        */
+       static char str[100];
+       str[0] = '\0';
+
+       FLAG(MF_REPLY_PEND);
+       FLAG(MF_ASYNMSG);
+       FLAG(MF_FULLVM);
+       FLAG(MF_DELIVERMSG);
+
+       return str;
+}
+
index 283b00be24a0a47151c27295aaaa1e139c02044a..e25605e0c0ddfb73ea9f143aa2954f1ce0a17f55 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <ansi.h>
+#include <minix/debug.h>
 #include "config.h"
 
 /* Enable prints such as
 #define DEBUG_TIME_LOCKS               1
 
 /* Runtime sanity checking. */
-#define DEBUG_VMASSERT                 1
+#define DEBUG_VMASSERT                 0
 #define DEBUG_SCHED_CHECK              0
+#define DEBUG_STACK_CHECK              0
+#define DEBUG_TRACE                    0
+
+#if DEBUG_TRACE
+
+#define VF_SCHEDULING          (1L << 1)
+#define VF_PICKPROC            (1L << 2)
+
+#define TRACE(code, statement) if(verboseflags & code) { printf("%s:%d: ", __FILE__, __LINE__); statement }
+
+#else
+#define TRACE(code, statement)
+#endif
+
+#define NOREC_ENTER(varname) \
+       static int varname = 0; \
+       int mustunlock = 0; \
+       if(!intr_disabled()) { lock; mustunlock = 1; } \
+       if(varname) {   \
+               minix_panic(#varname " recursive enter", __LINE__); \
+       } \
+       varname = 1;
+
+#define NOREC_RETURN(varname, v) do {  \
+       if(!varname)            \
+               minix_panic(#varname " flag off", __LINE__); \
+       if(!intr_disabled())    \
+               minix_panic(#varname " interrupts on", __LINE__); \
+       varname = 0;    \
+       if(mustunlock)  { unlock;       } \
+       return v;       \
+       } while(0)
+
+#if DEBUG_VMASSERT
+#define vmassert(t) { \
+       if(!(t)) { minix_panic("vm: assert " #t " failed\n", __LINE__); } }
+#else
+#define vmassert(t) { }
+#endif
 
 #endif /* DEBUG_H */
index e3ed5735bd1b4b381e135fff2458372bc5f88da1..2088183536dae6680d9e6be7877333a1526f0a27 100755 (executable)
@@ -16,6 +16,7 @@
 #include <minix/config.h>
 #include <archtypes.h>
 #include "config.h"
+#include "debug.h"
 
 /* Variables relating to shutting down MINIX. */
 EXTERN char kernel_exception;          /* TRUE after system exceptions */
@@ -29,14 +30,13 @@ EXTERN struct k_randomness krandom; /* gather kernel random information */
 EXTERN struct loadinfo kloadinfo;      /* status of load average */
 
 /* Process scheduling information and the kernel reentry count. */
-EXTERN struct proc *prev_ptr;  /* previously running process */
 EXTERN struct proc *proc_ptr;  /* pointer to currently running process */
 EXTERN struct proc *next_ptr;  /* next process to run after restart() */
+EXTERN struct proc *prev_ptr;  
 EXTERN struct proc *bill_ptr;  /* process to bill for clock ticks */
 EXTERN struct proc *vmrestart;  /* first process on vmrestart queue */
 EXTERN struct proc *vmrequest;  /* first process on vmrequest queue */
 EXTERN struct proc *pagefaults; /* first process on pagefault queue */
-EXTERN struct proc *softnotify;        /* first process on softnotify queue */
 EXTERN char k_reenter;         /* kernel reentry count (entry count less 1) */
 EXTERN unsigned lost_ticks;    /* clock ticks counted outside clock task */
 
@@ -47,32 +47,6 @@ EXTERN int irq_actids[NR_IRQ_VECTORS];               /* IRQ ID bits active */
 EXTERN int irq_use;                            /* map of all in-use irq's */
 EXTERN u32_t system_hz;                                /* HZ value */
 
-EXTERN struct ipc_stats
-{
-       unsigned long deadproc;
-       unsigned long bad_endpoint;
-       unsigned long dst_not_allowed;
-       unsigned long bad_call;
-       unsigned long call_not_allowed;
-       unsigned long bad_buffer;
-       unsigned long deadlock;
-       unsigned long not_ready;
-       unsigned long src_died;
-       unsigned long dst_died;
-       unsigned long no_priv;
-       unsigned long bad_size;
-       unsigned long bad_senda;
-       u64_t total;
-} ipc_stats;
-extern endpoint_t ipc_stats_target;
-
-EXTERN struct system_stats
-{
-       unsigned long bad_req;
-       unsigned long not_allowed;
-       u64_t total;
-} sys_stats;
-
 /* Miscellaneous. */
 EXTERN reg_t mon_ss, mon_sp;           /* boot monitor stack */
 EXTERN int mon_return;                 /* true if we can return to monitor */
@@ -85,18 +59,14 @@ EXTERN char params_buffer[512];             /* boot monitor parameters */
 EXTERN int minix_panicing;
 EXTERN int locklevel;
 
-EXTERN unsigned long cr3switch;
-EXTERN unsigned long cr3reload;
+#if DEBUG_TRACE
+EXTERN int verboseflags;
+#endif
 
 /* VM */
-EXTERN phys_bytes vm_base;
-EXTERN phys_bytes vm_size;
-EXTERN phys_bytes vm_mem_high;
 EXTERN int vm_running;
-EXTERN int must_notify_vm;
-
-/* Verbose flags (debugging). */
-EXTERN int verbose_vm;
+EXTERN int catch_pagefaults;
+EXTERN struct proc *ptproc;
 
 /* Timing */
 EXTERN util_timingdata_t timingdata[TIMING_CATEGORIES];
index f07997cdd5fee61f451b8ede535666294b9c0fe1..b847b2bef14104e5719496a96b209805b2248b9a 100755 (executable)
@@ -17,6 +17,7 @@
 #include <minix/com.h>
 #include <minix/endpoint.h>
 #include "proc.h"
+#include "debug.h"
 
 /* Prototype declarations for PRIVATE functions. */
 FORWARD _PROTOTYPE( void announce, (void));    
@@ -161,6 +162,9 @@ PUBLIC void main()
                rp->p_reg.sp -= sizeof(reg_t);
        }
 
+       /* scheduling functions depend on proc_ptr pointing somewhere. */
+       if(!proc_ptr) proc_ptr = rp;
+
        /* If this process has its own page table, VM will set the
         * PT up and manage it. VM will signal the kernel when it has
         * done this; until then, don't let it run.
@@ -186,8 +190,21 @@ PUBLIC void main()
   /* MINIX is now ready. All boot image processes are on the ready queue.
    * Return to the assembly code to start running the current process. 
    */
-  bill_ptr = proc_addr(IDLE);          /* it has to point somewhere */
+  bill_ptr = proc_addr(IDLE);  /* it has to point somewhere */
   announce();                          /* print MINIX startup banner */
+/* Warnings for sanity checks that take time. These warnings are printed
+ * so it's a clear warning no full release should be done with them
+ * enabled.
+ */
+#if DEBUG_SCHED_CHECK
+  FIXME("DEBUG_SCHED_CHECK enabled");
+#endif
+#if DEBUG_VMASSERT
+  FIXME("DEBUG_VMASSERT enabled");
+#endif
+#if DEBUG_PROC_CHECK
+  FIXME("PROC check enabled");
+#endif
   restart();
 }
 
@@ -204,6 +221,8 @@ PRIVATE void announce(void)
       "Copyright 2009, Vrije Universiteit, Amsterdam, The Netherlands\n",
       OS_RELEASE, OS_VERSION);
   kprintf("MINIX is open source software, see http://www.minix3.org\n");
+
+  FIXME("pm, vfs, etc own page table");
 }
 
 /*===========================================================================*
index bb3d8543f34d5fd84ecb85771a70f2f3983d8882..7b92501732c48f68f616de9abbb6afc8a864c202 100755 (executable)
@@ -6,10 +6,7 @@
  *
  * As well as several entry points used from the interrupt and task level:
  *
- *   lock_notify:     notify a process of a system event
  *   lock_send:              send a message to a process
- *   lock_enqueue:    put a process on one of the scheduling queues 
- *   lock_dequeue:    remove a process from the scheduling queues
  *
  * Changes:
  *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
@@ -57,7 +54,6 @@ FORWARD _PROTOTYPE( int mini_send, (struct proc *caller_ptr, int dst_e,
                message *m_ptr, int flags));
 FORWARD _PROTOTYPE( int mini_receive, (struct proc *caller_ptr, int src,
                message *m_ptr, int flags));
-FORWARD _PROTOTYPE( int mini_notify, (struct proc *caller_ptr, int dst));
 FORWARD _PROTOTYPE( int mini_senda, (struct proc *caller_ptr,
        asynmsg_t *table, size_t size));
 FORWARD _PROTOTYPE( int deadlock, (int function,
@@ -67,8 +63,10 @@ FORWARD _PROTOTYPE( int try_one, (struct proc *src_ptr, struct proc *dst_ptr));
 FORWARD _PROTOTYPE( void sched, (struct proc *rp, int *queue, int *front));
 FORWARD _PROTOTYPE( void pick_proc, (void));
 
-#define BuildMess(m_ptr, src, dst_ptr) \
-       (m_ptr)->m_source = proc_addr(src)->p_endpoint;         \
+#define PICK_ANY       1
+#define PICK_HIGHERONLY        2
+
+#define BuildNotifyMessage(m_ptr, src, dst_ptr) \
        (m_ptr)->m_type = NOTIFY_FROM(src);                             \
        (m_ptr)->NOTIFY_TIMESTAMP = get_uptime();                       \
        switch (src) {                                                  \
@@ -82,49 +80,88 @@ FORWARD _PROTOTYPE( void pick_proc, (void));
                break;                                                  \
        }
 
-#define CopyMess(s,sp,sm,dp,dm) do {                   \
-       vir_bytes dstlin;                               \
-       endpoint_t e = proc_addr(s)->p_endpoint;        \
-       struct vir_addr src, dst;                       \
-       int r;                                          \
-       if((dstlin = umap_local((dp), D, (vir_bytes) dm, sizeof(message))) == 0){\
-               minix_panic("CopyMess: umap_local failed", __LINE__);   \
-       }                                               \
-                       \
-       if(vm_running &&        \
-        (r=vm_checkrange((dp), (dp), dstlin, sizeof(message), 1, 0)) != OK) { \
-               if(r != VMSUSPEND)                      \
-                 minix_panic("CopyMess: vm_checkrange error", __LINE__); \
-               (dp)->p_vmrequest.saved.msgcopy.dst = (dp);     \
-               (dp)->p_vmrequest.saved.msgcopy.dst_v = (vir_bytes) dm; \
-               if(data_copy((sp)->p_endpoint,  \
-                       (vir_bytes) (sm), SYSTEM,       \
-                       (vir_bytes) &(dp)->p_vmrequest.saved.msgcopy.msgbuf, \
-                       sizeof(message)) != OK) {               \
-                               minix_panic("CopyMess: data_copy failed", __LINE__);\
-                       }                               \
-                       (dp)->p_vmrequest.saved.msgcopy.msgbuf.m_source = e; \
-                       (dp)->p_vmrequest.type = VMSTYPE_MSGCOPY; \
-       } else  {                                       \
-               src.proc_nr_e = (sp)->p_endpoint;               \
-               dst.proc_nr_e = (dp)->p_endpoint;               \
-               src.segment = dst.segment = D;                  \
-               src.offset = (vir_bytes) (sm);                  \
-               dst.offset = (vir_bytes) (dm);                  \
-               if(virtual_copy(&src, &dst, sizeof(message)) != OK) {   \
-                       kprintf("copymess: copy %d:%lx to %d:%lx failed\n",\
-                               (sp)->p_endpoint, (sm), (dp)->p_endpoint, dm);\
-                       minix_panic("CopyMess: virtual_copy (1) failed", __LINE__); \
-               }               \
-               src.proc_nr_e = SYSTEM;                         \
-               src.offset = (vir_bytes) &e;                    \
-               if(virtual_copy(&src, &dst, sizeof(e)) != OK) {         \
-                       kprintf("copymess: copy %d:%lx to %d:%lx\n",    \
-                               (sp)->p_endpoint, (sm), (dp)->p_endpoint, dm);\
-                       minix_panic("CopyMess: virtual_copy (2) failed", __LINE__); \
-               }                                       \
-       }       \
-} while(0)
+/*===========================================================================*
+ *                             QueueMess                                    * 
+ *===========================================================================*/
+PRIVATE int QueueMess(endpoint_t ep, vir_bytes msg_lin, struct proc *dst)
+{
+       int k;                  /* scratch output of isokendpt(); not used afterwards */
+       phys_bytes addr;        /* set by PHYS_COPY_CATCH; nonzero is treated as failure */
+       NOREC_ENTER(queuemess);
+       /* Queue a message from the src process (in memory) to the dst
+        * process (using dst process table entry). Do actual copy to
+        * kernel here; it's an error if the copy fails into kernel.
+        *
+        * ep      - endpoint stored as m_source of the queued message
+        * msg_lin - address the message is copied from; callers in this
+        *           file pass either a umap_local() result or
+        *           vir2phys() of a kernel-resident message
+        * dst     - process whose p_delivermsg buffer receives the copy
+        *
+        * Preconditions (checked with vmassert): dst has MF_DELIVERMSG
+        * clear, dst->p_delivermsg_lin is nonzero, and ep is a valid
+        * endpoint.
+        *
+        * Returns OK after the message is stored in dst->p_delivermsg and
+        * MF_DELIVERMSG is set on dst; returns EFAULT if PHYS_COPY_CATCH
+        * reported a nonzero addr, in which case dst is left unmodified
+        * by the statements below the copy.
+        */
+       vmassert(!(dst->p_misc_flags & MF_DELIVERMSG)); 
+       vmassert(dst->p_delivermsg_lin);
+       vmassert(isokendpt(ep, &k));
+
+#if 0  /* disabled: copy straight to dst->p_delivermsg_lin when dst is INMEMORY */
+       if(INMEMORY(dst)) {
+               PHYS_COPY_CATCH(msg_lin, dst->p_delivermsg_lin,
+                       sizeof(message), addr);
+               if(!addr) {
+                       PHYS_COPY_CATCH(vir2phys(&ep), dst->p_delivermsg_lin,
+                               sizeof(ep), addr);
+                       if(!addr) {
+                               NOREC_RETURN(queuemess, OK);
+                       }
+               }
+       }
+#else
+       FIXME("in-memory process copy");
+#endif
+
+       PHYS_COPY_CATCH(msg_lin, vir2phys(&dst->p_delivermsg), sizeof(message), addr);
+       if(addr) {
+               NOREC_RETURN(queuemess, EFAULT);
+       }
+
+       dst->p_delivermsg.m_source = ep;        /* overwrite whatever source the copied message carried */
+       dst->p_misc_flags |= MF_DELIVERMSG;     /* schedcheck() calls delivermsg() while this is set */
+
+       NOREC_RETURN(queuemess, OK);
+}
+
+/*===========================================================================*
+ *                             schedcheck                                   * 
+ *===========================================================================*/
+PUBLIC void schedcheck(void)
+{
+       /* This function is called an instant before proc_ptr is
+        * to be scheduled again.
+        *
+        * If next_ptr is set, it replaces proc_ptr and is cleared. Then,
+        * for as long as the chosen process has MF_DELIVERMSG set,
+        * delivermsg() is called for it. When delivermsg() returns
+        * VMSUSPEND, the process is expected to have become blocked
+        * (p_rts_flags nonzero) and next_ptr to name a different,
+        * runnable process; that process becomes proc_ptr and is
+        * checked in turn.
+        *
+        * Interrupts must be disabled on entry (asserted below).
+        *
+        * NOTE(review): the loop only ends if delivermsg() clears
+        * MF_DELIVERMSG when it does not return VMSUSPEND; that is
+        * presumably the case but is not visible here - confirm.
+        */
+       NOREC_ENTER(schedch);
+       vmassert(intr_disabled());
+       if(next_ptr) {
+               proc_ptr = next_ptr;
+               next_ptr = NULL;
+       }
+       vmassert(proc_ptr);
+       vmassert(!proc_ptr->p_rts_flags);
+       while(proc_ptr->p_misc_flags & MF_DELIVERMSG) {
+               vmassert(!next_ptr);
+               vmassert(!proc_ptr->p_rts_flags);
+               TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
+                       proc_ptr->p_name, proc_ptr->p_endpoint););
+               if(delivermsg(proc_ptr) == VMSUSPEND) {
+                       vmassert(next_ptr);
+                       TRACE(VF_SCHEDULING, printf("suspending %s / %d\n",
+                               proc_ptr->p_name, proc_ptr->p_endpoint););
+                       vmassert(proc_ptr->p_rts_flags);
+                       vmassert(next_ptr != proc_ptr);
+                       proc_ptr = next_ptr;    /* switch to the replacement process */
+                       vmassert(!proc_ptr->p_rts_flags);
+                       next_ptr = NULL;
+               } 
+       }
+       TRACE(VF_SCHEDULING, printf("starting %s / %d\n",
+               proc_ptr->p_name, proc_ptr->p_endpoint););
+#if DEBUG_TRACE
+       proc_ptr->p_schedules++;        /* debug counter, bumped once per call for the chosen process */
+#endif
+       NOREC_RETURN(schedch, );
+}
 
 /*===========================================================================*
  *                             sys_call                                     * 
@@ -146,8 +183,13 @@ long bit_map;                      /* notification event set or flags */
   int src_dst_p;                               /* Process slot number */
   size_t msg_size;
 
-  if (caller_ptr->p_endpoint == ipc_stats_target)
-       ipc_stats.total= add64u(ipc_stats.total, 1);
+#if DEBUG_SCHED_CHECK
+  if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
+       kprintf("sys_call: MF_DELIVERMSG on for %s / %d\n",
+               caller_ptr->p_name, caller_ptr->p_endpoint);
+       minix_panic("MF_DELIVERMSG on", NO_NUM);
+  }
+#endif
 
 #if 0
   if(src_dst_e != 4 && src_dst_e != 5 &&
@@ -163,12 +205,10 @@ long bit_map;                     /* notification event set or flags */
   }
 #endif
 
-#if 1
+#if DEBUG_SCHED_CHECK
   if (RTS_ISSET(caller_ptr, SLOT_FREE))
   {
        kprintf("called by the dead?!?\n");
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.deadproc++;
        return EINVAL;
   }
 #endif
@@ -188,12 +228,10 @@ long bit_map;                     /* notification event set or flags */
   {
        if (call_nr != RECEIVE)
        {
-#if DEBUG_ENABLE_IPC_WARNINGS
+#if 0
                kprintf("sys_call: trap %d by %d with bad endpoint %d\n", 
                        call_nr, proc_nr(caller_ptr), src_dst_e);
 #endif
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.bad_endpoint++;
                return EINVAL;
        }
        src_dst_p = src_dst_e;
@@ -202,12 +240,10 @@ long bit_map;                     /* notification event set or flags */
   {
        /* Require a valid source and/or destination process. */
        if(!isokendpt(src_dst_e, &src_dst_p)) {
-#if DEBUG_ENABLE_IPC_WARNINGS
+#if 0
                kprintf("sys_call: trap %d by %d with bad endpoint %d\n", 
                        call_nr, proc_nr(caller_ptr), src_dst_e);
 #endif
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.bad_endpoint++;
                return EDEADSRCDST;
        }
 
@@ -221,10 +257,8 @@ long bit_map;                      /* notification event set or flags */
 #if DEBUG_ENABLE_IPC_WARNINGS
                        kprintf(
                        "sys_call: ipc mask denied trap %d from %d to %d\n",
-                               call_nr, proc_nr(caller_ptr), src_dst_p);
+                               call_nr, caller_ptr->p_endpoint, src_dst_e);
 #endif
-                       if (caller_ptr->p_endpoint == ipc_stats_target)
-                               ipc_stats.dst_not_allowed++;
                        return(ECALLDENIED);    /* call denied by ipc mask */
                }
        }
@@ -237,8 +271,6 @@ long bit_map;                       /* notification event set or flags */
       kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", 
           call_nr, proc_nr(caller_ptr), src_dst_p);
 #endif
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.bad_call++;
        return(ETRAPDENIED);            /* trap denied by mask or kernel */
   }
 
@@ -251,8 +283,6 @@ long bit_map;                       /* notification event set or flags */
       kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", 
           call_nr, proc_nr(caller_ptr), src_dst_p);
 #endif
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.call_not_allowed++;
        return(ETRAPDENIED);            /* trap denied by mask or kernel */
   }
 
@@ -261,8 +291,6 @@ long bit_map;                       /* notification event set or flags */
       kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", 
           call_nr, proc_nr(caller_ptr), src_dst_e);
 #endif
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.call_not_allowed++;
        return(ETRAPDENIED);            /* trap denied by mask or kernel */
   }
 
@@ -283,61 +311,6 @@ long bit_map;                      /* notification event set or flags */
        msg_size = sizeof(*m_ptr);
   }
 
-  /* If the call involves a message buffer, i.e., for SEND, SENDREC, 
-   * or RECEIVE, check the message pointer. This check allows a message to be 
-   * anywhere in data or stack or gap. It will have to be made more elaborate 
-   * for machines which don't have the gap mapped. 
-   *
-   * We use msg_size decided above.
-   */
-  if (call_nr == SEND || call_nr == SENDREC ||
-       call_nr == RECEIVE || call_nr == SENDA || call_nr == SENDNB) {
-       int r;
-       phys_bytes lin;
-
-       /* Map to linear address. */
-       if(msg_size > 0 && 
-               (lin = umap_local(caller_ptr, D, (vir_bytes) m_ptr, msg_size)) == 0) {
-               kprintf("umap_local failed for %s / %d on 0x%lx size %d\n",
-                       caller_ptr->p_name, caller_ptr->p_endpoint,
-                       m_ptr, msg_size);
-               return EFAULT;
-       }
-
-       /* Check if message pages in calling process are mapped.
-        * We don't have to check the recipient if this is a send,
-        * because this code will do that before its receive() starts.
-        *
-        * It is important the range is verified as _writable_, because
-        * the kernel will want to write to the SENDA buffer in the future,
-        * and those pages may not be shared between processes.
-        */
-
-       if(vm_running && msg_size > 0 &&
-        (r=vm_checkrange(caller_ptr, caller_ptr, lin, msg_size, 1, 0)) != OK) {
-               if(r != VMSUSPEND) {
-                       kprintf("SYSTEM:sys_call:vm_checkrange: err %d\n", r);
-                       return r;
-               }
-               
-               /* We can't go ahead with this call. Caller is suspended
-                * and we have to save the state in its process struct.
-                */
-               caller_ptr->p_vmrequest.saved.sys_call.call_nr = call_nr;
-               caller_ptr->p_vmrequest.saved.sys_call.m_ptr = m_ptr;
-               caller_ptr->p_vmrequest.saved.sys_call.src_dst_e = src_dst_e;
-               caller_ptr->p_vmrequest.saved.sys_call.bit_map = bit_map;
-               caller_ptr->p_vmrequest.type = VMSTYPE_SYS_CALL;
-
-               kprintf("SYSTEM: %s:%d: suspending call 0x%lx on ipc buffer 0x%lx length 0x%lx\n",
-                       caller_ptr->p_name, caller_ptr->p_endpoint, call_nr, m_ptr, msg_size);
-
-               /* vm_checkrange() will have suspended caller with VMREQUEST. */
-               return OK;
-       }
-
-  } 
-
   /* Check for a possible deadlock for blocking SEND(REC) and RECEIVE. */
   if (call_nr == SEND || call_nr == SENDREC || call_nr == RECEIVE) {
       if (group_size = deadlock(call_nr, caller_ptr, src_dst_p)) {
@@ -345,8 +318,6 @@ long bit_map;                       /* notification event set or flags */
           kprintf("sys_call: trap %d from %d to %d deadlocked, group size %d\n",
               call_nr, proc_nr(caller_ptr), src_dst_p, group_size);
 #endif
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.deadlock++;
         return(ELOCKED);
       }
   }
@@ -362,7 +333,7 @@ long bit_map;                       /* notification event set or flags */
   switch(call_nr) {
   case SENDREC:
        /* A flag is set so that notifications cannot interrupt SENDREC. */
-       caller_ptr->p_misc_flags |= REPLY_PENDING;
+       caller_ptr->p_misc_flags |= MF_REPLY_PEND;
        /* fall through */
   case SEND:                   
        result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
@@ -371,11 +342,11 @@ long bit_map;                     /* notification event set or flags */
        /* fall through for SENDREC */
   case RECEIVE:                        
        if (call_nr == RECEIVE)
-               caller_ptr->p_misc_flags &= ~REPLY_PENDING;
+               caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
        result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
        break;
   case NOTIFY:
-       result = mini_notify(caller_ptr, src_dst_p);
+       result = mini_notify(caller_ptr, src_dst_e);
        break;
   case SENDNB:
         result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
@@ -460,22 +431,6 @@ int src_dst;                                       /* src or dst process */
   return(0);                                   /* not a deadlock */
 }
 
-/*===========================================================================*
- *                             sys_call_restart                             * 
- *===========================================================================*/
-PUBLIC void sys_call_restart(caller)
-struct proc *caller;
-{
-       int r;
-       kprintf("restarting sys_call code 0x%lx, "
-               "m_ptr 0x%lx, srcdst %d, bitmap 0x%lx, but not really\n",
-               caller->p_vmrequest.saved.sys_call.call_nr,
-               caller->p_vmrequest.saved.sys_call.m_ptr,
-               caller->p_vmrequest.saved.sys_call.src_dst_e,
-               caller->p_vmrequest.saved.sys_call.bit_map);
-       caller->p_reg.retreg = r;
-}
-
 /*===========================================================================*
  *                             mini_send                                    * 
  *===========================================================================*/
@@ -492,14 +447,19 @@ int flags;
   register struct proc *dst_ptr;
   register struct proc **xpp;
   int dst_p;
+  phys_bytes linaddr;
+  vir_bytes addr;
+  int r;
 
+  if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) m_ptr,
+       sizeof(message)))) {
+       return EFAULT;
+  }
   dst_p = _ENDPOINT_P(dst_e);
   dst_ptr = proc_addr(dst_p);
 
   if (RTS_ISSET(dst_ptr, NO_ENDPOINT))
   {
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.dst_died++;
        return EDSTDIED;
   }
 
@@ -508,18 +468,20 @@ int flags;
    */
   if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) {
        /* Destination is indeed waiting for this message. */
-       CopyMess(caller_ptr->p_nr, caller_ptr, m_ptr, dst_ptr,
-                dst_ptr->p_messbuf);
+       vmassert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));     
+       if((r=QueueMess(caller_ptr->p_endpoint, linaddr, dst_ptr)) != OK)
+               return r;
        RTS_UNSET(dst_ptr, RECEIVING);
   } else {
        if(flags & NON_BLOCKING) {
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.not_ready++;
                return(ENOTREADY);
        }
 
        /* Destination is not waiting.  Block and dequeue caller. */
-       caller_ptr->p_messbuf = m_ptr;
+       PHYS_COPY_CATCH(linaddr, vir2phys(&caller_ptr->p_sendmsg),
+               sizeof(message), addr);
+
+       if(addr) { return EFAULT; }
        RTS_SET(caller_ptr, SENDING);
        caller_ptr->p_sendto_e = dst_e;
 
@@ -552,6 +514,18 @@ int flags;
   sys_map_t *map;
   bitchunk_t *chunk;
   int i, r, src_id, src_proc_nr, src_p;
+  phys_bytes linaddr;
+
+  vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
+
+  if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) m_ptr,
+       sizeof(message)))) {
+       return EFAULT;
+  }
+
+  /* This is where we want our message. */
+  caller_ptr->p_delivermsg_lin = linaddr;
+  caller_ptr->p_delivermsg_vir = (vir_bytes) m_ptr;
 
   if(src_e == ANY) src_p = ANY;
   else
@@ -559,8 +533,6 @@ int flags;
        okendpt(src_e, &src_p);
        if (RTS_ISSET(proc_addr(src_p), NO_ENDPOINT))
        {
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.src_died++;
                return ESRCDIED;
        }
   }
@@ -573,10 +545,11 @@ int flags;
   if (!RTS_ISSET(caller_ptr, SENDING)) {
 
     /* Check if there are pending notifications, except for SENDREC. */
-    if (! (caller_ptr->p_misc_flags & REPLY_PENDING)) {
+    if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
 
         map = &priv(caller_ptr)->s_notify_pending;
         for (chunk=&map->chunk[0]; chunk<&map->chunk[NR_SYS_CHUNKS]; chunk++) {
+               endpoint_t hisep;
 
             /* Find a pending notification from the requested source. */ 
             if (! *chunk) continue;                    /* no bits in chunk */
@@ -593,8 +566,13 @@ int flags;
             *chunk &= ~(1 << i);                       /* no longer pending */
 
             /* Found a suitable source, deliver the notification message. */
-           BuildMess(&m, src_proc_nr, caller_ptr);     /* assemble message */
-            CopyMess(src_proc_nr, proc_addr(HARDWARE), &m, caller_ptr, m_ptr);
+           BuildNotifyMessage(&m, src_proc_nr, caller_ptr);    /* assemble message */
+           hisep = proc_addr(src_proc_nr)->p_endpoint;
+           vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));      
+           vmassert(src_e == ANY || hisep == src_e);
+           if((r=QueueMess(hisep, vir2phys(&m), caller_ptr)) != OK)  {
+               minix_panic("mini_receive: local QueueMess failed", NO_NUM);
+           }
             return(OK);                                        /* report success */
         }
     }
@@ -603,20 +581,20 @@ int flags;
     xpp = &caller_ptr->p_caller_q;
     while (*xpp != NIL_PROC) {
         if (src_e == ANY || src_p == proc_nr(*xpp)) {
-#if 1
+#if DEBUG_SCHED_CHECK
            if (RTS_ISSET(*xpp, SLOT_FREE) || RTS_ISSET(*xpp, NO_ENDPOINT))
            {
                kprintf("%d: receive from %d; found dead %d (%s)?\n",
                        caller_ptr->p_endpoint, src_e, (*xpp)->p_endpoint,
                        (*xpp)->p_name);
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.deadproc++;
                return EINVAL;
            }
 #endif
 
            /* Found acceptable message. Copy it and update status. */
-           CopyMess((*xpp)->p_nr, *xpp, (*xpp)->p_messbuf, caller_ptr, m_ptr);
+           vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
+           QueueMess((*xpp)->p_endpoint,
+               vir2phys(&(*xpp)->p_sendmsg), caller_ptr);
            RTS_UNSET(*xpp, SENDING);
             *xpp = (*xpp)->p_q_link;           /* remove from queue */
             return(OK);                                /* report success */
@@ -635,7 +613,6 @@ int flags;
        }
        else
        {
-               caller_ptr->p_messbuf = m_ptr;
                r= try_async(caller_ptr);
        }
        if (r == OK)
@@ -648,12 +625,9 @@ int flags;
    */
   if ( ! (flags & NON_BLOCKING)) {
       caller_ptr->p_getfrom_e = src_e;         
-      caller_ptr->p_messbuf = m_ptr;
       RTS_SET(caller_ptr, RECEIVING);
       return(OK);
   } else {
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               ipc_stats.not_ready++;
        return(ENOTREADY);
   }
 }
@@ -661,26 +635,41 @@ int flags;
 /*===========================================================================*
  *                             mini_notify                                  * 
  *===========================================================================*/
-PRIVATE int mini_notify(caller_ptr, dst)
+PUBLIC int mini_notify(caller_ptr, dst_e)
 register struct proc *caller_ptr;      /* sender of the notification */
-int dst;                               /* which process to notify */
+endpoint_t dst_e;                      /* which process to notify */
 {
-  register struct proc *dst_ptr = proc_addr(dst);
+  register struct proc *dst_ptr;
   int src_id;                          /* source id for late delivery */
   message m;                           /* the notification message */
+  int r;
+  int proc_nr;
+  int dst_p;
+
+  vmassert(intr_disabled());
+
+  if (!isokendpt(dst_e, &dst_p)) {
+       util_stacktrace();
+       kprintf("mini_notify: bogus endpoint %d\n", dst_e);
+       return EDEADSRCDST;
+  }
+
+  dst_ptr = proc_addr(dst_p);
 
   /* Check to see if target is blocked waiting for this message. A process 
    * can be both sending and receiving during a SENDREC system call.
    */
     if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
-      ! (dst_ptr->p_misc_flags & REPLY_PENDING)) {
+      ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
       /* Destination is indeed waiting for a message. Assemble a notification
        * message in kernel memory and queue it for delivery to the
        * destination with QueueMess().
        */
-      BuildMess(&m, proc_nr(caller_ptr), dst_ptr);
-      CopyMess(proc_nr(caller_ptr), proc_addr(HARDWARE), &m, 
-          dst_ptr, dst_ptr->p_messbuf);
+      BuildNotifyMessage(&m, proc_nr(caller_ptr), dst_ptr);
+      vmassert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
+      if((r=QueueMess(caller_ptr->p_endpoint, vir2phys(&m), dst_ptr)) != OK) {
+       minix_panic("mini_notify: local QueueMess failed", NO_NUM);
+      }
       RTS_UNSET(dst_ptr, RECEIVING);
       return(OK);
   } 
@@ -725,21 +714,20 @@ struct proc *caller_ptr;
 asynmsg_t *table;
 size_t size;
 {
-       int i, dst_p, done, do_notify;
+       int i, dst_p, done, do_notify, r;
        unsigned flags;
        struct proc *dst_ptr;
        struct priv *privp;
        message *m_ptr;
        asynmsg_t tabent;
        vir_bytes table_v = (vir_bytes) table;
+       vir_bytes linaddr;
 
        privp= priv(caller_ptr);
        if (!(privp->s_flags & SYS_PROC))
        {
                kprintf(
                "mini_senda: warning caller has no privilege structure\n");
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.no_priv++;
                return EPERM;
        }
 
@@ -753,6 +741,13 @@ size_t size;
                return OK;
        }
 
+       if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) table,
+               size * sizeof(*table)))) {
+               printf("mini_senda: umap_local failed; 0x%lx len 0x%lx\n",
+                       table, size * sizeof(*table));
+               return EFAULT;
+       }
+
        /* Limit size to something reasonable. An arbitrary choice is 16
         * times the number of process table entries.
         *
@@ -761,8 +756,6 @@ size_t size;
         */
        if (size > 16*(NR_TASKS + NR_PROCS))
        {
-               if (caller_ptr->p_endpoint == ipc_stats_target)
-                       ipc_stats.bad_size++;
                return EDOM;
        }
        
@@ -784,8 +777,6 @@ size_t size;
                if (flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY) ||
                        !(flags & AMF_VALID))
                {
-                       if (caller_ptr->p_endpoint == ipc_stats_target)
-                               ipc_stats.bad_senda++;
                        return EINVAL;
                }
 
@@ -799,9 +790,6 @@ size_t size;
                if (!isokendpt(tabent.dst, &dst_p))
                {
                        /* Bad destination, report the error */
-                       if (caller_ptr->p_endpoint == ipc_stats_target)
-                               ipc_stats.bad_endpoint++;
-
                        tabent.result= EDEADSRCDST;
                        A_INSERT(i, result);
                        tabent.flags= flags | AMF_DONE;
@@ -815,9 +803,6 @@ size_t size;
                if (!may_send_to(caller_ptr, dst_p))
                {
                        /* Send denied by IPC mask */
-                       if (caller_ptr->p_endpoint == ipc_stats_target)
-                               ipc_stats.dst_not_allowed++;
-
                        tabent.result= ECALLDENIED;
                        A_INSERT(i, result);
                        tabent.flags= flags | AMF_DONE;
@@ -838,9 +823,6 @@ size_t size;
                /* NO_ENDPOINT should be removed */
                if (dst_ptr->p_rts_flags & NO_ENDPOINT)
                {
-                       if (caller_ptr->p_endpoint == ipc_stats_target)
-                               ipc_stats.dst_died++;
-
                        tabent.result= EDSTDIED;
                        A_INSERT(i, result);
                        tabent.flags= flags | AMF_DONE;
@@ -864,12 +846,13 @@ size_t size;
                        m_ptr= &table[i].msg;   /* Note: pointer in the
                                                 * caller's address space.
                                                 */
-                       CopyMess(caller_ptr->p_nr, caller_ptr, m_ptr, dst_ptr,
-                               dst_ptr->p_messbuf);
+                       /* Copy message from sender. */
+                       tabent.result= QueueMess(caller_ptr->p_endpoint,
+                               linaddr + (vir_bytes) &table[i].msg -
+                                       (vir_bytes) table, dst_ptr);
+                       if(tabent.result == OK)
+                               RTS_UNSET(dst_ptr, RECEIVING);
 
-                       RTS_UNSET(dst_ptr, RECEIVING);
-
-                       tabent.result= OK;
                        A_INSERT(i, result);
                        tabent.flags= flags | AMF_DONE;
                        A_INSERT(i, flags);
@@ -892,13 +875,6 @@ size_t size;
        {
                privp->s_asyntab= (vir_bytes)table;
                privp->s_asynsize= size;
-#if 0
-               if(caller_ptr->p_endpoint > INIT_PROC_NR) {
-                       kprintf("kernel: %s (%d) asynsend table at 0x%lx, %d\n", 
-                               caller_ptr->p_name, caller_ptr->p_endpoint,
-                               table, size);
-               }
-#endif
        }
        return OK;
 }
@@ -913,7 +889,7 @@ struct proc *caller_ptr;
        int r;
        struct priv *privp;
        struct proc *src_ptr;
-
+       
        /* Try all privilege structures */
        for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) 
        {
@@ -921,13 +897,10 @@ struct proc *caller_ptr;
                        continue;
                if (privp->s_asynsize == 0)
                        continue;
-#if 0
-               kprintf("try_async: found asyntable for proc %d\n",
-                       privp->s_proc_nr);
-#endif
                src_ptr= proc_addr(privp->s_proc_nr);
                if (!may_send_to(src_ptr, proc_nr(caller_ptr)))
                        continue;
+               vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
                r= try_one(src_ptr, caller_ptr);
                if (r == OK)
                        return r;
@@ -957,6 +930,7 @@ struct proc *dst_ptr;
        asynmsg_t tabent;
        vir_bytes table_v;
        struct proc *caller_ptr;
+       int r;
 
        privp= priv(src_ptr);
        size= privp->s_asynsize;
@@ -986,8 +960,6 @@ struct proc *dst_ptr;
                {
                        kprintf("try_one: bad bits in table\n");
                        privp->s_asynsize= 0;
-                       if (src_ptr->p_endpoint == ipc_stats_target)
-                               ipc_stats.bad_senda++;
                        return EINVAL;
                }
 
@@ -1015,10 +987,11 @@ struct proc *dst_ptr;
                m_ptr= &table_ptr[i].msg;       /* Note: pointer in the
                                                 * caller's address space.
                                                 */
-               CopyMess(src_ptr->p_nr, src_ptr, m_ptr, dst_ptr,
-                       dst_ptr->p_messbuf);
+               A_RETRIEVE(i, msg);
+               r = QueueMess(src_ptr->p_endpoint, vir2phys(&tabent.msg),
+                       dst_ptr);
 
-               tabent.result= OK;
+               tabent.result= r;
                A_INSERT(i, result);
                tabent.flags= flags | AMF_DONE;
                A_INSERT(i, flags);
@@ -1034,7 +1007,7 @@ struct proc *dst_ptr;
        return EAGAIN;
 }
 
-/*===========================================================================*
+ /*===========================================================================*
  *                             lock_notify                                  *
  *===========================================================================*/
 PUBLIC int lock_notify(src_e, dst_e)
@@ -1047,60 +1020,23 @@ int dst_e;                      /* (endpoint) who is to be notified */
  * the first kernel entry (hardware interrupt, trap, or exception). Locking
  * is done by temporarily disabling interrupts. 
  */
-  int result, src, dst;
+  int result, src_p;
 
-  if(!isokendpt(src_e, &src) || !isokendpt(dst_e, &dst))
-       return EDEADSRCDST;
+  vmassert(!intr_disabled());
 
-  /* Exception or interrupt occurred, thus already locked. */
-  if (k_reenter >= 0) {
-      result = mini_notify(proc_addr(src), dst); 
+  if (!isokendpt(src_e, &src_p)) {
+       kprintf("lock_notify: bogus src: %d\n", src_e);
+       return EDEADSRCDST;
   }
 
-  /* Call from task level, locking is required. */
-  else {
       lock;
-      result = mini_notify(proc_addr(src), dst); 
+  vmassert(intr_disabled());
+      result = mini_notify(proc_addr(src_p), dst_e); 
+  vmassert(intr_disabled());
       unlock;
-  }
-  return(result);
-}
-
-/*===========================================================================*
- *                             soft_notify                                  *
- *===========================================================================*/
-PUBLIC int soft_notify(dst_e)
-int dst_e;                     /* (endpoint) who is to be notified */
-{
-       int dst, u = 0;
-       struct proc *dstp, *sys = proc_addr(SYSTEM);
+  vmassert(!intr_disabled());
 
-/* Delayed interface to notify() from SYSTEM that is safe/easy to call
- * from more places than notify().
- */
-       if(!intr_disabled()) { lock; u = 1; }
-
-       {
-               if(!isokendpt(dst_e, &dst))
-                       minix_panic("soft_notify to dead ep", dst_e);
-
-               dstp = proc_addr(dst);
-
-               if(!dstp->p_softnotified) {
-                       dstp->next_soft_notify = softnotify;
-                       softnotify = dstp;
-                       dstp->p_softnotified = 1;
-       
-                       if (RTS_ISSET(sys, RECEIVING)) {
-                               sys->p_messbuf->m_source = SYSTEM;
-                               RTS_UNSET(sys, RECEIVING);
-                       }
-               }
-       }
-
-       if(u) { unlock; }
-
-       return OK;
+  return(result);
 }
 
 /*===========================================================================*
@@ -1117,15 +1053,19 @@ register struct proc *rp;       /* this process is now runnable */
   int q;                                       /* scheduling queue to use */
   int front;                                   /* add to front or back */
 
+  NOREC_ENTER(enqueuefunc);
+
 #if DEBUG_SCHED_CHECK
   if(!intr_disabled()) { minix_panic("enqueue with interrupts enabled", NO_NUM); }
-  CHECK_RUNQUEUES;
   if (rp->p_ready) minix_panic("enqueue already ready process", NO_NUM);
 #endif
 
   /* Determine where to insert to process. */
   sched(rp, &q, &front);
 
+  vmassert(q >= 0);
+  vmassert(q < IDLE_Q || rp->p_endpoint == IDLE);
+
   /* Now add the process to the queue. */
   if (rdy_head[q] == NIL_PROC) {               /* add to empty queue */
       rdy_head[q] = rdy_tail[q] = rp;          /* create a new queue */
@@ -1141,19 +1081,25 @@ register struct proc *rp;       /* this process is now runnable */
       rp->p_nextready = NIL_PROC;              /* mark new end */
   }
 
+#if DEBUG_SCHED_CHECK
+  rp->p_ready = 1;
+  CHECK_RUNQUEUES;
+#endif
+
   /* Now select the next process to run, if there isn't a current
    * process yet or current process isn't ready any more, or
    * it's PREEMPTIBLE.
    */
-  if(!proc_ptr || proc_ptr->p_rts_flags ||
-    (priv(proc_ptr)->s_flags & PREEMPTIBLE)) {
+       vmassert(proc_ptr);
+  if((proc_ptr->p_priority > rp->p_priority) &&
+   (priv(proc_ptr)->s_flags & PREEMPTIBLE)) 
      pick_proc();
-  }
 
 #if DEBUG_SCHED_CHECK
-  rp->p_ready = 1;
   CHECK_RUNQUEUES;
 #endif
+
+  NOREC_RETURN(enqueuefunc, );
 }
 
 /*===========================================================================*
@@ -1170,14 +1116,17 @@ register struct proc *rp;       /* this process is no longer runnable */
   register struct proc **xpp;                  /* iterate over queue */
   register struct proc *prev_xp;
 
+  NOREC_ENTER(dequeuefunc);
+
+#if DEBUG_STACK_CHECK
   /* Side-effect for kernel: check if the task's stack still is ok? */
   if (iskernelp(rp)) {                                 
        if (*priv(rp)->s_stack_guard != STACK_GUARD)
                minix_panic("stack overrun by task", proc_nr(rp));
   }
+#endif
 
 #if DEBUG_SCHED_CHECK
-  CHECK_RUNQUEUES;
   if(!intr_disabled()) { minix_panic("dequeue with interrupts enabled", NO_NUM); }
   if (! rp->p_ready) minix_panic("dequeue() already unready process", NO_NUM);
 #endif
@@ -1193,17 +1142,23 @@ register struct proc *rp;       /* this process is no longer runnable */
           *xpp = (*xpp)->p_nextready;          /* replace with next chain */
           if (rp == rdy_tail[q])               /* queue tail removed */
               rdy_tail[q] = prev_xp;           /* set new tail */
+
+#if DEBUG_SCHED_CHECK
+               rp->p_ready = 0;
+                 CHECK_RUNQUEUES;
+#endif
           if (rp == proc_ptr || rp == next_ptr)        /* active process removed */
-              pick_proc();                     /* pick new process to run */
+              pick_proc();             /* pick new process to run */
           break;
       }
       prev_xp = *xpp;                          /* save previous in chain */
   }
 
 #if DEBUG_SCHED_CHECK
-  rp->p_ready = 0;
   CHECK_RUNQUEUES;
 #endif
+
+  NOREC_RETURN(dequeuefunc, );
 }
 
 /*===========================================================================*
@@ -1249,25 +1204,29 @@ PRIVATE void pick_proc()
  * clock task can tell who to bill for system time.
  */
   register struct proc *rp;                    /* process to run */
-  int q;                                       /* iterate over queues */
+  int q;                               /* iterate over queues */
+
+  NOREC_ENTER(pick);
 
   /* Check each of the scheduling queues for ready processes. The number of
    * queues is defined in proc.h, and priorities are set in the task table.
    * The lowest queue contains IDLE, which is always ready.
    */
   for (q=0; q < NR_SCHED_QUEUES; q++) {        
-      if ( (rp = rdy_head[q]) != NIL_PROC) {
-          next_ptr = rp;                       /* run process 'rp' next */
-#if 0
-         if(rp->p_endpoint != 4 && rp->p_endpoint != 5 && rp->p_endpoint != IDLE && rp->p_endpoint != SYSTEM)
-               kprintf("[run %s]",  rp->p_name);
-#endif
-          if (priv(rp)->s_flags & BILLABLE)            
-              bill_ptr = rp;                   /* bill for system time */
-          return;                               
-      }
+       int found = 0;
+       if(!(rp = rdy_head[q])) {
+               TRACE(VF_PICKPROC, printf("queue %d empty\n", q););
+               continue;
+       }
+       TRACE(VF_PICKPROC, printf("found %s / %d on queue %d\n", 
+               rp->p_name, rp->p_endpoint, q););
+       next_ptr = rp;                  /* run process 'rp' next */
+       vmassert(proc_ptr != next_ptr);
+       vmassert(!next_ptr->p_rts_flags);
+       if (priv(rp)->s_flags & BILLABLE)               
+               bill_ptr = rp;          /* bill for system time */
+       NOREC_RETURN(pick, );
   }
-  minix_panic("no ready process", NO_NUM);
 }
 
 /*===========================================================================*
@@ -1286,9 +1245,11 @@ timer_t *tp;                                     /* watchdog timer pointer */
   clock_t next_period;                         /* time of next period  */
   int ticks_added = 0;                         /* total time added */
 
+  vmassert(!intr_disabled());
+
+  lock;
   for (rp=BEG_PROC_ADDR; rp<END_PROC_ADDR; rp++) {
       if (! isemptyp(rp)) {                            /* check slot use */
-         lock;
          if (rp->p_priority > rp->p_max_priority) {    /* update priority? */
              if (rp->p_rts_flags == 0) dequeue(rp);    /* take off queue */
              ticks_added += rp->p_quantum_size;        /* do accounting */
@@ -1299,12 +1260,9 @@ timer_t *tp;                                     /* watchdog timer pointer */
              ticks_added += rp->p_quantum_size - rp->p_ticks_left;
               rp->p_ticks_left = rp->p_quantum_size;   /* give new quantum */
          }
-         unlock;
       }
   }
-#if DEBUG
-  kprintf("ticks_added: %d\n", ticks_added);
-#endif
+  unlock;
 
   /* Now schedule a new watchdog timer to balance the queues again.  The 
    * period depends on the total amount of quantum ticks added.
@@ -1328,37 +1286,6 @@ message *m_ptr;                  /* pointer to message buffer */
   return(result);
 }
 
-/*===========================================================================*
- *                             lock_enqueue                                 *
- *===========================================================================*/
-PUBLIC void lock_enqueue(rp)
-struct proc *rp;               /* this process is now runnable */
-{
-/* Safe gateway to enqueue() for tasks. */
-  lock;
-  enqueue(rp);
-  unlock;
-}
-
-/*===========================================================================*
- *                             lock_dequeue                                 *
- *===========================================================================*/
-PUBLIC void lock_dequeue(rp)
-struct proc *rp;               /* this process is no longer runnable */
-{
-/* Safe gateway to dequeue() for tasks. */
-  if (k_reenter >= 0) {
-       /* We're in an exception or interrupt, so don't lock (and ... 
-        * don't unlock).
-        */
-       dequeue(rp);
-  } else {
-       lock;
-       dequeue(rp);
-       unlock;
-  }
-}
-
 /*===========================================================================*
  *                             endpoint_lookup                              *
  *===========================================================================*/
@@ -1401,24 +1328,18 @@ int *p, fatalflag;
        *p = _ENDPOINT_P(e);
        if(!isokprocn(*p)) {
 #if DEBUG_ENABLE_IPC_WARNINGS
-#if 0
                kprintf("kernel:%s:%d: bad endpoint %d: proc %d out of range\n",
                file, line, e, *p);
-#endif
 #endif
        } else if(isemptyn(*p)) {
-#if DEBUG_ENABLE_IPC_WARNINGS
 #if 0
        kprintf("kernel:%s:%d: bad endpoint %d: proc %d empty\n", file, line, e, *p);
-#endif
 #endif
        } else if(proc_addr(*p)->p_endpoint != e) {
 #if DEBUG_ENABLE_IPC_WARNINGS
-#if 0
                kprintf("kernel:%s:%d: bad endpoint %d: proc %d has ept %d (generation %d vs. %d)\n", file, line,
                e, *p, proc_addr(*p)->p_endpoint,
                _ENDPOINT_G(e), _ENDPOINT_G(proc_addr(*p)->p_endpoint));
-#endif
 #endif
        } else ok = 1;
        if(!ok && fatalflag) {
index aa3752f0070e78ae726a002244cae9fc072a45df..ac07514b04468354f264f8db948df8d8a1d1117a 100755 (executable)
@@ -10,6 +10,7 @@
  * struct proc, be sure to change sconst.h to match.
  */
 #include <minix/com.h>
+#include <minix/portio.h>
 #include "const.h"
 #include "priv.h"
 
@@ -39,7 +40,6 @@ struct proc {
   struct proc *p_nextready;    /* pointer to next ready process */
   struct proc *p_caller_q;     /* head of list of procs wishing to send */
   struct proc *p_q_link;       /* link to next proc wishing to send */
-  message *p_messbuf;          /* pointer to passed message buffer */
   int p_getfrom_e;             /* from whom does process want to receive? */
   int p_sendto_e;              /* to whom does process want to send? */
 
@@ -49,6 +49,11 @@ struct proc {
 
   endpoint_t p_endpoint;       /* endpoint number, generation-aware */
 
+  message p_sendmsg;           /* Message from this process if SENDING */
+  message p_delivermsg;                /* Message for this process if MF_DELIVERMSG */
+  vir_bytes p_delivermsg_vir;  /* Virtual addr this proc wants message at */
+  vir_bytes p_delivermsg_lin;  /* Linear addr this proc wants message at */
+
   /* If handler functions detect a process wants to do something with
    * memory that isn't present, VM has to fix it. Until it has asked
    * what needs to be done and fixed it, save necessary state here.
@@ -60,28 +65,12 @@ struct proc {
        struct proc     *nextrestart;   /* next in vmrestart chain */
        struct proc     *nextrequestor; /* next in vmrequest chain */
 #define VMSTYPE_SYS_NONE       0
-#define VMSTYPE_SYS_MESSAGE    1
-#define VMSTYPE_SYS_CALL       2
-#define VMSTYPE_MSGCOPY                3
+#define VMSTYPE_KERNELCALL     1
+#define VMSTYPE_DELIVERMSG     2
        int             type;           /* suspended operation */
        union {
                /* VMSTYPE_SYS_MESSAGE */
                message         reqmsg; /* suspended request message */
-
-               /* VMSTYPE_SYS_CALL */
-               struct {
-                       int call_nr;
-                       message *m_ptr;
-                       int src_dst_e;
-                       long bit_map;
-               } sys_call;
-
-               /* VMSTYPE_MSGCOPY */
-               struct {
-                       struct proc     *dst;
-                       vir_bytes       dst_v;
-                       message         msgbuf;
-               } msgcopy;
        } saved;
 
        /* Parameters of request to VM */
@@ -92,10 +81,9 @@ struct proc {
        /* VM result when available */
        int             vmresult;
 
-       /* Target gets this set. (But caller and target can be
-        * the same, so we can't put this in the 'saved' union.)
-       */
-       struct proc     *requestor;
+#if DEBUG_VMASSERT
+       char stacktrace[200];
+#endif
 
        /* If the suspended operation is a sys_call, its details are
         * stored here.
@@ -110,21 +98,26 @@ struct proc {
 #define PMAGIC 0xC0FFEE1
   int p_magic; /* check validity of proc pointers */
 #endif
+
+#if DEBUG_TRACE
+  int p_schedules;
+#endif
 };
 
 /* Bits for the runtime flags. A process is runnable iff p_rts_flags == 0. */
-#define SLOT_FREE      0x01    /* process slot is free */
-#define NO_PRIORITY     0x02   /* process has been stopped */
-#define SENDING                0x04    /* process blocked trying to send */
-#define RECEIVING      0x08    /* process blocked trying to receive */
-#define SIGNALED       0x10    /* set when new kernel signal arrives */
-#define SIG_PENDING    0x20    /* unready while signal being processed */
-#define P_STOP         0x40    /* set when process is being traced */
-#define NO_PRIV                0x80    /* keep forked system process from running */
-#define NO_ENDPOINT    0x100   /* process cannot send or receive messages */
-#define VMINHIBIT      0x200   /* not scheduled until pagetable set by VM */
-#define PAGEFAULT      0x400   /* process has unhandled pagefault */
-#define VMREQUEST      0x800   /* originator of vm memory request */
+#define SLOT_FREE       0x01   /* process slot is free */
+#define NO_PRIORITY      0x02  /* process has been stopped */
+#define SENDING                 0x04   /* process blocked trying to send */
+#define RECEIVING       0x08   /* process blocked trying to receive */
+#define SIGNALED        0x10   /* set when new kernel signal arrives */
+#define SIG_PENDING     0x20   /* unready while signal being processed */
+#define P_STOP          0x40   /* set when process is being traced */
+#define NO_PRIV                 0x80   /* keep forked system process from running */
+#define NO_ENDPOINT     0x100  /* process cannot send or receive messages */
+#define VMINHIBIT       0x200  /* not scheduled until pagetable set by VM */
+#define PAGEFAULT       0x400  /* process has unhandled pagefault */
+#define VMREQUEST       0x800  /* originator of vm memory request */
+#define VMREQTARGET    0x1000  /* target of vm memory request */
 
 /* These runtime flags can be tested and manipulated by these macros. */
 
@@ -134,49 +127,62 @@ struct proc {
 /* Set flag and dequeue if the process was runnable. */
 #define RTS_SET(rp, f)                                                 \
        do {                                                            \
+               vmassert(intr_disabled());                              \
                if(!(rp)->p_rts_flags) { dequeue(rp); }                 \
                (rp)->p_rts_flags |=  (f);                              \
+               vmassert(intr_disabled());                              \
        } while(0)
 
 /* Clear flag and enqueue if the process was not runnable but is now. */
 #define RTS_UNSET(rp, f)                                               \
        do {                                                            \
                int rts;                                                \
-               rts = (rp)->p_rts_flags;                                        \
+               vmassert(intr_disabled());                              \
+               rts = (rp)->p_rts_flags;                                \
                (rp)->p_rts_flags &= ~(f);                              \
                if(rts && !(rp)->p_rts_flags) { enqueue(rp); }          \
+               vmassert(intr_disabled());                              \
        } while(0)
 
 /* Set flag and dequeue if the process was runnable. */
 #define RTS_LOCK_SET(rp, f)                                            \
        do {                                                            \
-               if(!(rp)->p_rts_flags) { lock_dequeue(rp); }            \
+               int u = 0;                                              \
+               if(!intr_disabled()) { u = 1; lock; }                   \
+               if(!(rp)->p_rts_flags) { dequeue(rp); }                 \
                (rp)->p_rts_flags |=  (f);                              \
+               if(u) { unlock; }                                       \
        } while(0)
 
 /* Clear flag and enqueue if the process was not runnable but is now. */
 #define RTS_LOCK_UNSET(rp, f)                                          \
        do {                                                            \
                int rts;                                                \
-               rts = (rp)->p_rts_flags;                                        \
+               int u = 0;                                              \
+               if(!intr_disabled()) { u = 1; lock; }                   \
+               rts = (rp)->p_rts_flags;                                \
                (rp)->p_rts_flags &= ~(f);                              \
-               if(rts && !(rp)->p_rts_flags) { lock_enqueue(rp); }     \
+               if(rts && !(rp)->p_rts_flags) { enqueue(rp); }          \
+               if(u) { unlock; }                                       \
        } while(0)
 
 /* Set flags to this value. */
 #define RTS_LOCK_SETFLAGS(rp, f)                                       \
        do {                                                            \
-               if(!(rp)->p_rts_flags && (f)) { lock_dequeue(rp); }     \
-               (rp)->p_rts_flags = (f);                                        \
+               int u = 0;                                              \
+               if(!intr_disabled()) { u = 1; lock; }                   \
+               if(!(rp)->p_rts_flags && (f)) { dequeue(rp); }          \
+               (rp)->p_rts_flags = (f);                                \
+               if(u) { unlock; }                                       \
        } while(0)
 
 /* Misc flags */
-#define REPLY_PENDING  0x01    /* reply to IPC_REQUEST is pending */
-#define VIRT_TIMER     0x02    /* process-virtual timer is running */
-#define PROF_TIMER     0x04    /* process-virtual profile timer is running */
-#define MF_VM          0x08    /* process uses VM */
+#define MF_REPLY_PEND  0x01    /* reply to IPC_REQUEST is pending */
+#define MF_VIRT_TIMER  0x02    /* process-virtual timer is running */
+#define MF_PROF_TIMER  0x04    /* process-virtual profile timer is running */
 #define MF_ASYNMSG     0x10    /* Asynchrous message pending */
 #define MF_FULLVM      0x20
+#define MF_DELIVERMSG  0x40    /* Copy message for him before running */
 
 /* Scheduling priorities for p_priority. Values must start at zero (highest
  * priority) and increment.  Priorities of the processes in the boot image 
index 7536765c594216e42c49859210c34619e10eeb01..929950585813574d96e21e1612075c1325dc6edd 100755 (executable)
@@ -33,13 +33,12 @@ _PROTOTYPE( int sys_call, (int call_nr, int src_dst,
                                        message *m_ptr, long bit_map)   );
 _PROTOTYPE( void sys_call_restart, (struct proc *caller)               );
 _PROTOTYPE( int lock_notify, (int src, int dst)                                );
-_PROTOTYPE( int soft_notify, (int dst)                                 );
+_PROTOTYPE( int mini_notify, (struct proc *src, endpoint_t dst)                );
 _PROTOTYPE( int lock_send, (int dst, message *m_ptr)                   );
-_PROTOTYPE( void lock_enqueue, (struct proc *rp)                       );
-_PROTOTYPE( void lock_dequeue, (struct proc *rp)                       );
 _PROTOTYPE( void enqueue, (struct proc *rp)                            );
 _PROTOTYPE( void dequeue, (struct proc *rp)                            );
 _PROTOTYPE( void balance_queues, (struct timer *tp)                    );
+_PROTOTYPE( void schedcheck, (void)                                    );
 _PROTOTYPE( struct proc *endpoint_lookup, (endpoint_t ep)              );
 #if DEBUG_ENABLE_IPC_WARNINGS
 _PROTOTYPE( int isokendpt_f, (char *file, int line, endpoint_t e, int *p, int f));
@@ -91,6 +90,8 @@ _PROTOTYPE( void cons_seth, (int pos, int n)                          );
 #define CHECK_RUNQUEUES check_runqueues_f(__FILE__, __LINE__)
 _PROTOTYPE( void check_runqueues_f, (char *file, int line) );
 #endif
+_PROTOTYPE( char *rtsflagstr, (int flags) );
+_PROTOTYPE( char *miscflagstr, (int flags) );
 
 /* system/do_safecopy.c */
 _PROTOTYPE( int verify_grant, (endpoint_t, endpoint_t, cp_grant_id_t, vir_bytes,
@@ -106,18 +107,21 @@ _PROTOTYPE( void stop_profile_clock, (void)                               );
 #endif
 
 /* functions defined in architecture-dependent files. */
-_PROTOTYPE( void phys_copy, (phys_bytes source, phys_bytes dest,
+_PROTOTYPE( phys_bytes phys_copy, (phys_bytes source, phys_bytes dest,
                 phys_bytes count)                                       );
+_PROTOTYPE( void phys_copy_fault, (void));
 #define virtual_copy(src, dst, bytes) virtual_copy_f(src, dst, bytes, 0)
 #define virtual_copy_vmcheck(src, dst, bytes) virtual_copy_f(src, dst, bytes, 1)
 _PROTOTYPE( int virtual_copy_f, (struct vir_addr *src, struct vir_addr *dst, 
                                vir_bytes bytes, int vmcheck)           );
 _PROTOTYPE( int data_copy, (endpoint_t from, vir_bytes from_addr,
        endpoint_t to, vir_bytes to_addr, size_t bytes));
+_PROTOTYPE( int data_copy_vmcheck, (endpoint_t from, vir_bytes from_addr,
+       endpoint_t to, vir_bytes to_addr, size_t bytes));
 #define data_copy_to(d, p, v, n) data_copy(SYSTEM, (d), (p), (v), (n));
 #define data_copy_from(d, p, v, n) data_copy((p), (v), SYSTEM, (d), (n));
 _PROTOTYPE( void alloc_segments, (struct proc *rp)                      );
-_PROTOTYPE( void vm_init, (void)                                               );
+_PROTOTYPE( void vm_init, (struct proc *first)                         );
 _PROTOTYPE( void vm_map_range, (u32_t base, u32_t size, u32_t offset)   );
 _PROTOTYPE( int vm_copy, (vir_bytes src, struct proc *srcproc,
        vir_bytes dst, struct proc *dstproc, phys_bytes bytes));
@@ -130,7 +134,7 @@ _PROTOTYPE( phys_bytes umap_remote, (struct proc* rp, int seg,
 _PROTOTYPE( phys_bytes umap_virtual, (struct proc* rp, int seg,
         vir_bytes vir_addr, vir_bytes bytes)                           );
 _PROTOTYPE( phys_bytes seg2phys, (U16_t)                                );
-_PROTOTYPE( void phys_memset, (phys_bytes source, unsigned long pattern,
+_PROTOTYPE( int vm_phys_memset, (phys_bytes source, u8_t pattern,
                 phys_bytes count)                                       );
 _PROTOTYPE( vir_bytes alloc_remote_segment, (u32_t *, segframe_t *,
         int, phys_bytes, vir_bytes, int));
@@ -164,5 +168,10 @@ _PROTOTYPE( int vm_checkrange, (struct proc *caller, struct proc *target,
        vir_bytes start, vir_bytes length, int writeflag, int checkonly));
 _PROTOTYPE( void proc_stacktrace, (struct proc *proc)           );
 _PROTOTYPE( int vm_lookup, (struct proc *proc, vir_bytes virtual, vir_bytes *result, u32_t *ptent));
+_PROTOTYPE( int vm_suspend, (struct proc *caller, struct proc *target,
+       phys_bytes lin, phys_bytes size, int wrflag, int type));
+_PROTOTYPE( int delivermsg, (struct proc *target));
+_PROTOTYPE( phys_bytes arch_switch_copymsg, (struct proc *rp, message *m,
+       phys_bytes lin));
 
 #endif /* PROTO_H */
index 992770ee7cf0895eb9699eaa661dfa2310b5f132..cdc4cc65667989cbe871fabcf53f09aada8fa454 100755 (executable)
@@ -40,6 +40,7 @@
 #include <sys/sigcontext.h>
 #include <minix/endpoint.h>
 #include <minix/safecopies.h>
+#include <minix/portio.h>
 #include <minix/u64.h>
 #include <sys/vm_i386.h>
 
@@ -58,7 +59,6 @@ char *callnames[NR_SYS_CALLS];
     call_vec[(call_nr-KERNEL_CALL)] = (handler)  
 
 FORWARD _PROTOTYPE( void initialize, (void));
-FORWARD _PROTOTYPE( void softnotify_check, (void));
 FORWARD _PROTOTYPE( struct proc *vmrestart_check, (message *));
 
 /*===========================================================================*
@@ -77,26 +77,18 @@ PUBLIC void sys_task()
   /* Initialize the system task. */
   initialize();
 
+
   while (TRUE) {
       struct proc *restarting;
 
       restarting = vmrestart_check(&m);
-      softnotify_check();
-       if(softnotify)
-               minix_panic("softnotify non-NULL before receive (1)", NO_NUM);
 
       if(!restarting) {
         int r;
        /* Get work. Block and wait until a request message arrives. */
-       if(softnotify)
-               minix_panic("softnotify non-NULL before receive (2)", NO_NUM);
        if((r=receive(ANY, &m)) != OK)
                minix_panic("receive() failed", r);
-       if(m.m_source == SYSTEM)
-               continue;
-       if(softnotify)
-               minix_panic("softnotify non-NULL after receive", NO_NUM);
-      }
+      } 
 
       sys_call_code = (unsigned) m.m_type;
       call_nr = sys_call_code - KERNEL_CALL;   
@@ -104,37 +96,13 @@ PUBLIC void sys_task()
       okendpt(who_e, &who_p);
       caller_ptr = proc_addr(who_p);
 
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               sys_stats.total= add64u(sys_stats.total, 1);
-
       /* See if the caller made a valid request and try to handle it. */
       if (call_nr < 0 || call_nr >= NR_SYS_CALLS) {    /* check call number */
-#if DEBUG_ENABLE_IPC_WARNINGS
          kprintf("SYSTEM: illegal request %d from %d.\n",
                call_nr,m.m_source);
-#endif
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               sys_stats.bad_req++;
          result = EBADREQUEST;                 /* illegal message type */
       } 
       else if (!GET_BIT(priv(caller_ptr)->s_k_call_mask, call_nr)) {
-#if DEBUG_ENABLE_IPC_WARNINGS
-       static int curr= 0, limit= 100, extra= 20;
-
-       if (curr < limit+extra)
-       {
-               kprintf("SYSTEM: request %d from %d denied.\n",
-                       call_nr, m.m_source);
-       } else if (curr == limit+extra)
-       {
-               kprintf("sys_task: no debug output for a while\n");
-       }
-       else if (curr == 2*limit-1)
-               limit *= 2;
-       curr++;
-#endif
-       if (caller_ptr->p_endpoint == ipc_stats_target)
-               sys_stats.not_allowed++;
          result = ECALLDENIED;                 /* illegal message type */
       }
       else {
@@ -146,15 +114,20 @@ PUBLIC void sys_task()
         * until VM tells us it's allowed. VM has been notified
         * and we must wait for its reply to restart the call.
         */
+        vmassert(RTS_ISSET(caller_ptr, VMREQUEST));
+       vmassert(caller_ptr->p_vmrequest.type == VMSTYPE_KERNELCALL);
        memcpy(&caller_ptr->p_vmrequest.saved.reqmsg, &m, sizeof(m));
-       caller_ptr->p_vmrequest.type = VMSTYPE_SYS_MESSAGE;
       } else if (result != EDONTREPLY) {
        /* Send a reply, unless inhibited by a handler function.
         * Use the kernel function lock_send() to prevent a system
         * call trap.
         */
-               if(restarting)
-                       RTS_LOCK_UNSET(restarting, VMREQUEST);
+               if(restarting) {
+                       vmassert(!RTS_ISSET(restarting, VMREQUEST));
+#if 0
+                       vmassert(!RTS_ISSET(restarting, VMREQTARGET));
+#endif
+               }
                m.m_type = result;              /* report status of call */
                if(WILLRECEIVE(caller_ptr, SYSTEM)) {
                  if (OK != (s=lock_send(m.m_source, &m))) {
@@ -222,7 +195,6 @@ PRIVATE void initialize(void)
   map(SYS_NEWMAP, do_newmap);          /* set up a process memory map */
   map(SYS_SEGCTL, do_segctl);          /* add segment and get selector */
   map(SYS_MEMSET, do_memset);          /* write char to memory area */
-  map(SYS_VM_SETBUF, do_vm_setbuf);    /* PM passes buffer for page tables */
   map(SYS_VMCTL, do_vmctl);            /* various VM process settings */
 
   /* Copying. */
@@ -350,7 +322,11 @@ PUBLIC void send_sig(int proc_nr, int sig_nr)
 
   rp = proc_addr(proc_nr);
   sigaddset(&priv(rp)->s_sig_pending, sig_nr);
-  soft_notify(rp->p_endpoint); 
+  if(!intr_disabled()) {
+         lock_notify(SYSTEM, rp->p_endpoint); 
+  } else {
+         mini_notify(proc_addr(SYSTEM), rp->p_endpoint); 
+  }
 }
 
 /*===========================================================================*
@@ -467,7 +443,9 @@ register struct proc *rc;           /* slot of process to clean up */
 
   if(isemptyp(rc)) minix_panic("clear_proc: empty process", rc->p_endpoint);
 
-  if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR) {
+  if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR ||
+       rc->p_endpoint == VM_PROC_NR)
+  {
        /* This test is great for debugging system processes dying,
         * but as this happens normally on reboot, not good permanent code.
         */
@@ -543,13 +521,6 @@ register struct proc *rc;          /* slot of process to clean up */
 #endif
       } 
   }
-
-  /* No pending soft notifies. */
-  for(np = softnotify; np; np = np->next_soft_notify) {
-    if(np == rc) {
-       minix_panic("dying proc was on next_soft_notify", np->p_endpoint);
-    }
-  }
 }
 
 /*===========================================================================*
@@ -583,28 +554,6 @@ int access;                     /* does grantee want to CPF_READ or _WRITE? */
         return umap_virtual(proc_addr(proc_nr), D, v_offset, bytes);
 } 
 
-/*===========================================================================*
- *                              softnotify_check                            *
- *===========================================================================*/
-PRIVATE void softnotify_check(void)
-{  
-       struct proc *np, *nextnp;
-
-       if(!softnotify) 
-               return;
-
-       for(np = softnotify; np; np = nextnp) {
-               if(!np->p_softnotified)
-                       minix_panic("softnotify but no p_softnotified", NO_NUM);
-               lock_notify(SYSTEM, np->p_endpoint);
-               nextnp = np->next_soft_notify;
-               np->next_soft_notify = NULL;
-               np->p_softnotified = 0;
-       }
-
-       softnotify = NULL;
-}
-
 /*===========================================================================*
  *                              vmrestart_check                            *
  *===========================================================================*/
@@ -618,23 +567,18 @@ PRIVATE struct proc *vmrestart_check(message *m)
        if(!(restarting = vmrestart))
                return NULL;
 
-       if(restarting->p_rts_flags & SLOT_FREE)
-          minix_panic("SYSTEM: VMREQUEST set for empty process", NO_NUM);
+       vmassert(!RTS_ISSET(restarting, SLOT_FREE));
+       vmassert(RTS_ISSET(restarting, VMREQUEST));
 
        type = restarting->p_vmrequest.type;
        restarting->p_vmrequest.type = VMSTYPE_SYS_NONE;
        vmrestart = restarting->p_vmrequest.nextrestart;
 
-       if(!RTS_ISSET(restarting, VMREQUEST))
-          minix_panic("SYSTEM: VMREQUEST not set for process on vmrestart queue",
-               restarting->p_endpoint);
-
        switch(type) {
-               case VMSTYPE_SYS_MESSAGE:
+               case VMSTYPE_KERNELCALL:
                        memcpy(m, &restarting->p_vmrequest.saved.reqmsg, sizeof(*m));
-                       if(m->m_source != restarting->p_endpoint)
-                          minix_panic("SYSTEM: vmrestart source doesn't match",
-                               NO_NUM);
+                       restarting->p_vmrequest.saved.reqmsg.m_source = NONE;
+                       vmassert(m->m_source == restarting->p_endpoint);
                        /* Original caller could've disappeared in the meantime. */
                        if(!isokendpt(m->m_source, &who_p)) {
                                kprintf("SYSTEM: ignoring call %d from dead %d\n",
@@ -653,26 +597,6 @@ PRIVATE struct proc *vmrestart_check(message *m)
                                }
                        }
                        return restarting;
-               case VMSTYPE_SYS_CALL:
-                       kprintf("SYSTEM: restart sys_call\n");
-                       /* Restarting a kernel trap. */
-                       sys_call_restart(restarting);
-
-                       /* Handled; restart system loop. */
-                       return NULL;
-               case VMSTYPE_MSGCOPY:
-                       /* Do delayed message copy. */
-                       if((r=data_copy(SYSTEM,
-                               (vir_bytes) &restarting->p_vmrequest.saved.msgcopy.msgbuf,
-                               restarting->p_vmrequest.saved.msgcopy.dst->p_endpoint,
-                               (vir_bytes) restarting->p_vmrequest.saved.msgcopy.dst_v,
-                               sizeof(message))) != OK) {
-                               minix_panic("SYSTEM: delayed msgcopy failed", r);
-                       }
-                       RTS_LOCK_UNSET(restarting, VMREQUEST);
-
-                       /* Handled; restart system loop. */
-                       return NULL;
                default:
                        minix_panic("strange restart type", type);
        }
index 14f55df3c0ac3715be9a7357e9023dc5a0561be5..d35c7a474ddad94db61733ed23e48fc345f9466a 100644 (file)
@@ -91,9 +91,6 @@ _PROTOTYPE( int do_memset, (message *m_ptr) );
 #define do_memset do_unused
 #endif
 
-_PROTOTYPE( int do_vm_setbuf, (message *m_ptr) );
-_PROTOTYPE( int do_vm_map, (message *m_ptr) );
-
 _PROTOTYPE( int do_abort, (message *m_ptr) );
 #if ! USE_ABORT
 #define do_abort do_unused
index 496663d1d57e0433de610be5d6333e30bfd0ff69..a93e64f532d55f92bef899e0ce0da70c0a4a777a 100644 (file)
@@ -52,7 +52,6 @@ OBJECTS       = \
        $(SYSTEM)(do_sigreturn.o) \
        $(SYSTEM)(do_abort.o) \
        $(SYSTEM)(do_getinfo.o) \
-       $(SYSTEM)(do_vm_setbuf.o) \
        $(SYSTEM)(do_sprofile.o) \
        $(SYSTEM)(do_cprofile.o) \
        $(SYSTEM)(do_profbuf.o) \
@@ -166,9 +165,6 @@ $(SYSTEM)(do_vm.o): do_vm.o
 do_vm.o:       do_vm.c
        $(CC) do_vm.c
 
-$(SYSTEM)(do_vm_setbuf.o):     do_vm_setbuf.c
-       $(CC) do_vm_setbuf.c
-
 $(SYSTEM)(do_sprofile.o):       do_sprofile.c
        $(CC) do_sprofile.c
 
index ee7e0a912ed444b5fe0567685ad2385cba9e8742..65834be388e9c472848e4fc09f0618790fd7e079 100644 (file)
@@ -63,19 +63,8 @@ register message *m_ptr;     /* pointer to request message */
        }
        if (i >= nr_io_range)
        {
-               static int curr= 0, limit= 100, extra= 20;
-
-               if (curr < limit+extra)
-               {
                        kprintf("do_devio: port 0x%x (size %d) not allowed\n",
                                m_ptr->DIO_PORT, size);
-               } else if (curr == limit+extra)
-               {
-                       kprintf("do_devio: no debug output for a while\n");
-               }
-               else if (curr == 2*limit-1)
-                       limit *= 2;
-               curr++;
                return EPERM;
        }
     }
@@ -83,19 +72,8 @@ register message *m_ptr;     /* pointer to request message */
 doit:
     if (m_ptr->DIO_PORT & (size-1))
     {
-       static int curr= 0, limit= 100, extra= 20;
-
-       if (curr < limit+extra)
-       {
                kprintf("do_devio: unaligned port 0x%x (size %d)\n",
                        m_ptr->DIO_PORT, size);
-       } else if (curr == limit+extra)
-       {
-               kprintf("do_devio: no debug output for a while\n");
-       }
-       else if (curr == 2*limit-1)
-               limit *= 2;
-       curr++;
        return EPERM;
     }
 
index c37eeb7c7bec5704d0ced9ae4bed413537244777..a608c21cf6b0aa6939549d4654ee13b68c7f9fa9 100644 (file)
@@ -31,6 +31,11 @@ register message *m_ptr;     /* pointer to request message */
 
   rp = proc_addr(proc_nr);
 
+  if(rp->p_misc_flags & MF_DELIVERMSG) {
+       rp->p_misc_flags &= ~MF_DELIVERMSG;
+       rp->p_delivermsg_lin = 0;
+  }
+
   /* Save command name for debugging, ps(1) output, etc. */
   if(data_copy(who_e, (vir_bytes) m_ptr->PR_NAME_PTR,
        SYSTEM, (vir_bytes) rp->p_name, (phys_bytes) P_NAME_LEN - 1) != OK)
index 2e5ee9135db7c8e51d4457a54c5d8b528eba054c..aa94a3418fceaf89f8311dbaf29a3930d015a9aa 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include "../system.h"
+#include "../vm.h"
 #include <signal.h>
 
 #include <minix/endpoint.h>
@@ -33,10 +34,25 @@ register message *m_ptr;    /* pointer to request message */
 
   if(!isokendpt(m_ptr->PR_ENDPT, &p_proc))
        return EINVAL;
+
   rpp = proc_addr(p_proc);
   rpc = proc_addr(m_ptr->PR_SLOT);
   if (isemptyp(rpp) || ! isemptyp(rpc)) return(EINVAL);
 
+  vmassert(!(rpp->p_misc_flags & MF_DELIVERMSG));
+
+  /* needs to be receiving so we know where the message buffer is */
+  if(!RTS_ISSET(rpp, RECEIVING)) {
+       printf("kernel: fork not done synchronously?\n");
+       return EINVAL;
+  }
+
+  /* memory becomes readonly */
+  if (priv(rpp)->s_asynsize > 0) {
+       printf("kernel: process with waiting asynsend table can't fork\n");
+       return EINVAL;
+  }
+
   map_ptr= (struct mem_map *) m_ptr->PR_MEM_PTR;
 
   /* Copy parent 'proc' struct to child. And reinitialize some fields. */
@@ -59,7 +75,7 @@ register message *m_ptr;      /* pointer to request message */
 
   rpc->p_reg.psw &= ~TRACEBIT;         /* clear trace bit */
 
-  rpc->p_misc_flags &= ~(VIRT_TIMER | PROF_TIMER);
+  rpc->p_misc_flags &= ~(MF_VIRT_TIMER | MF_PROF_TIMER);
   rpc->p_virt_left = 0;                /* disable, clear the process-virtual timers */
   rpc->p_prof_left = 0;
 
@@ -81,9 +97,11 @@ register message *m_ptr;     /* pointer to request message */
 
   /* Calculate endpoint identifier, so caller knows what it is. */
   m_ptr->PR_ENDPT = rpc->p_endpoint;
+  m_ptr->PR_FORK_MSGADDR = (char *) rpp->p_delivermsg_vir;
 
   /* Install new map */
   r = newmap(rpc, map_ptr);
+  FIXLINMSG(rpc);
 
   /* Don't schedule process in VM mode until it has a new pagetable. */
   if(m_ptr->PR_FORK_FLAGS & PFF_VMINHIBIT) {
index e40889e1c51c52638a1f22952bba44f6391e0fb1..ecafdc2cb765e9a15f44cdb4c40e3d2982f21c5d 100644 (file)
@@ -28,9 +28,8 @@ register message *m_ptr;      /* pointer to request message */
  */
   size_t length;
   vir_bytes src_vir; 
-  int proc_nr, nr_e, nr;
+  int proc_nr, nr_e, nr, r;
   struct proc *caller;
-  phys_bytes ph;
   int wipe_rnd_bin = -1;
 
   caller = proc_addr(who_p);
@@ -67,19 +66,6 @@ register message *m_ptr;     /* pointer to request message */
         src_vir = (vir_bytes) irq_hooks;
         break;
     }
-    case GET_SCHEDINFO: {
-        /* This is slightly complicated because we need two data structures
-         * at once, otherwise the scheduling information may be incorrect.
-         * Copy the queue heads and fall through to copy the process table. 
-         */
-       if((ph=umap_local(caller, D, (vir_bytes) m_ptr->I_VAL_PTR2,length)) == 0)
-               return EFAULT;
-        length = sizeof(struct proc *) * NR_SCHED_QUEUES;
-       CHECKRANGE_OR_SUSPEND(proc_addr(who_p), ph, length, 1);
-       data_copy(SYSTEM, (vir_bytes) rdy_head,
-               who_e, (vir_bytes) m_ptr->I_VAL_PTR2, length);
-        /* fall through to GET_PROCTAB */
-    }
     case GET_PROCTAB: {
         length = sizeof(struct proc) * (NR_PROCS + NR_TASKS);
         src_vir = (vir_bytes) proc;
@@ -174,15 +160,16 @@ register message *m_ptr;  /* pointer to request message */
 
   /* Try to make the actual copy for the requested data. */
   if (m_ptr->I_VAL_LEN > 0 && length > m_ptr->I_VAL_LEN) return (E2BIG);
-  if((ph=umap_local(caller, D, (vir_bytes) m_ptr->I_VAL_PTR,length)) == 0)
-       return EFAULT;
-  CHECKRANGE_OR_SUSPEND(caller, ph, length, 1);
-  if(data_copy(SYSTEM, src_vir, who_e, (vir_bytes) m_ptr->I_VAL_PTR, length) == OK) {
+  r = data_copy_vmcheck(SYSTEM, src_vir, who_e,
+       (vir_bytes) m_ptr->I_VAL_PTR, length);
+
+  if(r != OK) return r;
+
        if(wipe_rnd_bin >= 0 && wipe_rnd_bin < RANDOM_SOURCES) {
                krandom.bin[wipe_rnd_bin].r_size = 0;
                krandom.bin[wipe_rnd_bin].r_next = 0;
        }
-  }
+
   return(OK);
 }
 
index 041b77b4c78182170545f0940b2d45ac04049c5f..bc3a433241f14b333b6feaeeb9bf060f917a2e0b 100644 (file)
@@ -139,10 +139,16 @@ irq_hook_t *hook;
  */
   int proc_nr;
 
+  vmassert(intr_disabled());
+
   /* As a side-effect, the interrupt handler gathers random information by 
    * timestamping the interrupt events. This is used for /dev/random.
    */
+#if 0
   get_randomness(&krandom, hook->irq);
+#else
+  FIXME("get_randomness disabled");
+#endif
 
   /* Check if the handler is still alive.
    * If it's dead, this should never happen, as processes that die 
@@ -158,7 +164,8 @@ irq_hook_t *hook;
   priv(proc_addr(proc_nr))->s_int_pending |= (1 << hook->notify_id);
 
   /* Build notification message and return. */
-  lock_notify(HARDWARE, hook->proc_nr_e);
+  vmassert(intr_disabled());
+  mini_notify(proc_addr(HARDWARE), hook->proc_nr_e);
   return(hook->policy & IRQ_REENABLE);
 }
 
index 511507042d683eec1ad324c7dd35fe60e484139f..1359112b634ecd9d5e626a7d8ae1c53008aceba4 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "../system.h"
+#include "../vm.h"
 
 #if USE_MEMSET
 
@@ -18,10 +19,8 @@ PUBLIC int do_memset(m_ptr)
 register message *m_ptr;
 {
 /* Handle sys_memset(). This writes a pattern into the specified memory. */
-  unsigned long p;
   unsigned char c = m_ptr->MEM_PATTERN;
-  p = c | (c << 8) | (c << 16) | (c << 24);
-  phys_memset((phys_bytes) m_ptr->MEM_PTR, p, (phys_bytes) m_ptr->MEM_COUNT);
+  vm_phys_memset((phys_bytes) m_ptr->MEM_PTR, c, (phys_bytes) m_ptr->MEM_COUNT);
   return(OK);
 }
 
index 55744b7144543ecd710ac37a4e815942f6fc4e7d..2e7d7f8ee35400af66f9540913520e5138fcfa93 100644 (file)
@@ -61,22 +61,11 @@ endpoint_t *e_granter;              /* new granter (magic grants) */
        if(!HASGRANTTABLE(granter_proc)) return EPERM;
 
        if(priv(granter_proc)->s_grant_entries <= grant) {
-               static int curr= 0, limit= 100, extra= 20;
-
-               if (curr < limit+extra)
-               {
                        kprintf(
                        "verify_grant: grant verify failed in ep %d proc %d: "
                        "grant %d out of range for table size %d\n",
                                granter, proc_nr, grant,
                                priv(granter_proc)->s_grant_entries);
-               } else if (curr == limit+extra)
-               {
-                       kprintf("verify_grant: no debug output for a while\n");
-               }
-               else if (curr == 2*limit-1)
-                       limit *= 2;
-               curr++;
                return(EPERM);
        }
 
@@ -219,23 +208,9 @@ int access;                        /* CPF_READ for a copy from granter to grantee, CPF_WRITE
        /* Verify permission exists. */
        if((r=verify_grant(granter, grantee, grantid, bytes, access,
            g_offset, &v_offset, &new_granter)) != OK) {
-               static int curr= 0, limit= 100, extra= 20;
-
-               if (curr < limit+extra)
-               {
-#if 0
                        kprintf(
                "grant %d verify to copy %d->%d by %d failed: err %d\n",
                                grantid, *src, *dst, grantee, r);
-#endif
-               } else if (curr == limit+extra)
-               {
-                       kprintf(
-                       "do_safecopy`safecopy: no debug output for a while\n");
-               }
-               else if (curr == 2*limit-1)
-                       limit *= 2;
-               curr++;
                return r;
        }
 
index ba340dec43c5a037f6b635d68f946124df29e9dd..879a05ad099265b460d662566c201bc98a8f3c00 100644 (file)
@@ -29,18 +29,13 @@ message *m_ptr;                     /* pointer to request message */
   struct sigcontext sc, *scp;
   struct sigframe fr, *frp;
   int proc_nr, r;
-  phys_bytes ph;
 
   if (!isokendpt(m_ptr->SIG_ENDPT, &proc_nr)) return(EINVAL);
   if (iskerneln(proc_nr)) return(EPERM);
   rp = proc_addr(proc_nr);
 
-  ph = umap_local(proc_addr(who_p), D, (vir_bytes) m_ptr->SIG_CTXT_PTR, sizeof(struct sigmsg));
-  if(!ph) return EFAULT;
-  CHECKRANGE_OR_SUSPEND(proc_addr(who_p), ph, sizeof(struct sigmsg), 1);
-
   /* Get the sigmsg structure into our address space.  */
-  if((r=data_copy(who_e, (vir_bytes) m_ptr->SIG_CTXT_PTR,
+  if((r=data_copy_vmcheck(who_e, (vir_bytes) m_ptr->SIG_CTXT_PTR,
        SYSTEM, (vir_bytes) &smsg, (phys_bytes) sizeof(struct sigmsg))) != OK)
        return r;
 
@@ -54,12 +49,9 @@ message *m_ptr;                      /* pointer to request message */
   sc.sc_flags = 0;     /* unused at this time */
   sc.sc_mask = smsg.sm_mask;
 
-  ph = umap_local(rp, D, (vir_bytes) scp, sizeof(struct sigcontext));
-  if(!ph) return EFAULT;
-  CHECKRANGE_OR_SUSPEND(rp, ph, sizeof(struct sigcontext), 1);
   /* Copy the sigcontext structure to the user's stack. */
-  if((r=data_copy(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT, (vir_bytes) scp,
-      (vir_bytes) sizeof(struct sigcontext))) != OK)
+  if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT,
+       (vir_bytes) scp, (vir_bytes) sizeof(struct sigcontext))) != OK)
       return r;
 
   /* Initialize the sigframe structure. */
@@ -73,11 +65,9 @@ message *m_ptr;                      /* pointer to request message */
   fr.sf_signo = smsg.sm_signo;
   fr.sf_retadr = (void (*)()) smsg.sm_sigreturn;
 
-  ph = umap_local(rp, D, (vir_bytes) frp, sizeof(struct sigframe));
-  if(!ph) return EFAULT;
-  CHECKRANGE_OR_SUSPEND(rp, ph, sizeof(struct sigframe), 1);
   /* Copy the sigframe structure to the user's stack. */
-  if((r=data_copy(SYSTEM, (vir_bytes) &fr, m_ptr->SIG_ENDPT, (vir_bytes) frp, 
+  if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &fr,
+       m_ptr->SIG_ENDPT, (vir_bytes) frp, 
       (vir_bytes) sizeof(struct sigframe))) != OK)
       return r;
 
index ff35a66214673b2cbc915c05d9e1dd252ddad9d7..69dc4f862c7984f271258d917862032b63a88fd9 100644 (file)
@@ -16,7 +16,6 @@
 PUBLIC int do_sysctl(m_ptr)
 register message *m_ptr;       /* pointer to request message */
 {
-  phys_bytes ph;
   vir_bytes len, buf;
   static char mybuf[DIAG_BUFSIZE];
   struct proc *caller, *target;
@@ -33,10 +32,7 @@ register message *m_ptr;     /* pointer to request message */
                        caller->p_endpoint, len);
                return EINVAL;
        }
-       if((ph=umap_local(caller, D, buf, len)) == 0)
-               return EFAULT;
-       CHECKRANGE_OR_SUSPEND(caller, ph, len, 1);
-       if((s=data_copy(who_e, buf, SYSTEM, (vir_bytes) mybuf, len)) != OK) {
+       if((s=data_copy_vmcheck(who_e, buf, SYSTEM, (vir_bytes) mybuf, len)) != OK) {
                kprintf("do_sysctl: diag for %d: len %d: copy failed: %d\n",
                        caller->p_endpoint, len, s);
                return s;
index 7c235ba460c1438f9ff21e59277da8d250eaf60c..39d59fc1f58444ca58c212c916c0554047b4b12b 100644 (file)
@@ -48,19 +48,15 @@ register message *m_ptr;    /* pointer to request message */
   case LOCAL_SEG:
       phys_addr = lin_addr = umap_local(targetpr, seg_index, offset, count); 
       if(!lin_addr) return EFAULT;
-      CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1);
       naughty = 1;
       break;
   case REMOTE_SEG:
       phys_addr = lin_addr = umap_remote(targetpr, seg_index, offset, count); 
       if(!lin_addr) return EFAULT;
-      CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1);
       naughty = 1;
       break;
-  case GRANT_SEG:
-      naughty = 1;
   case LOCAL_VM_SEG:
-    if(seg_index == MEM_GRANT || seg_type == GRANT_SEG) {
+    if(seg_index == MEM_GRANT) {
        vir_bytes newoffset;
        endpoint_t newep;
        int new_proc_nr;
@@ -93,7 +89,6 @@ register message *m_ptr;      /* pointer to request message */
        kprintf("SYSTEM:do_umap: umap_local failed\n");
        return EFAULT;
       }
-      CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1);
       if(vm_lookup(targetpr, lin_addr, &phys_addr, NULL) != OK) {
        kprintf("SYSTEM:do_umap: vm_lookup failed\n");
        return EFAULT;
diff --git a/kernel/system/do_vm_setbuf.c b/kernel/system/do_vm_setbuf.c
deleted file mode 100644 (file)
index 484c435..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/* The system call implemented in this file:
- *   m_type:   SYS_VM_SETBUF
- *
- * The parameters for this system call are:
- *    m4_l1:   Start of the buffer
- *    m4_l2:   Length of the buffer
- *    m4_l3:   End of main memory
- */
-#include "../system.h"
-
-#define VM_DEBUG 0             /* enable/ disable debug output */
-
-/*===========================================================================*
- *                             do_vm_setbuf                                 *
- *===========================================================================*/
-PUBLIC int do_vm_setbuf(m_ptr)
-message *m_ptr;                        /* pointer to request message */
-{
-       vm_base= m_ptr->m4_l1;
-       vm_size= m_ptr->m4_l2;
-       vm_mem_high= m_ptr->m4_l3;
-
-#if VM_DEBUG
-       kprintf("do_vm_setbuf: got 0x%x @ 0x%x for 0x%x\n",
-               vm_size, vm_base, vm_mem_high);
-#endif
-
-       return OK;
-}
index f522a20f8f7a35b5397f650c0c738e2bf4f0f7f6..03ed97c29566757648b21ff9f64670fb36f5b649 100644 (file)
@@ -21,12 +21,10 @@ register message *m_ptr;    /* pointer to request message */
 {
   int proc_nr, i;
   endpoint_t ep = m_ptr->SVMCTL_WHO;
-  struct proc *p, *rp;
+  struct proc *p, *rp, *target;
 
   if(ep == SELF) { ep = m_ptr->m_source; }
 
-  vm_init();
-
   if(!isokendpt(ep, &proc_nr)) {
        kprintf("do_vmctl: unexpected endpoint %d from VM\n", ep);
        return EINVAL;
@@ -42,14 +40,35 @@ register message *m_ptr;    /* pointer to request message */
                /* Send VM the information about the memory request.  */
                if(!(rp = vmrequest))
                        return ESRCH;
-               if(!RTS_ISSET(rp, VMREQUEST))
-                       minix_panic("do_vmctl: no VMREQUEST set", NO_NUM);
+               vmassert(RTS_ISSET(rp, VMREQUEST));
+
+#if 0
+               printf("kernel: vm request sent by: %s / %d about %d; 0x%lx-0x%lx, wr %d, stack: %s ",
+                       rp->p_name, rp->p_endpoint, rp->p_vmrequest.who,
+                       rp->p_vmrequest.start,
+                       rp->p_vmrequest.start + rp->p_vmrequest.length,
+                       rp->p_vmrequest.writeflag, rp->p_vmrequest.stacktrace);
+               printf("type %d\n", rp->p_vmrequest.type);
+#endif
+
+#if DEBUG_VMASSERT
+               okendpt(rp->p_vmrequest.who, &proc_nr);
+               target = proc_addr(proc_nr);
+#if 0
+               if(!RTS_ISSET(target, VMREQTARGET)) {
+                       printf("set stack: %s\n", rp->p_vmrequest.stacktrace);
+                       minix_panic("VMREQTARGET not set for target",
+                               NO_NUM);
+               }
+#endif
+#endif
 
                /* Reply with request fields. */
                m_ptr->SVMCTL_MRG_ADDR = (char *) rp->p_vmrequest.start;
                m_ptr->SVMCTL_MRG_LEN = rp->p_vmrequest.length;
                m_ptr->SVMCTL_MRG_WRITE = rp->p_vmrequest.writeflag;
                m_ptr->SVMCTL_MRG_EP = rp->p_vmrequest.who;
+               m_ptr->SVMCTL_MRG_REQUESTOR = (void *) rp->p_endpoint;
                rp->p_vmrequest.vmresult = VMSUSPEND;
 
                /* Remove from request chain. */
@@ -57,46 +76,61 @@ register message *m_ptr;    /* pointer to request message */
 
                return OK;
        case VMCTL_MEMREQ_REPLY:
-               if(!(rp = p->p_vmrequest.requestor))
-                       minix_panic("do_vmctl: no requestor set", ep);
-               p->p_vmrequest.requestor = NULL;
-               if(!RTS_ISSET(rp, VMREQUEST))
-                       minix_panic("do_vmctl: no VMREQUEST set", ep);
-               if(rp->p_vmrequest.vmresult != VMSUSPEND)
-                       minix_panic("do_vmctl: result not VMSUSPEND set",
-                               rp->p_vmrequest.vmresult);
-               rp->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE;
-               if(rp->p_vmrequest.vmresult == VMSUSPEND)
-                       minix_panic("VM returned VMSUSPEND?", NO_NUM);
-               if(rp->p_vmrequest.vmresult != OK)
+               vmassert(RTS_ISSET(p, VMREQUEST));
+               vmassert(p->p_vmrequest.vmresult == VMSUSPEND);
+               okendpt(p->p_vmrequest.who, &proc_nr);
+               target = proc_addr(proc_nr);
+               p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE;
+               vmassert(p->p_vmrequest.vmresult != VMSUSPEND);
+               if(p->p_vmrequest.vmresult != OK)
                        kprintf("SYSTEM: VM replied %d to mem request\n",
-                               rp->p_vmrequest.vmresult);
+                               p->p_vmrequest.vmresult);
 
-               /* Put on restart chain. */
-               rp->p_vmrequest.nextrestart = vmrestart;
-               vmrestart = rp;
 
+#if 0
+               printf("memreq reply: vm request sent by: %s / %d about %d; 0x%lx-0x%lx, wr %d, stack: %s ",
+                       p->p_name, p->p_endpoint, p->p_vmrequest.who,
+                       p->p_vmrequest.start,
+                       p->p_vmrequest.start + p->p_vmrequest.length,
+                       p->p_vmrequest.writeflag, p->p_vmrequest.stacktrace);
+               printf("type %d\n", p->p_vmrequest.type);
+
+               vmassert(RTS_ISSET(target, VMREQTARGET));
+               RTS_LOCK_UNSET(target, VMREQTARGET);
+#endif
+
+               if(p->p_vmrequest.type == VMSTYPE_KERNELCALL) {
+                       /* Put on restart chain. */
+                       p->p_vmrequest.nextrestart = vmrestart;
+                       vmrestart = p;
+               } else if(p->p_vmrequest.type == VMSTYPE_DELIVERMSG) {
+                       vmassert(p->p_misc_flags & MF_DELIVERMSG);
+                       vmassert(p == target);
+                       vmassert(RTS_ISSET(p, VMREQUEST));
+                       RTS_LOCK_UNSET(p, VMREQUEST);
+               } else {
 #if DEBUG_VMASSERT
-               /* Sanity check. */
-               if(rp->p_vmrequest.vmresult == OK) {
-                       if(CHECKRANGE(p,
-                               rp->p_vmrequest.start,
-                               rp->p_vmrequest.length,
-                               rp->p_vmrequest.writeflag) != OK) {
-kprintf("SYSTEM: request %d:0x%lx-0x%lx, wrflag %d, failed\n",
-       rp->p_endpoint,
-       rp->p_vmrequest.start,  rp->p_vmrequest.start + rp->p_vmrequest.length,
-       rp->p_vmrequest.writeflag); 
-       
-                               minix_panic("SYSTEM: fail but VM said OK", NO_NUM);
-                       }
-               }
+                       printf("suspended with stack: %s\n",
+                               p->p_vmrequest.stacktrace);
 #endif
+                       minix_panic("strange request type",
+                               p->p_vmrequest.type);
+               }
+
                return OK;
-#if VM_KERN_NOPAGEZERO
-       case VMCTL_NOPAGEZERO:
+       case VMCTL_ENABLE_PAGING:
+               if(vm_running) 
+                       minix_panic("do_vmctl: paging already enabled", NO_NUM);
+               vm_init(p);
+               if(!vm_running)
+                       minix_panic("do_vmctl: paging enabling failed", NO_NUM);
+               vmassert(p->p_delivermsg_lin ==
+                 umap_local(p, D, p->p_delivermsg_vir, sizeof(message)));
+               if(newmap(p, (struct mem_map *) m_ptr->SVMCTL_VALUE) != OK)
+                       minix_panic("do_vmctl: newmap failed", NO_NUM);
+               FIXLINMSG(p);
+               vmassert(p->p_delivermsg_lin);
                return OK;
-#endif
   }
 
   /* Try architecture-specific vmctls. */
index 8904ab60da4b2e2023a1b1d68064df8f277f573f..50a957ab79e095484287b4094cefad00442b09fe 100644 (file)
@@ -46,10 +46,10 @@ message *m_ptr;                     /* pointer to request message */
    * VT_VIRTUAL and VT_PROF multiple times below.
    */
   if (m_ptr->VT_WHICH == VT_VIRTUAL) {
-      pt_flag = VIRT_TIMER;
+      pt_flag = MF_VIRT_TIMER;
       pt_left = &rp->p_virt_left;
   } else { /* VT_PROF */
-      pt_flag = PROF_TIMER;
+      pt_flag = MF_PROF_TIMER;
       pt_left = &rp->p_prof_left;
   }
 
@@ -101,15 +101,15 @@ struct proc *rp;                  /* pointer to the process */
    */
 
   /* Check if the virtual timer expired. If so, send a SIGVTALRM signal. */
-  if ((rp->p_misc_flags & VIRT_TIMER) && rp->p_virt_left <= 0) {
-      rp->p_misc_flags &= ~VIRT_TIMER;
+  if ((rp->p_misc_flags & MF_VIRT_TIMER) && rp->p_virt_left <= 0) {
+      rp->p_misc_flags &= ~MF_VIRT_TIMER;
       rp->p_virt_left = 0;
       cause_sig(rp->p_nr, SIGVTALRM);
   }
 
   /* Check if the profile timer expired. If so, send a SIGPROF signal. */
-  if ((rp->p_misc_flags & PROF_TIMER) && rp->p_prof_left <= 0) {
-      rp->p_misc_flags &= ~PROF_TIMER;
+  if ((rp->p_misc_flags & MF_PROF_TIMER) && rp->p_prof_left <= 0) {
+      rp->p_misc_flags &= ~MF_PROF_TIMER;
       rp->p_prof_left = 0;
       cause_sig(rp->p_nr, SIGPROF);
   }
index 01a0c0edb403a9fdd4d68871958c9cf33c91382f..96080198151ef951b574363c0039cf76d027a589 100755 (executable)
@@ -35,7 +35,7 @@
 
 /* Define stack sizes for the kernel tasks included in the system image. */
 #define NO_STACK       0
-#define SMALL_STACK    (256 * sizeof(char *))
+#define SMALL_STACK    (1024 * sizeof(char *))
 #define IDL_S  SMALL_STACK     /* 3 intr, 3 temps, 4 db for Intel */
 #define        HRD_S   NO_STACK        /* dummy task, uses kernel stack */
 #define        TSK_S   SMALL_STACK     /* system and clock task */
@@ -48,6 +48,7 @@ PUBLIC char *t_stack[TOT_STACK_SPACE / sizeof(char *)];
 #define IDL_F  (SYS_PROC | PREEMPTIBLE | BILLABLE)     /* idle task */
 #define TSK_F  (SYS_PROC)                              /* kernel tasks */
 #define SRV_F  (SYS_PROC | PREEMPTIBLE)                /* system services */
+#define VM_F   (SYS_PROC)                              /* vm  */
 #define USR_F  (BILLABLE | PREEMPTIBLE | PROC_FULLVM)  /* user processes */
 #define SVM_F  (SRV_F | PROC_FULLVM)                   /* servers with VM */
 
@@ -91,6 +92,7 @@ PRIVATE int
   ds_c[] = { SYS_ALL_CALLS },
   vm_c[] = { SYS_ALL_CALLS },
   drv_c[] = { DRV_C },
+  usr_c[] = { SYS_SYSCTL },
   tty_c[] = { DRV_C, SYS_PHYSCOPY, SYS_ABORT, SYS_IOPENABLE,
                SYS_READBIOS },
   mem_c[] = { DRV_C, SYS_PHYSCOPY, SYS_PHYSVCOPY, SYS_IOPENABLE };
@@ -115,16 +117,16 @@ PUBLIC struct boot_image image[] = {
 {CLOCK,clock_task,TSK_F,  8, TASK_Q, TSK_S, TSK_T,     0, no_c,"clock" },
 {SYSTEM, sys_task,TSK_F,  8, TASK_Q, TSK_S, TSK_T,     0, no_c,"system"},
 {HARDWARE,      0,TSK_F,  8, TASK_Q, HRD_S,     0,     0, no_c,"kernel"},
-{PM_PROC_NR,    0,SVM_F, 32,      4, 0,     SRV_T, SRV_M, c(pm_c),"pm"    },
-{FS_PROC_NR,    0,SVM_F, 32,      5, 0,     SRV_T, SRV_M, c(fs_c),"vfs"   },
+{PM_PROC_NR,    0,SRV_F, 32,      4, 0,     SRV_T, SRV_M, c(pm_c),"pm"    },
+{FS_PROC_NR,    0,SRV_F, 32,      5, 0,     SRV_T, SRV_M, c(fs_c),"vfs"   },
 {RS_PROC_NR,    0,SVM_F,  4,      4, 0,     SRV_T, SYS_M, c(rs_c),"rs"    },
 {MEM_PROC_NR,   0,SVM_F,  4,      3, 0,     SRV_T, SYS_M,c(mem_c),"memory"},
-{LOG_PROC_NR,   0,SVM_F,  4,      2, 0,     SRV_T, SYS_M,c(drv_c),"log"   },
+{LOG_PROC_NR,   0,SRV_F,  4,      2, 0,     SRV_T, SYS_M,c(drv_c),"log"   },
 {TTY_PROC_NR,   0,SVM_F,  4,      1, 0,     SRV_T, SYS_M,c(tty_c),"tty"   },
 {DS_PROC_NR,    0,SVM_F,  4,      4, 0,     SRV_T, SYS_M, c(ds_c),"ds"    },
 {MFS_PROC_NR,   0,SVM_F, 32,      5, 0,     SRV_T, SRV_M, c(fs_c),"mfs"   },
-{VM_PROC_NR,    0,SRV_F, 32,      2, 0,     SRV_T, SRV_M, c(vm_c),"vm"    },
-{INIT_PROC_NR,  0,USR_F,  8, USER_Q, 0,     USR_T, USR_M, no_c,"init"  },
+{VM_PROC_NR,    0,VM_F, 32,      2, 0,     SRV_T, SRV_M, c(vm_c),"vm"    },
+{INIT_PROC_NR,  0,USR_F,  8, USER_Q, 0,     USR_T, USR_M, c(usr_c),"init"  },
 };
 
 /* Verify the size of the system image table at compile time. Also verify that 
@@ -137,5 +139,3 @@ PUBLIC struct boot_image image[] = {
 extern int dummy[(NR_BOOT_PROCS==sizeof(image)/
        sizeof(struct boot_image))?1:-1];
 extern int dummy[(BITCHUNK_BITS > NR_BOOT_PROCS - 1) ? 1 : -1];
-
-PUBLIC endpoint_t ipc_stats_target= NONE;
index 9e0b615da9d57fb8f164f9fc5f18c1025b965e4b..6c3473f2bf65eea45e8d4967bfe83e0feacba631 100644 (file)
@@ -2,18 +2,19 @@
 #ifndef _VM_H
 #define _VM_H 1
 
-#define CHECKRANGE_OR_SUSPEND(pr, start, length, wr)  { int mr; \
-       if(vm_running && (mr=vm_checkrange(proc_addr(who_p), pr, start, length, wr, 0)) != OK) { \
-               return mr;                                       \
-       } }
+/* Pseudo error codes */
+#define VMSUSPEND       -996 /* a process request has to be restarted after an OK from VM */
+#define EFAULT_SRC     -995 /* NOTE(review): presumably a fault on the source side of a copy -- confirm against the copy routines */
+#define EFAULT_DST     -994 /* NOTE(review): presumably a fault on the destination side of a copy -- confirm against the copy routines */
 
-#define CHECKRANGE(pr, start, length, wr)   \
-       vm_checkrange(proc_addr(who_p), pr, start, length, wr, 1)
+#define FIXLINMSG(prp) { (prp)->p_delivermsg_lin = umap_local((prp), D, (prp)->p_delivermsg_vir, sizeof(message)); } /* recompute p_delivermsg_lin from p_delivermsg_vir via umap_local() on segment D; prp is parenthesized so pointer expressions work, but it is evaluated more than once, so pass a side-effect-free expression */
 
-/* Pseudo error code indicating a process request has to be
- * restarted after an OK from VM.
- */
-#define VMSUSPEND       -996
+#define PHYS_COPY_CATCH(src, dst, size, a) {   /* a = phys_copy(src, dst, size), run with catch_pagefaults raised; expands to a brace block, so "PHYS_COPY_CATCH(...);" cannot be the body of an if that has an else */ \
+       vmassert(intr_disabled());              /* vmassert()s that intr_disabled() holds before the copy */ \
+       catch_pagefaults++;                     /* raised for the duration of the copy; NOTE(review): presumably consulted by the page-fault handler -- confirm */ \
+       a = phys_copy(src, dst, size);          /* the value returned by phys_copy() is stored in a */ \
+       catch_pagefaults--;                     /* undo the increment made before the copy */ \
+       }
 
 #endif