Zhao Yanbai Git Server - minix.git/commitdiff
author     Ben Gras <ben@minix3.org>
           Mon, 21 Sep 2009 14:49:49 +0000 (14:49 +0000)
committer  Ben Gras <ben@minix3.org>
           Mon, 21 Sep 2009 14:49:49 +0000 (14:49 +0000)

 - pages that point to page directory values of all processes,
   shared with the kernel, mapped into kernel address space;
   the kernel is notified of its location. Kernel segment size is
   increased to make it fit.
 - map in the kernel and other processes that don't have their
   own page table using a single 4MB (global) mapping.
 - new sanity check facility: objects that are allocated with
   the slab allocator are, when running with sanity checking on,
   marked read-only until they are explicitly unlocked using the
   USE() macro (a minimal sketch of this pattern follows this list).
 - another sanity check facility: collect all uses of memory and
   check that they don't overlap with (a) each other or (b) free memory
 - own munmap() and munmap_text() functions.
 - exec() recovers from out-of-memory conditions properly now; this
   solves some weird exec() behaviour
 - chew off memory from the same side of the chunk as where we
   start scanning, solving some memory fragmentation issues
 - use AVL trees for the freelist and phys_ranges in regions
 - implement most useful part of munmap()
 - the remap() code is GQ's, for shared memory

36 files changed:
servers/vm/Makefile
servers/vm/addravl.c [new file with mode: 0644]
servers/vm/addravl.h [new file with mode: 0644]
servers/vm/alloc.c
servers/vm/break.c
servers/vm/cavl_if.h [new file with mode: 0755]
servers/vm/cavl_impl.h [new file with mode: 0755]
servers/vm/exec.c
servers/vm/exit.c
servers/vm/fork.c
servers/vm/glo.h
servers/vm/i386/arch_pagefaults.c
servers/vm/i386/arch_vmproc.h
servers/vm/i386/memory.h
servers/vm/i386/pagetable.c
servers/vm/i386/pagetable.h
servers/vm/i386/vm.c
servers/vm/main.c
servers/vm/mmap.c
servers/vm/pagefaults.c
servers/vm/pagerange.h [new file with mode: 0644]
servers/vm/physravl.c [new file with mode: 0644]
servers/vm/physravl.h [new file with mode: 0644]
servers/vm/proto.h
servers/vm/queryexit.c [new file with mode: 0644]
servers/vm/region.c
servers/vm/region.h
servers/vm/rs.c [new file with mode: 0644]
servers/vm/sanitycheck.h
servers/vm/signal.c
servers/vm/slaballoc.c
servers/vm/util.h
servers/vm/utility.c
servers/vm/vfs.c
servers/vm/vm.h
servers/vm/vmproc.h

index bf3c6bfe30d565604b572441e261baf9c45c9b12..e108cc8cc9a830ab1a1c8c284819beb3ad98a823 100644 (file)
@@ -4,7 +4,8 @@ SERVER = vm
 include /etc/make.conf
 
 OBJ = main.o alloc.o utility.o exec.o exit.o fork.o break.o \
-       signal.o vfs.o mmap.o slaballoc.o region.o pagefaults.o
+       signal.o vfs.o mmap.o slaballoc.o region.o pagefaults.o addravl.o \
+       physravl.o rs.o queryexit.o
 ARCHOBJ =  $(ARCH)/vm.o $(ARCH)/pagetable.o $(ARCH)/arch_pagefaults.o $(ARCH)/util.o 
 
 CPPFLAGS=-I../../kernel/arch/$(ARCH)/include -I$(ARCH)
@@ -13,7 +14,7 @@ CFLAGS = $(CPROFILE) $(CPPFLAGS)
 # build local binary
 
 all build install:     $(SERVER)
-       #install $(SERVER)
+       install -S 100k $(SERVER)
 
 $(SERVER):     $(OBJ) phony
        cd $(ARCH) && $(MAKE)
diff --git a/servers/vm/addravl.c b/servers/vm/addravl.c
new file mode 100644 (file)
index 0000000..72c66c8
--- /dev/null
@@ -0,0 +1,8 @@
+
+#include "sanitycheck.h"
+#include "pagerange.h"
+#include "addravl.h"
+#include "proto.h"
+#include "util.h"
+#include "cavl_impl.h"
+
diff --git a/servers/vm/addravl.h b/servers/vm/addravl.h
new file mode 100644 (file)
index 0000000..1024ae8
--- /dev/null
@@ -0,0 +1,24 @@
+
+#ifndef ADDRAVL
+#define ADDRAVL 1
+
+#define AVL_UNIQUE(id) addr_ ## id
+#define AVL_HANDLE pagerange_t *
+#define AVL_KEY phys_bytes
+#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
+#define AVL_NULL NULL
+#define AVL_GET_LESS(h, a) (h)->less
+#define AVL_GET_GREATER(h, a) (h)->greater
+#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
+#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
+#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
+#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
+#define AVL_SET_ROOT(h, v) (h)->root = v;
+#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
+#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->addr)
+#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->addr, (h2)->addr)
+#define AVL_INSIDE_STRUCT char pad[4];
+
+#include "cavl_if.h"
+
+#endif
index 7ab0fd48861aac04f34270db30633d69efd47db9..f9387a200f312e981dccb148f5842a4a65fa7989 100644 (file)
@@ -23,6 +23,8 @@
 #include <minix/const.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/debug.h>
+#include <minix/bitmap.h>
 
 #include <sys/mman.h>
 
 #include "proto.h"
 #include "util.h"
 #include "glo.h"
+#include "pagerange.h"
+#include "addravl.h"
+#include "sanitycheck.h"
 
-/* Initially, no free pages are known. */
-PRIVATE phys_bytes free_pages_head = NO_MEM;   /* Physical address in bytes. */
+/* AVL tree of free pages. */
+addr_avl addravl;
 
 /* Used for sanity check. */
 PRIVATE phys_bytes mem_low, mem_high;
@@ -54,6 +59,8 @@ struct hole {
        int holelist;
 };
 
+static int startpages;
+
 #define NIL_HOLE (struct hole *) 0
 
 #define _NR_HOLES (_NR_PROCS*2)  /* No. of memory holes maintained by VM */
@@ -71,6 +78,11 @@ FORWARD _PROTOTYPE( phys_bytes alloc_pages, (int pages, int flags)       );
 #if SANITYCHECKS
 FORWARD _PROTOTYPE( void holes_sanity_f, (char *fn, int line)              );
 #define CHECKHOLES holes_sanity_f(__FILE__, __LINE__)
+
+#define MAXPAGES (1024*1024*1024/VM_PAGE_SIZE) /* 1GB of memory */
+#define CHUNKS BITMAP_CHUNKS(MAXPAGES)
+PRIVATE bitchunk_t pagemap[CHUNKS];
+
 #else
 #define CHECKHOLES 
 #endif
@@ -102,26 +114,6 @@ FORWARD _PROTOTYPE( void holes_sanity_f, (char *fn, int line)                  );
 }
 
 
-void availbytes(vir_bytes *bytes, vir_bytes *chunks)
-{
-       phys_bytes p, nextp;
-       *bytes = 0;
-       *chunks = 0;
-       for(p = free_pages_head; p != NO_MEM; p = nextp) {
-               phys_bytes thissize, ret;
-               GET_PARAMS(p, thissize, nextp);
-               (*bytes) += thissize;
-               (*chunks)++;
-               if(nextp != NO_MEM)     {
-                       vm_assert(nextp > p);
-                       vm_assert(nextp > p + thissize);
-               }
-       }
-
-       return;
-}
-
-
 #if SANITYCHECKS
 
 /*===========================================================================*
@@ -400,6 +392,7 @@ struct memory *chunks;              /* list of free memory chunks */
  */
   int i, first = 0;
   register struct hole *hp;
+  int nodes, largest;
 
   /* Put all holes on the free list. */
   for (hp = &hole[0]; hp < &hole[_NR_HOLES]; hp++) {
@@ -410,6 +403,8 @@ struct memory *chunks;              /* list of free memory chunks */
   hole_head = NIL_HOLE;
   free_slots = &hole[0];
 
+  addr_init(&addravl);
+
   /* Use the chunks of physical memory to allocate holes. */
   for (i=NR_MEMS-1; i>=0; i--) {
        if (chunks[i].size > 0) {
@@ -422,217 +417,226 @@ struct memory *chunks;          /* list of free memory chunks */
        }
   }
 
+  memstats(&nodes, &startpages, &largest);
+
+  printf("VM: %d nodes, %d pages, largest chunk %d\n",
+       nodes, startpages, largest);
+
   CHECKHOLES;
 }
 
+#if SANITYCHECKS
+PRIVATE void sanitycheck(void)
+{
+       pagerange_t *p, *prevp = NULL;
+       addr_iter iter;
+       addr_start_iter_least(&addravl, &iter);
+       while((p=addr_get_iter(&iter))) {
+               SLABSANE(p);
+               vm_assert(p->size > 0);
+               if(prevp) {
+                       vm_assert(prevp->addr < p->addr);
+                       vm_assert(prevp->addr + prevp->size < p->addr);
+               }
+               prevp = p;
+               addr_incr_iter(&iter);
+       }
+}
+#endif
+
+PUBLIC void memstats(int *nodes, int *pages, int *largest)
+{
+       pagerange_t *p;
+       addr_iter iter;
+       addr_start_iter_least(&addravl, &iter);
+       *nodes = 0;
+       *pages = 0;
+       *largest = 0;
+#if SANITYCHECKS
+       sanitycheck();
+#endif
+       while((p=addr_get_iter(&iter))) {
+               SLABSANE(p);
+               (*nodes)++;
+               (*pages)+= p->size;
+               if(p->size > *largest)
+                       *largest = p->size;
+               addr_incr_iter(&iter);
+       }
+}
+
 /*===========================================================================*
  *                             alloc_pages                                  *
  *===========================================================================*/
 PRIVATE PUBLIC phys_bytes alloc_pages(int pages, int memflags)
 {
-       phys_bytes bytes, p, nextp, prevp = NO_MEM;
-       phys_bytes prevsize = 0;
-
+       addr_iter iter;
+       pagerange_t *pr;
+       int incr;
+       phys_bytes boundary16 = 16 * 1024 * 1024 / VM_PAGE_SIZE;
+       phys_bytes boundary1  =  1 * 1024 * 1024 / VM_PAGE_SIZE;
+       phys_bytes mem;
 #if SANITYCHECKS
-       vir_bytes avail1, avail2, chunks1, chunks2;
-       availbytes(&avail1, &chunks1);
+       int firstnodes, firstpages, wantnodes, wantpages;
+       int finalnodes, finalpages;
+       int largest;
+
+       memstats(&firstnodes, &firstpages, &largest);
+       sanitycheck();
+       wantnodes = firstnodes;
+       wantpages = firstpages - pages;
 #endif
 
-       vm_assert(pages > 0);
-       bytes = CLICK2ABS(pages);
-       vm_assert(ABS2CLICK(bytes) == pages);
+       if(memflags & (PAF_LOWER16MB|PAF_LOWER1MB)) {
+               addr_start_iter_least(&addravl, &iter);
+               incr = 1;
+       } else {
+               addr_start_iter_greatest(&addravl, &iter);
+               incr = 0;
+       }
+
+       while((pr = addr_get_iter(&iter))) {
+               SLABSANE(pr);
+               if(pr->size >= pages) {
+                       if(memflags & PAF_LOWER16MB) {
+                               if(pr->addr + pages > boundary16)
+                                       return NO_MEM;
+                       }
+
+                       if(memflags & PAF_LOWER1MB) {
+                               if(pr->addr + pages > boundary1)
+                                       return NO_MEM;
+                       }
+
+                       /* good block found! */
+                       break;
+               }
+               if(incr)
+                       addr_incr_iter(&iter);
+               else
+                       addr_decr_iter(&iter);
+       }
 
+       if(!pr) {
+               printf("VM: alloc_pages: alloc failed of %d pages\n", pages);
+               util_stacktrace();
+               printmemstats();
 #if SANITYCHECKS
-#define ALLOCRETURNCHECK                       \
-       availbytes(&avail2, &chunks2);          \
-       vm_assert(avail1 - bytes == avail2);    \
-       vm_assert(chunks1 == chunks2 || chunks1-1 == chunks2);
-#else
-#define ALLOCRETURNCHECK
+               if(largest >= pages) {
+                       vm_panic("no memory but largest was enough", NO_NUM);
+               }
 #endif
+               return NO_MEM;
+       }
 
+       SLABSANE(pr);
 
-       for(p = free_pages_head; p != NO_MEM; p = nextp) {
-               phys_bytes thissize, ret;
-               GET_PARAMS(p, thissize, nextp);
-               if(thissize >= bytes) {
-                       /* We found a chunk that's big enough. */
-
-                       ret = p + thissize - bytes;
-                       thissize -= bytes;
-
-                       if(thissize == 0) {
-                               /* Special case: remove this link entirely. */
-                               if(prevp == NO_MEM)
-                                       free_pages_head = nextp;
-                               else {
-                                       vm_assert(prevsize > 0);
-                                       SET_PARAMS(prevp, prevsize, nextp);
-                               }
-                       } else {
-                               /* Remove memory from this chunk. */
-                               SET_PARAMS(p, thissize, nextp);
-                       }
+       /* Allocated chunk is off the end. */
+       mem = pr->addr + pr->size - pages;
 
-                       /* Clear memory if requested. */
-                       if(memflags & PAF_CLEAR) {
-                         int s;
-                         if ((s= sys_memset(0, ret, bytes)) != OK)   {
-                               vm_panic("alloc_pages: sys_memset failed", s);
-                         }
-                       }
+       vm_assert(pr->size >= pages);
+       if(pr->size == pages) {
+               pagerange_t *prr;
+               prr = addr_remove(&addravl, pr->addr);
+               vm_assert(prr);
+               vm_assert(prr == pr);
+               SLABFREE(pr);
+#if SANITYCHECKS
+               wantnodes--;
+#endif
+       } else {
+               USE(pr, pr->size -= pages;);
+       }
 
-                       /* Check if returned range is actual good memory. */
-                       vm_assert_range(ret, bytes);
+       if(memflags & PAF_CLEAR) {
+               int s;
+               if ((s= sys_memset(0, CLICK_SIZE*mem,
+                       VM_PAGE_SIZE*pages)) != OK) 
+                       vm_panic("alloc_pages: sys_memset failed", s);
+       }
 
-                       ALLOCRETURNCHECK;
+#if SANITYCHECKS
+       memstats(&finalnodes, &finalpages, &largest);
+       sanitycheck();
 
-                       /* Return it in clicks. */
-                       return ABS2CLICK(ret);
-               }
-               prevp = p;
-               prevsize = thissize;
-       }
-       return NO_MEM;
+       vm_assert(finalnodes == wantnodes);
+       vm_assert(finalpages == wantpages);
+#endif
+
+       return mem;
 }
 
 /*===========================================================================*
  *                             free_pages                                   *
  *===========================================================================*/
-PRIVATE PUBLIC void free_pages(phys_bytes pageno, int npages)
+PRIVATE void free_pages(phys_bytes pageno, int npages)
 {
-       phys_bytes p, origsize,
-               size, nextaddr, thissize, prevp = NO_MEM, pageaddr;
-
+       pagerange_t *pr, *p;
+       addr_iter iter;
 #if SANITYCHECKS
-       vir_bytes avail1, avail2, chunks1, chunks2;
-       availbytes(&avail1, &chunks1);
-#endif
+       int firstnodes, firstpages, wantnodes, wantpages;
+       int finalnodes, finalpages, largest;
 
-#if SANITYCHECKS
-#define FREERETURNCHECK                                                                \
-       availbytes(&avail2, &chunks2);                                  \
-       vm_assert(avail1 + origsize  == avail2);                        \
-       vm_assert(chunks1 == chunks2 || chunks1+1 == chunks2 || chunks1-1 == chunks2);
-#else
-#define FREERETURNCHECK
+       memstats(&firstnodes, &firstpages, &largest);
+       sanitycheck();
+
+       wantnodes = firstnodes;
+       wantpages = firstpages + npages;
 #endif
 
-       /* Basic sanity check. */
-       vm_assert(npages > 0);
-       vm_assert(pageno != NO_MEM);    /* Page number must be reasonable. */
-
-       /* Convert page and pages to bytes. */
-       pageaddr = CLICK2ABS(pageno);
-       origsize = size = npages * VM_PAGE_SIZE;        /* Size in bytes. */
-       vm_assert(pageaddr != NO_MEM);
-       vm_assert(ABS2CLICK(pageaddr) == pageno);
-       vm_assert_range(pageaddr, size);
-
-       /* More sanity checks. */
-       vm_assert(ABS2CLICK(size) == npages);   /* Sanity. */
-       vm_assert(pageaddr + size > pageaddr);          /* Must not overflow. */
-
-       /* Special case: no free pages. */
-       if(free_pages_head == NO_MEM) {
-               free_pages_head = pageaddr;
-               SET_PARAMS(pageaddr, size, NO_MEM);
-               FREERETURNCHECK;
-               return;
-       }
+       vm_assert(!addr_search(&addravl, pageno, AVL_EQUAL));
 
-       /* Special case: the free block is before the current head. */
-       if(pageaddr < free_pages_head) {
-               phys_bytes newsize, newnext, headsize, headnext;
-               vm_assert(pageaddr + size <= free_pages_head);
-               GET_PARAMS(free_pages_head, headsize, headnext);
-               newsize = size;
-               if(pageaddr + size == free_pages_head) {
-                       /* Special case: contiguous. */
-                       newsize += headsize;
-                       newnext = headnext;
-               } else {
-                       newnext = free_pages_head;
-               }
-               SET_PARAMS(pageaddr, newsize, newnext);
-               free_pages_head = pageaddr;
-               FREERETURNCHECK;
-               return;
-       }
+       /* try to merge with higher neighbour */
+       if((pr=addr_search(&addravl, pageno+npages, AVL_EQUAL))) {
+               USE(pr, pr->addr -= npages;
+                       pr->size += npages;);
+       } else {
+               if(!SLABALLOC(pr))
+                       vm_panic("free_pages: can't alloc", NO_NUM);
+#if SANITYCHECKS
+               memstats(&firstnodes, &firstpages, &largest);
 
-       /* Find where to put the block in the free list. */
-       for(p = free_pages_head; p < pageaddr; p = nextaddr) {
-               GET_PARAMS(p, thissize, nextaddr);
-
-               if(nextaddr == NO_MEM) {
-                       /* Special case: page is at the end of the list. */
-                       if(p + thissize == pageaddr) {
-                               /* Special case: contiguous. */
-                               SET_PARAMS(p, thissize + size, NO_MEM);
-                               FREERETURNCHECK;
-                       } else {
-                               SET_PARAMS(p, thissize, pageaddr);
-                               SET_PARAMS(pageaddr, size, NO_MEM);
-                               FREERETURNCHECK;
-                       }
-                       return;
-               }
+               wantnodes = firstnodes;
+               wantpages = firstpages + npages;
 
-               prevp = p;
+               sanitycheck();
+#endif
+               vm_assert(npages > 0);
+               USE(pr, pr->addr = pageno;
+                        pr->size = npages;);
+               addr_insert(&addravl, pr);
+#if SANITYCHECKS
+               wantnodes++;
+#endif
        }
 
-       /* Normal case: insert page block between two others.
-        * The first block starts at 'prevp' and is 'thissize'.
-        * The second block starts at 'p' and is 'nextsize'.
-        * The block that has to come in between starts at
-        * 'pageaddr' and is size 'size'.
-        */
-       vm_assert(p != NO_MEM);
-       vm_assert(prevp != NO_MEM);
-       vm_assert(prevp < p);
-       vm_assert(p == nextaddr);
-
+       addr_start_iter(&addravl, &iter, pr->addr, AVL_EQUAL);
+       p = addr_get_iter(&iter);
+       vm_assert(p);
+       vm_assert(p == pr);
+
+       addr_decr_iter(&iter);
+       if((p = addr_get_iter(&iter))) {
+               SLABSANE(p);
+               if(p->addr + p->size == pr->addr) {
+                       USE(p, p->size += pr->size;);
+                       addr_remove(&addravl, pr->addr);
+                       SLABFREE(pr);
 #if SANITYCHECKS
-  {
-       vir_bytes prevpsize, prevpnext;
-       GET_PARAMS(prevp, prevpsize, prevpnext);
-       vm_assert(prevpsize == thissize);
-       vm_assert(prevpnext == p);
-
-       availbytes(&avail2, &chunks2);
-       vm_assert(avail1 == avail2);
-  }
+                       wantnodes--;
 #endif
-
-       if(prevp + thissize == pageaddr) {
-               /* Special case: first block is contiguous with freed one. */
-               phys_bytes newsize = thissize + size;
-               SET_PARAMS(prevp, newsize, p);
-               pageaddr = prevp;
-               size = newsize;
-       } else {
-               SET_PARAMS(prevp, thissize, pageaddr);
+               }
        }
 
-       /* The block has been inserted (and possibly merged with the
-        * first one). Check if it has to be merged with the second one.
-        */
 
-       if(pageaddr + size == p) {
-               phys_bytes nextsize, nextnextaddr;
-               /* Special case: freed block is contiguous with next one. */
-               GET_PARAMS(p, nextsize, nextnextaddr);
-               SET_PARAMS(pageaddr, size+nextsize, nextnextaddr);
-               FREERETURNCHECK;
-       } else {
-               SET_PARAMS(pageaddr, size, p);
-               FREERETURNCHECK;
-       }
+#if SANITYCHECKS
+       memstats(&finalnodes, &finalpages,  &largest);
+       sanitycheck();
 
-       return;
+       vm_assert(finalnodes == wantnodes);
+       vm_assert(finalpages == wantpages);
+#endif
 }
 
-
 #define NR_DMA 16
 
 PRIVATE struct dmatab
@@ -850,3 +854,65 @@ PUBLIC int do_allocmem(message *m)
        return OK;
 }
 
+/*===========================================================================*
+ *                             printmemstats                                 *
+ *===========================================================================*/
+void printmemstats(void)
+{
+       int nodes, pages, largest;
+        memstats(&nodes, &pages, &largest);
+        printf("%d blocks, %d pages (%ukB) free, largest %d pages (%ukB)\n",
+                nodes, pages, (u32_t) pages * (VM_PAGE_SIZE/1024),
+               largest, (u32_t) largest * (VM_PAGE_SIZE/1024));
+}
+
+
+#if SANITYCHECKS
+
+/*===========================================================================*
+ *                             usedpages_reset                              *
+ *===========================================================================*/
+void usedpages_reset(void)
+{
+       memset(pagemap, 0, sizeof(pagemap));
+}
+
+/*===========================================================================*
+ *                             usedpages_add                                *
+ *===========================================================================*/
+int usedpages_add_f(phys_bytes addr, phys_bytes len, char *file, int line)
+{
+       pagerange_t *pr;
+       u32_t pagestart, pages;
+
+       if(!incheck)
+               return OK;
+
+       vm_assert(!(addr % VM_PAGE_SIZE));
+       vm_assert(!(len % VM_PAGE_SIZE));
+       vm_assert(len > 0);
+       vm_assert_range(addr, len);
+
+       pagestart = addr / VM_PAGE_SIZE;
+       pages = len / VM_PAGE_SIZE;
+
+       while(pages > 0) {
+               phys_bytes thisaddr;
+               vm_assert(pagestart > 0);
+               vm_assert(pagestart < MAXPAGES);
+               thisaddr = pagestart * VM_PAGE_SIZE;
+               if(GET_BIT(pagemap, pagestart)) {
+                       int i;
+                       printf("%s:%d: usedpages_add: addr 0x%lx reused.\n",
+                               file, line, thisaddr);
+                       return EFAULT;
+               }
+               SET_BIT(pagemap, pagestart);
+               pages--;
+               pagestart++;
+       }
+
+       return OK;
+}
+
+#endif
index d392096c492b9300e5f80102aaba10b528db9b59..f2fadb50468a564371ae84e1b59f482d22ff10ae 100644 (file)
@@ -28,6 +28,7 @@
 #include <minix/ipc.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <env.h>
diff --git a/servers/vm/cavl_if.h b/servers/vm/cavl_if.h
new file mode 100755 (executable)
index 0000000..a2df083
--- /dev/null
@@ -0,0 +1,216 @@
+/* Abstract AVL Tree Generic C Package.
+** Interface generation header file.
+**
+** This code is in the public domain.  See cavl_tree.html for interface
+** documentation.
+**
+** Version: 1.5  Author: Walt Karas
+*/
+
+/* This header contains the definition of CHAR_BIT (number of bits in a
+** char). */
+#include <limits.h>
+
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__SC
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+
+#ifndef AVL_SEARCH_TYPE_DEFINED_
+#define AVL_SEARCH_TYPE_DEFINED_
+
+typedef enum
+  {
+    AVL_EQUAL = 1,
+    AVL_LESS = 2,
+    AVL_GREATER = 4,
+    AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS,
+    AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER
+  }
+avl_search_type;
+
+#endif
+
+#ifdef AVL_UNIQUE
+
+#define L__ AVL_UNIQUE
+
+#else
+
+#define L__(X) X
+
+#endif
+
+/* Determine storage class for function prototypes. */
+#ifdef AVL_PRIVATE
+
+#define L__SC static
+
+#else
+
+#define L__SC extern
+
+#endif
+
+#ifdef AVL_SIZE
+
+#define L__SIZE AVL_SIZE
+
+#else
+
+#define L__SIZE unsigned long
+
+#endif
+
+typedef struct
+  {
+    #ifdef AVL_INSIDE_STRUCT
+
+    AVL_INSIDE_STRUCT
+
+    #endif
+
+    AVL_HANDLE root;
+  }
+L__(avl);
+
+/* Function prototypes. */
+
+L__SC void L__(init)(L__(avl) *tree);
+
+L__SC int L__(is_empty)(L__(avl) *tree);
+
+L__SC AVL_HANDLE L__(insert)(L__(avl) *tree, AVL_HANDLE h);
+
+L__SC AVL_HANDLE L__(search)(L__(avl) *tree, AVL_KEY k, avl_search_type st);
+
+L__SC AVL_HANDLE L__(search_least)(L__(avl) *tree);
+
+L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *tree);
+
+L__SC AVL_HANDLE L__(remove)(L__(avl) *tree, AVL_KEY k);
+
+L__SC AVL_HANDLE L__(subst)(L__(avl) *tree, AVL_HANDLE new_node);
+
+#ifdef AVL_BUILD_ITER_TYPE
+
+L__SC int L__(build)(
+  L__(avl) *tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes);
+
+#endif
+
+/* ANSI C/ISO C++ require that a long have at least 32 bits.  Set
+** L__EST_LONG_BIT to be the greatest multiple of 8 in the range
+** 32 - 64 (inclusive) that is less than or equal to the number of
+** bits in a long.
+*/
+
+#if (((LONG_MAX >> 31) >> 7) == 0)
+
+#define L__EST_LONG_BIT 32
+
+#elif (((LONG_MAX >> 31) >> 15) == 0)
+
+#define L__EST_LONG_BIT 40
+
+#elif (((LONG_MAX >> 31) >> 23) == 0)
+
+#define L__EST_LONG_BIT 48
+
+#elif (((LONG_MAX >> 31) >> 31) == 0)
+
+#define L__EST_LONG_BIT 56
+
+#else
+
+#define L__EST_LONG_BIT 64
+
+#endif
+
+/* Number of bits in a long. */
+#define L__LONG_BIT (sizeof(long) * CHAR_BIT)
+
+/* The macro L__BIT_ARR_DEFN defines a bit array whose index is a (0-based)
+** node depth.  The definition depends on whether the maximum depth is more
+** or less than the number of bits in a single long.
+*/
+
+#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT)
+
+/* Maximum depth may be more than number of bits in a long. */
+
+#define L__BIT_ARR_DEFN(NAME) \
+  unsigned long NAME[((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT];
+
+#else
+
+/* Maximum depth is definitely less than number of bits in a long. */
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME;
+
+#endif
+
+/* Iterator structure. */
+typedef struct
+  {
+    /* Tree being iterated over. */
+    L__(avl) *tree_;
+
+    /* Records a path into the tree.  If bit n is true, indicates
+    ** take greater branch from the nth node in the path, otherwise
+    ** take the less branch.  bit 0 gives branch from root, and
+    ** so on. */
+    L__BIT_ARR_DEFN(branch)
+
+    /* Zero-based depth of path into tree. */
+    unsigned depth;
+
+    /* Handles of nodes in path from root to current node (returned by *). */
+    AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1];
+  }
+L__(iter);
+
+/* Iterator function prototypes. */
+
+L__SC void L__(start_iter)(
+  L__(avl) *tree, L__(iter) *iter, AVL_KEY k, avl_search_type st);
+
+L__SC void L__(start_iter_least)(L__(avl) *tree, L__(iter) *iter);
+
+L__SC void L__(start_iter_greatest)(L__(avl) *tree, L__(iter) *iter);
+
+L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter);
+
+L__SC void L__(incr_iter)(L__(iter) *iter);
+
+L__SC void L__(decr_iter)(L__(iter) *iter);
+
+L__SC void L__(init_iter)(L__(iter) *iter);
+
+#define AVL_IMPL_INIT                  1
+#define AVL_IMPL_IS_EMPTY              (1 << 1)
+#define AVL_IMPL_INSERT                        (1 << 2)
+#define AVL_IMPL_SEARCH                        (1 << 3)
+#define AVL_IMPL_SEARCH_LEAST          (1 << 4)
+#define AVL_IMPL_SEARCH_GREATEST       (1 << 5)
+#define AVL_IMPL_REMOVE                        (1 << 6)
+#define AVL_IMPL_BUILD                 (1 << 7)
+#define AVL_IMPL_START_ITER            (1 << 8)
+#define AVL_IMPL_START_ITER_LEAST      (1 << 9)
+#define AVL_IMPL_START_ITER_GREATEST   (1 << 10)
+#define AVL_IMPL_GET_ITER              (1 << 11)
+#define AVL_IMPL_INCR_ITER             (1 << 12)
+#define AVL_IMPL_DECR_ITER             (1 << 13)
+#define AVL_IMPL_INIT_ITER             (1 << 14)
+#define AVL_IMPL_SUBST                 (1 << 15)
+
+#define AVL_IMPL_ALL                   (~0)
+
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__SC
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
diff --git a/servers/vm/cavl_impl.h b/servers/vm/cavl_impl.h
new file mode 100755 (executable)
index 0000000..ccf2e21
--- /dev/null
@@ -0,0 +1,1187 @@
+/* Abstract AVL Tree Generic C Package.
+** Implementation generation header file.
+**
+** This code is in the public domain.  See cavl_tree.html for interface
+** documentation.
+**
+** Version: 1.5  Author: Walt Karas
+*/
+
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__tree
+#undef L__MASK_HIGH_BIT
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+#undef L__BIT_ARR_VAL
+#undef L__BIT_ARR_0
+#undef L__BIT_ARR_1
+#undef L__BIT_ARR_ALL
+#undef L__BIT_ARR_LONGS
+#undef L__IMPL_MASK
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__SC
+#undef L__BALANCE_PARAM_PREFIX
+
+#ifdef AVL_UNIQUE
+
+#define L__ AVL_UNIQUE
+
+#else
+
+#define L__(X) X
+
+#endif
+
+/* Determine correct storage class for functions */
+#ifdef AVL_PRIVATE
+
+#define L__SC static
+
+#else
+
+#define L__SC
+
+#endif
+
+#ifdef AVL_SIZE
+
+#define L__SIZE AVL_SIZE
+
+#else
+
+#define L__SIZE unsigned long
+
+#endif
+
+#define L__MASK_HIGH_BIT ((int) ~ ((~ (unsigned) 0) >> 1))
+
+/* ANSI C/ISO C++ require that a long have at least 32 bits.  Set
+** L__EST_LONG_BIT to be the greatest multiple of 8 in the range
+** 32 - 64 (inclusive) that is less than or equal to the number of
+** bits in a long.
+*/
+
+#if (((LONG_MAX >> 31) >> 7) == 0)
+
+#define L__EST_LONG_BIT 32
+
+#elif (((LONG_MAX >> 31) >> 15) == 0)
+
+#define L__EST_LONG_BIT 40
+
+#elif (((LONG_MAX >> 31) >> 23) == 0)
+
+#define L__EST_LONG_BIT 48
+
+#elif (((LONG_MAX >> 31) >> 31) == 0)
+
+#define L__EST_LONG_BIT 56
+
+#else
+
+#define L__EST_LONG_BIT 64
+
+#endif
+
+#define L__LONG_BIT (sizeof(long) * CHAR_BIT)
+
+#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT)
+
+/* The maximum depth may be greater than the number of bits in a long,
+** so multiple longs are needed to hold a bit array indexed by node
+** depth. */
+
+#define L__BIT_ARR_LONGS (((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT)
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME[L__BIT_ARR_LONGS];
+
+#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) \
+  ((BIT_ARR)[(BIT_NUM) / L__LONG_BIT] & (1L << ((BIT_NUM) % L__LONG_BIT)))
+
+#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) \
+  (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] &= ~(1L << ((BIT_NUM) % L__LONG_BIT));
+
+#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) \
+  (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] |= 1L << ((BIT_NUM) % L__LONG_BIT);
+
+#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) \
+  { int i = L__BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0L - (BIT_VAL); while(i); }
+
+#else /* The bit array can definitely fit in one long */
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME;
+
+#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) ((BIT_ARR) & (1L << (BIT_NUM)))
+
+#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) (BIT_ARR) &= ~(1L << (BIT_NUM));
+
+#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) (BIT_ARR) |= 1L << (BIT_NUM);
+
+#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) (BIT_ARR) = 0L - (BIT_VAL);
+
+#endif
+
+#ifdef AVL_READ_ERRORS_HAPPEN
+
+#define L__CHECK_READ_ERROR(ERROR_RETURN) \
+{ if (AVL_READ_ERROR) return(ERROR_RETURN); }
+
+#else
+
+#define L__CHECK_READ_ERROR(ERROR_RETURN)
+
+#endif
+
+/* The presumed reason that an instantiation places additional fields
+** inside the AVL tree structure is that the SET_ and GET_ macros
+** need these fields.  The "balance" function does not explicitly use
+** any fields in the AVL tree structure, so only pass an AVL tree
+** structure pointer to "balance" if it has instantiation-specific
+** fields that are (presumably) needed by the SET_/GET_ calls within
+** "balance".
+*/
+#ifdef AVL_INSIDE_STRUCT
+
+#define L__BALANCE_PARAM_CALL_PREFIX L__tree,
+#define L__BALANCE_PARAM_DECL_PREFIX L__(avl) *L__tree,
+
+#else
+
+#define L__BALANCE_PARAM_CALL_PREFIX
+#define L__BALANCE_PARAM_DECL_PREFIX
+
+#endif
+
+#ifdef AVL_IMPL_MASK
+
+#define L__IMPL_MASK (AVL_IMPL_MASK)
+
+#else
+
+/* Define all functions. */
+#define L__IMPL_MASK AVL_IMPL_ALL
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INIT)
+
+L__SC void L__(init)(L__(avl) *L__tree) { AVL_SET_ROOT(L__tree, AVL_NULL); }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_IS_EMPTY)
+
+L__SC int L__(is_empty)(L__(avl) *L__tree)
+  { return(L__tree->root == AVL_NULL); }
+
+#endif
+
+/* Put the private balance function in the same compilation module as
+** the insert function.  */
+#if (L__IMPL_MASK & AVL_IMPL_INSERT)
+
+/* Balances subtree, returns handle of root node of subtree after balancing.
+*/
+L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h)
+  {
+    AVL_HANDLE deep_h;
+
+    /* Either the "greater than" or the "less than" subtree of
+    ** this node has to be 2 levels deeper (or else it wouldn't
+    ** need balancing).
+    */
+    if (AVL_GET_BALANCE_FACTOR(bal_h) > 0)
+      {
+       /* "Greater than" subtree is deeper. */
+
+       deep_h = AVL_GET_GREATER(bal_h, 1);
+
+       L__CHECK_READ_ERROR(AVL_NULL)
+
+       if (AVL_GET_BALANCE_FACTOR(deep_h) < 0)
+         {
+           int bf;
+
+           AVL_HANDLE old_h = bal_h;
+           bal_h = AVL_GET_LESS(deep_h, 1);
+           L__CHECK_READ_ERROR(AVL_NULL)
+           AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 1))
+           AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 1))
+           AVL_SET_LESS(bal_h, old_h)
+           AVL_SET_GREATER(bal_h, deep_h)
+
+           bf = AVL_GET_BALANCE_FACTOR(bal_h);
+           if (bf != 0)
+             {
+               if (bf > 0)
+                 {
+                   AVL_SET_BALANCE_FACTOR(old_h, -1)
+                   AVL_SET_BALANCE_FACTOR(deep_h, 0)
+                 }
+               else
+                 {
+                   AVL_SET_BALANCE_FACTOR(deep_h, 1)
+                   AVL_SET_BALANCE_FACTOR(old_h, 0)
+                 }
+               AVL_SET_BALANCE_FACTOR(bal_h, 0)
+             }
+           else
+             {
+               AVL_SET_BALANCE_FACTOR(old_h, 0)
+               AVL_SET_BALANCE_FACTOR(deep_h, 0)
+             }
+         }
+       else
+         {
+           AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0))
+           AVL_SET_LESS(deep_h, bal_h)
+           if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
+             {
+               AVL_SET_BALANCE_FACTOR(deep_h, -1)
+               AVL_SET_BALANCE_FACTOR(bal_h, 1)
+             }
+           else
+             {
+               AVL_SET_BALANCE_FACTOR(deep_h, 0)
+               AVL_SET_BALANCE_FACTOR(bal_h, 0)
+             }
+           bal_h = deep_h;
+         }
+      }
+    else
+      {
+       /* "Less than" subtree is deeper. */
+
+       deep_h = AVL_GET_LESS(bal_h, 1);
+       L__CHECK_READ_ERROR(AVL_NULL)
+
+       if (AVL_GET_BALANCE_FACTOR(deep_h) > 0)
+         {
+           int bf;
+           AVL_HANDLE old_h = bal_h;
+           bal_h = AVL_GET_GREATER(deep_h, 1);
+           L__CHECK_READ_ERROR(AVL_NULL)
+           AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0))
+           AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0))
+           AVL_SET_GREATER(bal_h, old_h)
+           AVL_SET_LESS(bal_h, deep_h)
+
+           bf = AVL_GET_BALANCE_FACTOR(bal_h);
+           if (bf != 0)
+             {
+               if (bf < 0)
+                 {
+                   AVL_SET_BALANCE_FACTOR(old_h, 1)
+                   AVL_SET_BALANCE_FACTOR(deep_h, 0)
+                 }
+               else
+                 {
+                   AVL_SET_BALANCE_FACTOR(deep_h, -1)
+                   AVL_SET_BALANCE_FACTOR(old_h, 0)
+                 }
+               AVL_SET_BALANCE_FACTOR(bal_h, 0)
+             }
+           else
+             {
+               AVL_SET_BALANCE_FACTOR(old_h, 0)
+               AVL_SET_BALANCE_FACTOR(deep_h, 0)
+             }
+         }
+       else
+         {
+           AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0))
+           AVL_SET_GREATER(deep_h, bal_h)
+           if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
+             {
+               AVL_SET_BALANCE_FACTOR(deep_h, 1)
+               AVL_SET_BALANCE_FACTOR(bal_h, -1)
+             }
+           else
+             {
+               AVL_SET_BALANCE_FACTOR(deep_h, 0)
+               AVL_SET_BALANCE_FACTOR(bal_h, 0)
+             }
+           bal_h = deep_h;
+         }
+      }
+
+    return(bal_h);
+  }
+
+L__SC AVL_HANDLE L__(insert)(L__(avl) *L__tree, AVL_HANDLE h)
+  {
+    AVL_SET_LESS(h, AVL_NULL)
+    AVL_SET_GREATER(h, AVL_NULL)
+    AVL_SET_BALANCE_FACTOR(h, 0)
+
+    if (L__tree->root == AVL_NULL) {
+      AVL_SET_ROOT(L__tree, h);
+    } else
+      {
+       /* Last unbalanced node encountered in search for insertion point. */
+       AVL_HANDLE unbal = AVL_NULL;
+       /* Parent of last unbalanced node. */
+       AVL_HANDLE parent_unbal = AVL_NULL;
+       /* Balance factor of last unbalanced node. */
+       int unbal_bf;
+
+       /* Zero-based depth in tree. */
+       unsigned depth = 0, unbal_depth = 0;
+
+       /* Records a path into the tree.  If bit n is true, indicates
+       ** take greater branch from the nth node in the path, otherwise
+       ** take the less branch.  bit 0 gives branch from root, and
+       ** so on. */
+       L__BIT_ARR_DEFN(branch)
+
+       AVL_HANDLE hh = L__tree->root;
+       AVL_HANDLE parent = AVL_NULL;
+       int cmp;
+
+       do
+         {
+           if (AVL_GET_BALANCE_FACTOR(hh) != 0)
+             {
+               unbal = hh;
+               parent_unbal = parent;
+               unbal_depth = depth;
+             }
+           cmp = AVL_COMPARE_NODE_NODE(h, hh);
+           if (cmp == 0)
+             /* Duplicate key. */
+             return(hh);
+           parent = hh;
+           if (cmp > 0)
+             {
+               hh = AVL_GET_GREATER(hh, 1);
+               L__BIT_ARR_1(branch, depth)
+             }
+           else
+             {
+               hh = AVL_GET_LESS(hh, 1);
+               L__BIT_ARR_0(branch, depth)
+             }
+           L__CHECK_READ_ERROR(AVL_NULL)
+           depth++;
+         }
+       while (hh != AVL_NULL);
+
+       /*  Add node to insert as leaf of tree. */
+       if (cmp < 0)
+         AVL_SET_LESS(parent, h)
+       else
+         AVL_SET_GREATER(parent, h)
+
+       depth = unbal_depth;
+
+       if (unbal == AVL_NULL)
+         hh = L__tree->root;
+       else
+         {
+           cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+           depth++;
+           unbal_bf = AVL_GET_BALANCE_FACTOR(unbal);
+           if (cmp < 0)
+             unbal_bf--;
+           else  /* cmp > 0 */
+             unbal_bf++;
+           hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1);
+           L__CHECK_READ_ERROR(AVL_NULL)
+           if ((unbal_bf != -2) && (unbal_bf != 2))
+             {
+               /* No rebalancing of tree is necessary. */
+               AVL_SET_BALANCE_FACTOR(unbal, unbal_bf)
+               unbal = AVL_NULL;
+             }
+         }
+
+       if (hh != AVL_NULL)
+         while (h != hh)
+           {
+             cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+             depth++;
+             if (cmp < 0)
+               {
+                 AVL_SET_BALANCE_FACTOR(hh, -1)
+                 hh = AVL_GET_LESS(hh, 1);
+               }
+             else /* cmp > 0 */
+               {
+                 AVL_SET_BALANCE_FACTOR(hh, 1)
+                 hh = AVL_GET_GREATER(hh, 1);
+               }
+             L__CHECK_READ_ERROR(AVL_NULL)
+           }
+
+       if (unbal != AVL_NULL)
+         {
+           unbal = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX unbal);
+           L__CHECK_READ_ERROR(AVL_NULL)
+           if (parent_unbal == AVL_NULL)
+             {
+             AVL_SET_ROOT(L__tree, unbal);
+             }
+           else
+             {
+               depth = unbal_depth - 1;
+               cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+               if (cmp < 0)
+                 AVL_SET_LESS(parent_unbal, unbal)
+               else  /* cmp > 0 */
+                 AVL_SET_GREATER(parent_unbal, unbal)
+             }
+         }
+
+      }
+
+    return(h);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH)
+
+L__SC AVL_HANDLE L__(search)(L__(avl) *L__tree, AVL_KEY k, avl_search_type st)
+  {
+    int cmp, target_cmp;
+    AVL_HANDLE match_h = AVL_NULL;
+    AVL_HANDLE h = L__tree->root;
+
+    if (st & AVL_LESS)
+      target_cmp = 1;
+    else if (st & AVL_GREATER)
+      target_cmp = -1;
+    else
+      target_cmp = 0;
+
+    while (h != AVL_NULL)
+      {
+       cmp = AVL_COMPARE_KEY_NODE(k, h);
+       if (cmp == 0)
+         {
+           if (st & AVL_EQUAL)
+             {
+               match_h = h;
+               break;
+             }
+           cmp = -target_cmp;
+         }
+       else if (target_cmp != 0)
+         if (!((cmp ^ target_cmp) & L__MASK_HIGH_BIT))
+           /* cmp and target_cmp are both positive or both negative. */
+           match_h = h;
+       h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+       L__CHECK_READ_ERROR(AVL_NULL)
+      }
+
+    return(match_h);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH_LEAST)
+
+L__SC AVL_HANDLE L__(search_least)(L__(avl) *L__tree)
+  {
+    AVL_HANDLE h = L__tree->root;
+    AVL_HANDLE parent = AVL_NULL;
+
+    while (h != AVL_NULL)
+      {
+       parent = h;
+       h = AVL_GET_LESS(h, 1);
+       L__CHECK_READ_ERROR(AVL_NULL)
+      }
+
+    return(parent);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH_GREATEST)
+
+L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *L__tree)
+  {
+    AVL_HANDLE h = L__tree->root;
+    AVL_HANDLE parent = AVL_NULL;
+
+    while (h != AVL_NULL)
+      {
+       parent = h;
+       h = AVL_GET_GREATER(h, 1);
+       L__CHECK_READ_ERROR(AVL_NULL)
+      }
+
+    return(parent);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_REMOVE)
+
+/* Prototype of balance function (called by remove) in case not in
+** same compilation unit.
+*/
+L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h);
+
+L__SC AVL_HANDLE L__(remove)(L__(avl) *L__tree, AVL_KEY k)
+  {
+    /* Zero-based depth in tree. */
+    unsigned depth = 0, rm_depth;
+
+    /* Records a path into the tree.  If bit n is true, indicates
+    ** take greater branch from the nth node in the path, otherwise
+    ** take the less branch.  bit 0 gives branch from root, and
+    ** so on. */
+    L__BIT_ARR_DEFN(branch)
+
+    AVL_HANDLE h = L__tree->root;
+    AVL_HANDLE parent = AVL_NULL;
+    AVL_HANDLE child;
+    AVL_HANDLE path;
+    int cmp, cmp_shortened_sub_with_path;
+    int reduced_depth;
+    int bf;
+    AVL_HANDLE rm;
+    AVL_HANDLE parent_rm;
+
+    for ( ; ; )
+      {
+       if (h == AVL_NULL)
+         /* No node in tree with given key. */
+         return(AVL_NULL);
+       cmp = AVL_COMPARE_KEY_NODE(k, h);
+       if (cmp == 0)
+         /* Found node to remove. */
+         break;
+       parent = h;
+       if (cmp > 0)
+         {
+           h = AVL_GET_GREATER(h, 1);
+           L__BIT_ARR_1(branch, depth)
+         }
+       else
+         {
+           h = AVL_GET_LESS(h, 1);
+           L__BIT_ARR_0(branch, depth)
+         }
+       L__CHECK_READ_ERROR(AVL_NULL)
+       depth++;
+       cmp_shortened_sub_with_path = cmp;
+      }
+    rm = h;
+    parent_rm = parent;
+    rm_depth = depth;
+
+    /* If the node to remove is not a leaf node, we need to get a
+    ** leaf node, or a node with a single leaf as its child, to put
+    ** in the place of the node to remove.  We will get the greatest
+    ** node in the less subtree (of the node to remove), or the least
+    ** node in the greater subtree.  We take the leaf node from the
+    ** deeper subtree, if there is one. */
+
+    if (AVL_GET_BALANCE_FACTOR(h) < 0)
+      {
+       child = AVL_GET_LESS(h, 1);
+       L__BIT_ARR_0(branch, depth)
+       cmp = -1;
+      }
+    else
+      {
+       child = AVL_GET_GREATER(h, 1);
+       L__BIT_ARR_1(branch, depth)
+       cmp = 1;
+      }
+    L__CHECK_READ_ERROR(AVL_NULL)
+    depth++;
+
+    if (child != AVL_NULL)
+      {
+       cmp = -cmp;
+       do
+         {
+           parent = h;
+           h = child;
+           if (cmp < 0)
+             {
+               child = AVL_GET_LESS(h, 1);
+               L__BIT_ARR_0(branch, depth)
+             }
+           else
+             {
+               child = AVL_GET_GREATER(h, 1);
+               L__BIT_ARR_1(branch, depth)
+             }
+           L__CHECK_READ_ERROR(AVL_NULL)
+           depth++;
+         }
+       while (child != AVL_NULL);
+
+       if (parent == rm)
+         /* Only went through do loop once.  Deleted node will be replaced
+         ** in the tree structure by one of its immediate children. */
+         cmp_shortened_sub_with_path = -cmp;
+        else
+         cmp_shortened_sub_with_path = cmp;
+
+       /* Get the handle of the opposite child, which may not be null. */
+       child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0);
+      }
+
+    if (parent == AVL_NULL) {
+      /* There were only 1 or 2 nodes in this tree. */
+      AVL_SET_ROOT(L__tree, child);
+    }
+    else if (cmp_shortened_sub_with_path < 0)
+      AVL_SET_LESS(parent, child)
+    else
+      AVL_SET_GREATER(parent, child)
+
+    /* "path" is the parent of the subtree being eliminated or reduced
+    ** from a depth of 2 to 1.  If "path" is the node to be removed, we
+    ** set path to the node we're about to poke into the position of the
+    ** node to be removed. */
+    path = parent == rm ? h : parent;
+
+    if (h != rm)
+      {
+       /* Poke in the replacement for the node to be removed. */
+       AVL_SET_LESS(h, AVL_GET_LESS(rm, 0))
+       AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0))
+       AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm))
+       if (parent_rm == AVL_NULL) {
+          AVL_SET_ROOT(L__tree, h);
+       }
+       else
+         {
+           depth = rm_depth - 1;
+           if (L__BIT_ARR_VAL(branch, depth))
+             AVL_SET_GREATER(parent_rm, h)
+           else
+             AVL_SET_LESS(parent_rm, h)
+         }
+      }
+
+    if (path != AVL_NULL)
+      {
+       /* Create a temporary linked list from the parent of the path node
+       ** to the root node. */
+       h = L__tree->root;
+       parent = AVL_NULL;
+       depth = 0;
+       while (h != path)
+         {
+           if (L__BIT_ARR_VAL(branch, depth))
+             {
+               child = AVL_GET_GREATER(h, 1);
+               AVL_SET_GREATER(h, parent)
+             }
+           else
+             {
+               child = AVL_GET_LESS(h, 1);
+               AVL_SET_LESS(h, parent)
+             }
+           L__CHECK_READ_ERROR(AVL_NULL)
+           depth++;
+           parent = h;
+           h = child;
+         }
+
+       /* Climb from the path node to the root node using the linked
+       ** list, restoring the tree structure and rebalancing as necessary.
+       */
+       reduced_depth = 1;
+       cmp = cmp_shortened_sub_with_path;
+       for ( ; ; )
+         {
+           if (reduced_depth)
+             {
+               bf = AVL_GET_BALANCE_FACTOR(h);
+               if (cmp < 0)
+                 bf++;
+               else  /* cmp > 0 */
+                 bf--;
+               if ((bf == -2) || (bf == 2))
+                 {
+                   h = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX h);
+                   L__CHECK_READ_ERROR(AVL_NULL)
+                   bf = AVL_GET_BALANCE_FACTOR(h);
+                 }
+               else
+                 AVL_SET_BALANCE_FACTOR(h, bf)
+               reduced_depth = (bf == 0);
+             }
+           if (parent == AVL_NULL)
+             break;
+           child = h;
+           h = parent;
+           depth--;
+           cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+           if (cmp < 0)
+             {
+               parent = AVL_GET_LESS(h, 1);
+               AVL_SET_LESS(h, child)
+             }
+           else
+             {
+               parent = AVL_GET_GREATER(h, 1);
+               AVL_SET_GREATER(h, child)
+             }
+           L__CHECK_READ_ERROR(AVL_NULL)
+         }
+        AVL_SET_ROOT(L__tree, h);
+      }
+
+    return(rm);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SUBST)
+
+L__SC AVL_HANDLE L__(subst)(L__(avl) *L__tree, AVL_HANDLE new_node)
+  {
+    AVL_HANDLE h = L__tree->root;
+    AVL_HANDLE parent = AVL_NULL;
+    int cmp, last_cmp;
+
+    /* Search for node already in tree with same key. */
+    for ( ; ; )
+      {
+       if (h == AVL_NULL)
+         /* No node in tree with same key as new node. */
+         return(AVL_NULL);
+       cmp = AVL_COMPARE_NODE_NODE(new_node, h);
+       if (cmp == 0)
+         /* Found the node to substitute new one for. */
+         break;
+       last_cmp = cmp;
+       parent = h;
+       h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+       L__CHECK_READ_ERROR(AVL_NULL)
+      }
+
+    /* Copy tree housekeeping fields from node in tree to new node. */
+    AVL_SET_LESS(new_node, AVL_GET_LESS(h, 0))
+    AVL_SET_GREATER(new_node, AVL_GET_GREATER(h, 0))
+    AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(h))
+
+    if (parent == AVL_NULL)
+     {
+      /* New node is also new root. */
+      AVL_SET_ROOT(L__tree, new_node);
+     }
+    else
+      {
+       /* Make parent point to new node. */
+       if (last_cmp < 0)
+         AVL_SET_LESS(parent, new_node)
+       else
+         AVL_SET_GREATER(parent, new_node)
+      }
+
+    return(h);
+  }
+
+#endif
+
+#ifdef AVL_BUILD_ITER_TYPE
+
+#if (L__IMPL_MASK & AVL_IMPL_BUILD)
+
+L__SC int L__(build)(
+  L__(avl) *L__tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes)
+  {
+    /* Gives path to subtree being built.  If bit n is false, branch
+    ** less from the node at depth n, if true branch greater. */
+    L__BIT_ARR_DEFN(branch)
+
+    /* If bit n is true, then for the current subtree at depth n, its
+    ** greater subtree has one more node than its less subtree. */
+    L__BIT_ARR_DEFN(rem)
+
+    /* Depth of root node of current subtree. */
+    unsigned depth = 0;
+
+    /* Number of nodes in current subtree. */
+    L__SIZE num_sub = num_nodes;
+
+    /* The algorithm relies on a stack of nodes whose less subtree has
+    ** been built, but whose greater subtree has not yet been built.
+    ** The stack is implemented as linked list.  The nodes are linked
+    ** together by having the "greater" handle of a node set to the
+    ** next node in the list.  "less_parent" is the handle of the first
+    ** node in the list. */
+    AVL_HANDLE less_parent = AVL_NULL;
+
+    /* h is root of current subtree, child is one of its children. */
+    AVL_HANDLE h;
+    AVL_HANDLE child;
+
+    if (num_nodes == 0)
+      {
+        AVL_SET_ROOT(L__tree, AVL_NULL);
+       return(1);
+      }
+
+    for ( ; ; )
+      {
+       while (num_sub > 2)
+         {
+           /* Subtract one for root of subtree. */
+           num_sub--;
+           if (num_sub & 1)
+             L__BIT_ARR_1(rem, depth)
+           else
+             L__BIT_ARR_0(rem, depth)
+           L__BIT_ARR_0(branch, depth)
+           depth++;
+           num_sub >>= 1;
+         }
+
+       if (num_sub == 2)
+         {
+           /* Build a subtree with two nodes, slanting to greater.
+           ** I arbitrarily chose to always have the extra node in the
+           ** greater subtree when there is an odd number of nodes to
+           ** split between the two subtrees. */
+
+           h = AVL_BUILD_ITER_VAL(p);
+           L__CHECK_READ_ERROR(0)
+           AVL_BUILD_ITER_INCR(p)
+           child = AVL_BUILD_ITER_VAL(p);
+           L__CHECK_READ_ERROR(0)
+           AVL_BUILD_ITER_INCR(p)
+           AVL_SET_LESS(child, AVL_NULL)
+           AVL_SET_GREATER(child, AVL_NULL)
+           AVL_SET_BALANCE_FACTOR(child, 0)
+           AVL_SET_GREATER(h, child)
+           AVL_SET_LESS(h, AVL_NULL)
+           AVL_SET_BALANCE_FACTOR(h, 1)
+         }
+       else  /* num_sub == 1 */
+         {
+           /* Build a subtree with one node. */
+
+           h = AVL_BUILD_ITER_VAL(p);
+           L__CHECK_READ_ERROR(0)
+           AVL_BUILD_ITER_INCR(p)
+           AVL_SET_LESS(h, AVL_NULL)
+           AVL_SET_GREATER(h, AVL_NULL)
+           AVL_SET_BALANCE_FACTOR(h, 0)
+         }
+
+       while (depth)
+         {
+           depth--;
+           if (!L__BIT_ARR_VAL(branch, depth))
+             /* We've completed a less subtree. */
+             break;
+
+           /* We've completed a greater subtree, so attach it to
+           ** its parent (that is less than it).  We pop the parent
+           ** off the stack of less parents. */
+           child = h;
+           h = less_parent;
+           less_parent = AVL_GET_GREATER(h, 1);
+           L__CHECK_READ_ERROR(0)
+           AVL_SET_GREATER(h, child)
+           /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */
+           num_sub <<= 1;
+           num_sub += L__BIT_ARR_VAL(rem, depth) ? 0 : 1;
+           if (num_sub & (num_sub - 1))
+             /* num_sub is not a power of 2. */
+             AVL_SET_BALANCE_FACTOR(h, 0)
+           else
+             /* num_sub is a power of 2. */
+             AVL_SET_BALANCE_FACTOR(h, 1)
+         }
+
+       if (num_sub == num_nodes)
+         /* We've completed the full tree. */
+         break;
+
+       /* The subtree we've completed is the less subtree of the
+       ** next node in the sequence. */
+
+       child = h;
+       h = AVL_BUILD_ITER_VAL(p);
+       L__CHECK_READ_ERROR(0)
+       AVL_BUILD_ITER_INCR(p)
+       AVL_SET_LESS(h, child)
+
+       /* Put h into stack of less parents. */
+       AVL_SET_GREATER(h, less_parent)
+       less_parent = h;
+
+       /* Proceed to creating greater than subtree of h. */
+       L__BIT_ARR_1(branch, depth)
+       num_sub += L__BIT_ARR_VAL(rem, depth) ? 1 : 0;
+       depth++;
+
+      } /* end for ( ; ; ) */
+
+    AVL_SET_ROOT(L__tree, h);
+
+    return(1);
+  }
+
+#endif
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INIT_ITER)
+
+/* Initialize depth to invalid value, to indicate iterator is
+** invalid.  (Depth is zero-based.)  It's not necessary to initialize
+** iterators prior to passing them to the "start" function.
+*/
+L__SC void L__(init_iter)(L__(iter) *iter) { iter->depth = ~0; }
+
+#endif
+
+#ifdef AVL_READ_ERRORS_HAPPEN
+
+#define L__CHECK_READ_ERROR_INV_DEPTH \
+{ if (AVL_READ_ERROR) { iter->depth = ~0; return; } }
+
+#else
+
+#define L__CHECK_READ_ERROR_INV_DEPTH
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER)
+
+L__SC void L__(start_iter)(
+  L__(avl) *L__tree, L__(iter) *iter, AVL_KEY k, avl_search_type st)
+  {
+    AVL_HANDLE h = L__tree->root;
+    unsigned d = 0;
+    int cmp, target_cmp;
+
+    /* Save the tree that we're going to iterate through in a
+    ** member variable. */
+    iter->tree_ = L__tree;
+
+    iter->depth = ~0;
+
+    if (h == AVL_NULL)
+      /* Tree is empty. */
+      return;
+
+    if (st & AVL_LESS)
+      /* Key can be greater than key of starting node. */
+      target_cmp = 1;
+    else if (st & AVL_GREATER)
+      /* Key can be less than key of starting node. */
+      target_cmp = -1;
+    else
+      /* Key must be same as key of starting node. */
+      target_cmp = 0;
+
+    for ( ; ; )
+      {
+       cmp = AVL_COMPARE_KEY_NODE(k, h);
+       if (cmp == 0)
+         {
+           if (st & AVL_EQUAL)
+             {
+               /* Equal node was sought and found as starting node. */
+               iter->depth = d;
+               break;
+             }
+           cmp = -target_cmp;
+         }
+       else if (target_cmp != 0)
+         if (!((cmp ^ target_cmp) & L__MASK_HIGH_BIT))
+           /* cmp and target_cmp are both negative or both positive. */
+           iter->depth = d;
+       h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+       L__CHECK_READ_ERROR_INV_DEPTH
+       if (h == AVL_NULL)
+         break;
+       if (cmp > 0)
+         L__BIT_ARR_1(iter->branch, d)
+       else
+         L__BIT_ARR_0(iter->branch, d)
+       iter->path_h[d++] = h;
+      }
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER_LEAST)
+
+L__SC void L__(start_iter_least)(L__(avl) *L__tree, L__(iter) *iter)
+  {
+    AVL_HANDLE h = L__tree->root;
+
+    iter->tree_ = L__tree;
+
+    iter->depth = ~0;
+
+    L__BIT_ARR_ALL(iter->branch, 0)
+
+    while (h != AVL_NULL)
+      {
+       if (iter->depth != ~0)
+         iter->path_h[iter->depth] = h;
+       iter->depth++;
+       h = AVL_GET_LESS(h, 1);
+       L__CHECK_READ_ERROR_INV_DEPTH
+      }
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER_GREATEST)
+
+L__SC void L__(start_iter_greatest)(L__(avl) *L__tree, L__(iter) *iter)
+  {
+    AVL_HANDLE h = L__tree->root;
+
+    iter->tree_ = L__tree;
+
+    iter->depth = ~0;
+
+    L__BIT_ARR_ALL(iter->branch, 1)
+
+    while (h != AVL_NULL)
+      {
+       if (iter->depth != ~0)
+         iter->path_h[iter->depth] = h;
+       iter->depth++;
+       h = AVL_GET_GREATER(h, 1);
+       L__CHECK_READ_ERROR_INV_DEPTH
+      }
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_GET_ITER)
+
+L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter)
+  {
+    if (iter->depth == ~0)
+      return(AVL_NULL);
+
+    return(iter->depth == 0 ?
+            iter->tree_->root : iter->path_h[iter->depth - 1]);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INCR_ITER)
+
+L__SC void L__(incr_iter)(L__(iter) *iter)
+  {
+    #define L__tree (iter->tree_)
+
+    if (iter->depth != ~0)
+      {
+       AVL_HANDLE h =
+         AVL_GET_GREATER((iter->depth == 0 ?
+           iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+       L__CHECK_READ_ERROR_INV_DEPTH
+
+       if (h == AVL_NULL)
+         do
+           {
+             if (iter->depth == 0)
+               {
+                 iter->depth = ~0;
+                 break;
+               }
+             iter->depth--;
+           }
+         while (L__BIT_ARR_VAL(iter->branch, iter->depth));
+       else
+         {
+           L__BIT_ARR_1(iter->branch, iter->depth)
+           iter->path_h[iter->depth++] = h;
+           for ( ; ; )
+             {
+               h = AVL_GET_LESS(h, 1);
+               L__CHECK_READ_ERROR_INV_DEPTH
+               if (h == AVL_NULL)
+                 break;
+               L__BIT_ARR_0(iter->branch, iter->depth)
+               iter->path_h[iter->depth++] = h;
+             }
+         }
+      }
+
+    #undef L__tree
+  }
+
+#endif
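
/* For illustration: incr_iter() above finds the in-order successor from
 * the saved path, using one branch bit per level to know whether each
 * step went toward the greater child.  The same logic with a hypothetical
 * pointer-based stack (the real code compares branch bits instead of
 * child pointers, and additionally checks for read errors):
 */

#include <stddef.h>

struct it_node { int key; struct it_node *less, *greater; };
struct it { struct it_node *path[64]; int depth; };	/* depth 0: done */

static void it_incr(struct it *it)
{
	struct it_node *h, *child;

	if (it->depth == 0)
		return;					/* exhausted */
	h = it->path[it->depth - 1];			/* current node */
	if (h->greater != NULL) {
		/* One step greater, then descend to the leftmost node. */
		for (h = h->greater; h != NULL; h = h->less)
			it->path[it->depth++] = h;
	} else {
		/* Pop until we come up from a less-branch; that parent is
		 * the successor.  Popping everything ends the iteration.
		 */
		do
			child = it->path[--it->depth];
		while (it->depth > 0 &&
		    it->path[it->depth - 1]->greater == child);
	}
}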
+
+#if (L__IMPL_MASK & AVL_IMPL_DECR_ITER)
+
+L__SC void L__(decr_iter)(L__(iter) *iter)
+  {
+    #define L__tree (iter->tree_)
+
+    if (iter->depth != ~0)
+      {
+       AVL_HANDLE h =
+         AVL_GET_LESS((iter->depth == 0 ?
+           iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+       L__CHECK_READ_ERROR_INV_DEPTH
+
+       if (h == AVL_NULL)
+         do
+           {
+             if (iter->depth == 0)
+               {
+                 iter->depth = ~0;
+                 break;
+               }
+             iter->depth--;
+           }
+         while (!L__BIT_ARR_VAL(iter->branch, iter->depth));
+       else
+         {
+           L__BIT_ARR_0(iter->branch, iter->depth)
+           iter->path_h[iter->depth++] = h;
+           for ( ; ; )
+             {
+               h = AVL_GET_GREATER(h, 1);
+               L__CHECK_READ_ERROR_INV_DEPTH
+               if (h == AVL_NULL)
+                 break;
+               L__BIT_ARR_1(iter->branch, iter->depth)
+               iter->path_h[iter->depth++] = h;
+             }
+         }
+      }
+
+    #undef L__tree
+  }
+
+#endif
+
+/* Tidy up the preprocessor symbol name space. */
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__MASK_HIGH_BIT
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+#undef L__BIT_ARR_VAL
+#undef L__BIT_ARR_0
+#undef L__BIT_ARR_1
+#undef L__BIT_ARR_ALL
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__BIT_ARR_LONGS
+#undef L__IMPL_MASK
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__SC
+#undef L__BALANCE_PARAM_CALL_PREFIX
+#undef L__BALANCE_PARAM_DECL_PREFIX
index 47ec93889ac5f2314ef72c62040d0b4cc76331ef..65106af72cd5c52e9f84c961f0328f327055a7d3 100644 (file)
@@ -14,6 +14,7 @@
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/const.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <assert.h>
@@ -31,7 +32,9 @@
 
 FORWARD _PROTOTYPE( int new_mem, (struct vmproc *vmp, struct vmproc *sh_vmp,
        vir_bytes text_bytes, vir_bytes data_bytes, vir_bytes bss_bytes,
-       vir_bytes stk_bytes, phys_bytes tot_bytes)      );
+       vir_bytes stk_bytes, phys_bytes tot_bytes, vir_bytes *stack_top));
+
+static int failcount;
 
 /*===========================================================================*
  *                              find_share                                   *
@@ -78,15 +81,17 @@ PUBLIC int do_exec_newmem(message *msg)
        proc_e= msg->VMEN_ENDPOINT;
        if (vm_isokendpt(proc_e, &proc_n) != OK)
        {
-               printf("VM:exec_newmem: bad endpoint %d from %d\n",
+               printf("VM: exec_newmem: bad endpoint %d from %d\n",
                        proc_e, msg->m_source);
                return ESRCH;
        }
        vmp= &vmproc[proc_n];
        ptr= msg->VMEN_ARGSPTR;
 
+       NOTRUNNABLE(vmp->vm_endpoint);
+
        if(msg->VMEN_ARGSSIZE != sizeof(args)) {
-               printf("VM:exec_newmem: args size %d != %ld\n",
+               printf("VM: exec_newmem: args size %d != %ld\n",
                        msg->VMEN_ARGSSIZE, sizeof(args));
                return EINVAL;
        }
@@ -97,18 +102,30 @@ SANITYCHECK(SCL_DETAIL);
        if (r != OK)
                vm_panic("exec_newmem: sys_datacopy failed", r);
 
+       /* Enforce a minimum stack region (not preallocated).  This is a
+        * stopgap until a proper rlimit-based stack-size mechanism exists.
+        */
+       if(args.tot_bytes < MINSTACKREGION) {
+               args.tot_bytes = MINSTACKREGION;
+       }
+
        /* Check to see if segment sizes are feasible. */
        tc = ((unsigned long) args.text_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
        dc = (args.data_bytes+args.bss_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
        totc = (args.tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
        sc = (args.args_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
-       if (dc >= totc) return(ENOEXEC); /* stack must be at least 1 click */
+       if (dc >= totc) {
+               printf("VM: newmem: no stack?\n");
+               return(ENOEXEC); /* stack must be at least 1 click */
+       }
 
        dvir = (args.sep_id ? 0 : tc);
        s_vir = dvir + (totc - sc);
        r = (dvir + dc > s_vir) ? ENOMEM : OK;
-       if (r != OK)
+       if (r != OK) {
+               printf("VM: newmem: no virtual space?\n");
                return r;
+       }
 
        /* Can the process' text be shared with that of one already running? */
        if(!vm_paged) {
@@ -121,29 +138,30 @@ SANITYCHECK(SCL_DETAIL);
         * kernel.
         */
        r = new_mem(vmp, sh_mp, args.text_bytes, args.data_bytes,
-               args.bss_bytes, args.args_bytes, args.tot_bytes);
-       if (r != OK) return(r);
+               args.bss_bytes, args.args_bytes, args.tot_bytes, &stack_top);
+       if (r != OK) {
+               printf("VM: newmem: new_mem failed\n");
+               return(r);
+       }
 
        /* Save file identification to allow it to be shared. */
        vmp->vm_ino = args.st_ino;
        vmp->vm_dev = args.st_dev;
        vmp->vm_ctime = args.st_ctime;
 
-       stack_top= ((vir_bytes)vmp->vm_arch.vm_seg[S].mem_vir << CLICK_SHIFT) +
-               ((vir_bytes)vmp->vm_arch.vm_seg[S].mem_len << CLICK_SHIFT);
-
        /* set/clear separate I&D flag */
        if (args.sep_id)
                vmp->vm_flags |= VMF_SEPARATE;  
        else
                vmp->vm_flags &= ~VMF_SEPARATE;
 
-       
        msg->VMEN_STACK_TOP = (void *) stack_top;
        msg->VMEN_FLAGS = 0;
        if (!sh_mp)                      /* Load text if sh_mp = NULL */
                msg->VMEN_FLAGS |= EXC_NM_RF_LOAD_TEXT;
 
+       NOTRUNNABLE(vmp->vm_endpoint);
+
        return OK;
 }
 
@@ -151,7 +169,7 @@ SANITYCHECK(SCL_DETAIL);
  *                             new_mem                                      *
  *===========================================================================*/
 PRIVATE int new_mem(rmp, sh_mp, text_bytes, data_bytes,
-       bss_bytes,stk_bytes,tot_bytes)
+       bss_bytes,stk_bytes,tot_bytes,stack_top)
 struct vmproc *rmp;            /* process to get a new memory map */
 struct vmproc *sh_mp;          /* text can be shared with this process */
 vir_bytes text_bytes;          /* text segment size in bytes */
@@ -159,6 +177,7 @@ vir_bytes data_bytes;               /* size of initialized data in bytes */
 vir_bytes bss_bytes;           /* size of bss in bytes */
 vir_bytes stk_bytes;           /* size of initial stack segment in bytes */
 phys_bytes tot_bytes;          /* total memory to allocate, including gap */
+vir_bytes *stack_top;          /* top of process stack */
 {
 /* Allocate new memory and release the old memory.  Change the map and report
  * the new map to the kernel.  Zero the new core image's bss, gap and stack.
@@ -166,10 +185,15 @@ phys_bytes tot_bytes;             /* total memory to allocate, including gap */
 
   vir_clicks text_clicks, data_clicks, gap_clicks, stack_clicks, tot_clicks;
   phys_bytes bytes, base, bss_offset;
-  int s, r2;
+  int s, r2, r, hadpt = 0;
+  struct vmproc *vmpold = &vmproc[VMP_EXECTMP];
 
   SANITYCHECK(SCL_FUNCTIONS);
 
+  if(rmp->vm_flags & VMF_HASPT) {
+       hadpt = 1;
+  }
+
   /* No need to allocate text if it can be shared. */
   if (sh_mp != NULL) {
        text_bytes = 0;
@@ -185,19 +209,31 @@ phys_bytes tot_bytes;             /* total memory to allocate, including gap */
   stack_clicks = (stk_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
   tot_clicks = (tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
   gap_clicks = tot_clicks - data_clicks - stack_clicks;
-  if ( (int) gap_clicks < 0) return(ENOMEM);
-
-SANITYCHECK(SCL_DETAIL);
-
+  if ( (int) gap_clicks < 0) {
+       printf("VM: new_mem: no gap?\n");
+       return(ENOMEM);
+  }
 
-  /* We've got memory for the new core image.  Release the old one. */
 
-  if(rmp->vm_flags & VMF_HASPT) {
-       /* Free page table and memory allocated by pagetable functions. */
-       rmp->vm_flags &= ~VMF_HASPT;
-       free_proc(rmp);
-  } else {
+  /* Keep previous process state for recovery; the sanity check functions
+   * know about the 'vmpold' slot, so the memory that the exec()ing
+   * process is still holding is referenced there.
+   *
+   * Throw away the old page table to avoid having two process slots
+   * using the same vm_pt.
+   * Just recreate it if we have to revert.
+   */
+SANITYCHECK(SCL_DETAIL);
+  if(hadpt) {
+         pt_free(&rmp->vm_pt);
+         rmp->vm_flags &= ~VMF_HASPT;
+  }
+  vm_assert(!(vmpold->vm_flags & VMF_INUSE));
+  *vmpold = *rmp;      /* copy current state. */
+  rmp->vm_regions = NULL; /* exec()ing process regions thrown out. */
+SANITYCHECK(SCL_DETAIL);
 
+  if(!hadpt) {
        if (find_share(rmp, rmp->vm_ino, rmp->vm_dev, rmp->vm_ctime) == NULL) {
                /* No other process shares the text segment, so free it. */
                FREE_MEM(rmp->vm_arch.vm_seg[T].mem_phys, rmp->vm_arch.vm_seg[T].mem_len);
@@ -210,17 +246,20 @@ SANITYCHECK(SCL_DETAIL);
                - rmp->vm_arch.vm_seg[D].mem_vir);
   }
 
-  /* We have now passed the point of no return.  The old core image has been
-   * forever lost, memory for a new core image has been allocated.  Set up
-   * and report new map.
+  /* Build new process in current slot, without freeing old
+   * one. If it fails, revert.
    */
 
   if(vm_paged) {
-       if(pt_new(&rmp->vm_pt) != OK)
-               vm_panic("exec_newmem: no new pagetable", NO_NUM);
+       int ptok = 1;
+       SANITYCHECK(SCL_DETAIL);
+       if((r=pt_new(&rmp->vm_pt)) != OK) {
+               ptok = 0;
+               printf("exec_newmem: no new pagetable\n");
+       }
 
        SANITYCHECK(SCL_DETAIL);
-       proc_new(rmp,
+       if(r != OK || (r=proc_new(rmp,
         VM_PROCSTART,  /* where to start the process in the page table */
         CLICK2ABS(text_clicks),/* how big is the text in bytes, page-aligned */
         CLICK2ABS(data_clicks),/* how big is data+bss, page-aligned */
@@ -228,13 +267,48 @@ SANITYCHECK(SCL_DETAIL);
         CLICK2ABS(gap_clicks), /* how big is gap, page-aligned */
         0,0,                   /* not preallocated */
         VM_STACKTOP            /* regular stack top */
-        );
+        )) != OK) {
+               SANITYCHECK(SCL_DETAIL);
+               printf("VM: new_mem: failed\n");
+               if(ptok) {
+                       pt_free(&rmp->vm_pt);
+               }
+               *rmp = *vmpold; /* undo. */
+               clear_proc(vmpold);     /* disappear. */
+               SANITYCHECK(SCL_DETAIL);
+               if(hadpt) {
+                       if(pt_new(&rmp->vm_pt) != OK) {
+                       /* We rely on the fact that recreating a page table
+                        * in a slot that previously held one never fails.
+                        */
+                               vm_panic("new_mem: pt_new failed", s);
+                       }
+                       rmp->vm_flags |= VMF_HASPT;
+                       SANITYCHECK(SCL_DETAIL);
+                       if(map_writept(rmp) != OK) {
+                               printf("VM: warning: exec undo failed\n");
+                       }
+                       SANITYCHECK(SCL_DETAIL);
+               }
+               return r;
+       }
        SANITYCHECK(SCL_DETAIL);
+       /* new process is made; free and unreference
+        * page table and memory still held by exec()ing process.
+        */
+       SANITYCHECK(SCL_DETAIL);
+       free_proc(vmpold);
+       clear_proc(vmpold);     /* disappear. */
+       SANITYCHECK(SCL_DETAIL);
+       *stack_top = VM_STACKTOP;
   } else {
        phys_clicks new_base;
 
        new_base = ALLOC_MEM(text_clicks + tot_clicks, 0);
-       if (new_base == NO_MEM) return(ENOMEM);
+       if (new_base == NO_MEM) {
+               printf("VM: new_mem: ALLOC_MEM failed\n");
+               return(ENOMEM);
+       }
 
        if (sh_mp != NULL) {
                /* Share the text segment. */
@@ -294,6 +368,8 @@ SANITYCHECK(SCL_DETAIL);
          /* Tell kernel this thing has no page table. */
          if((s=pt_bind(NULL, rmp)) != OK)
                vm_panic("exec_newmem: pt_bind failed", s);
+       *stack_top= ((vir_bytes)rmp->vm_arch.vm_seg[S].mem_vir << CLICK_SHIFT) +
+               ((vir_bytes)rmp->vm_arch.vm_seg[S].mem_len << CLICK_SHIFT);
   }
 
 SANITYCHECK(SCL_FUNCTIONS);
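
/* For illustration: the error handling added above is a checkpoint/rollback
 * pattern -- the whole old process state is saved in the scratch slot, the
 * new image is built in place, and the old state is either freed (commit)
 * or copied back (revert).  A hedged, self-contained sketch of the idiom,
 * with hypothetical stand-in types and helpers:
 */

struct pstate { int nresources; /* ... whatever a slot holds ... */ };

static int build_new(struct pstate *p) { p->nresources = 1; return 0; }
static void release(struct pstate *p) { p->nresources = 0; }

static int rebuild(struct pstate *cur, struct pstate *scratch)
{
	*scratch = *cur;		/* checkpoint, like *vmpold = *rmp */
	cur->nresources = 0;		/* the new state starts out empty */

	if (build_new(cur) != 0) {	/* like pt_new()/proc_new() failing */
		*cur = *scratch;	/* revert, like *rmp = *vmpold */
		return -1;
	}
	release(scratch);		/* commit: old state no longer needed */
	return 0;
}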
@@ -348,13 +424,6 @@ PUBLIC int proc_new(struct vmproc *vmp,
        vm_assert(!(data_start % VM_PAGE_SIZE));
        vm_assert((!text_start && !data_start) || (text_start && data_start));
 
-#if 0
-       if(!map_proc_kernel(vmp)) {
-               printf("VM: exec: map_proc_kernel failed\n");
-               return ENOMEM;
-       }
-#endif
-
        /* Place text at start of process. */
        vmp->vm_arch.vm_seg[T].mem_phys = ABS2CLICK(vstart);
        vmp->vm_arch.vm_seg[T].mem_vir = 0;
@@ -371,6 +440,8 @@ PUBLIC int proc_new(struct vmproc *vmp,
                  VR_ANON | VR_WRITABLE, text_start ? 0 : MF_PREALLOC)) {
                        SANITYCHECK(SCL_DETAIL);
                        printf("VM: proc_new: map_page_region failed (text)\n");
+                       map_free_proc(vmp);
+                       SANITYCHECK(SCL_DETAIL);
                        return(ENOMEM);
                }
                SANITYCHECK(SCL_DETAIL);
@@ -385,6 +456,8 @@ PUBLIC int proc_new(struct vmproc *vmp,
          data_bytes, data_start ? data_start : MAP_NONE, VR_ANON | VR_WRITABLE,
                data_start ? 0 : MF_PREALLOC))) {
                printf("VM: exec: map_page_region for data failed\n");
+               map_free_proc(vmp);
+               SANITYCHECK(SCL_DETAIL);
                return ENOMEM;
        }
 
@@ -432,13 +505,8 @@ PUBLIC int proc_new(struct vmproc *vmp,
 
        vmp->vm_flags |= VMF_HASPT;
 
-       if((s=sys_newmap(vmp->vm_endpoint, vmp->vm_arch.vm_seg)) != OK) {
+       if((s=sys_newmap(vmp->vm_endpoint, vmp->vm_arch.vm_seg)) != OK)
                vm_panic("sys_newmap (vm) failed", s);
-       }
-
-
-       /* This is the real stack clicks. */
-       vmp->vm_arch.vm_seg[S].mem_len = ABS2CLICK(stack_bytes);
 
        if((s=pt_bind(&vmp->vm_pt, vmp)) != OK)
                vm_panic("exec_newmem: pt_bind failed", s);
index c311e17e0c3d413535d76a53bd5c7cc7a3899d19..7990684a038f9ae8e642d6070325029779fadfc5 100644 (file)
@@ -13,6 +13,7 @@
 #include <minix/ipc.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <env.h>
 
 PUBLIC void free_proc(struct vmproc *vmp)
 {
-       vmp->vm_flags &= ~VMF_HASPT;
-       pt_free(&vmp->vm_pt);
+       if(vmp->vm_flags & VMF_HASPT) {
+               vmp->vm_flags &= ~VMF_HASPT;
+               pt_free(&vmp->vm_pt);
+       }
        map_free_proc(vmp);
        vmp->vm_regions = NULL;
 #if VMSTATS
index 8a5aa6ae7f14f5aa052c3f64cc13654c40d04c25..041c1243b1790eb695126a1e3aa62ac86a6f1ccd 100644 (file)
 #include <minix/ipc.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/debug.h>
+#include <minix/bitmap.h>
 
+#include <string.h>
 #include <errno.h>
 #include <env.h>
 
@@ -31,6 +34,8 @@ PUBLIC int do_fork(message *msg)
 {
   int r, proc, s, childproc, fullvm;
   struct vmproc *vmp, *vmc;
+  pt_t origpt;
+  vir_bytes msgaddr;
 
   SANITYCHECK(SCL_FUNCTIONS);
 
@@ -49,6 +54,9 @@ PUBLIC int do_fork(message *msg)
 
   vmp = &vmproc[proc];         /* parent */
   vmc = &vmproc[childproc];    /* child */
+  vm_assert(vmc->vm_slot == childproc);
+
+  NOTRUNNABLE(vmp->vm_endpoint);
 
   if(vmp->vm_flags & VMF_HAS_DMA) {
        printf("VM: %d has DMA memory and may not fork\n", msg->VMF_ENDPOINT);
@@ -58,14 +66,20 @@ PUBLIC int do_fork(message *msg)
   fullvm = vmp->vm_flags & VMF_HASPT;
 
   /* The child is basically a copy of the parent. */
+  origpt = vmc->vm_pt;
   *vmc = *vmp;
+  vmc->vm_slot = childproc;
   vmc->vm_regions = NULL;
   vmc->vm_endpoint = NONE;     /* In case someone tries to use it. */
+  vmc->vm_pt = origpt;
+  vmc->vm_flags &= ~VMF_HASPT;
 
 #if VMSTATS
   vmc->vm_bytecopies = 0;
 #endif
 
+  SANITYCHECK(SCL_DETAIL);
+
   if(fullvm) {
        SANITYCHECK(SCL_DETAIL);
 
@@ -74,6 +88,8 @@ PUBLIC int do_fork(message *msg)
                return ENOMEM;
        }
 
+       vmc->vm_flags |= VMF_HASPT;
+
        SANITYCHECK(SCL_DETAIL);
 
        if(map_proc_copy(vmc, vmp) != OK) {
@@ -108,6 +124,7 @@ PUBLIC int do_fork(message *msg)
        /* Create a copy of the parent's core image for the child. */
        child_abs = (phys_bytes) child_base << CLICK_SHIFT;
        parent_abs = (phys_bytes) vmp->vm_arch.vm_seg[D].mem_phys << CLICK_SHIFT;
+       FIXME("VM uses kernel for abscopy");
        s = sys_abscopy(parent_abs, child_abs, prog_bytes);
        if (s < 0) vm_panic("do_fork can't copy", s);
 
@@ -124,14 +141,29 @@ PUBLIC int do_fork(message *msg)
   /* Only inherit these flags. */
   vmc->vm_flags &= (VMF_INUSE|VMF_SEPARATE|VMF_HASPT);
 
+  /* inherit the priv call bitmaps */
+  memcpy(&vmc->vm_call_priv_mask, &vmp->vm_call_priv_mask,
+        sizeof(vmc->vm_call_priv_mask));
+
   /* Tell kernel about the (now successful) FORK. */
   if((r=sys_fork(vmp->vm_endpoint, childproc,
        &vmc->vm_endpoint, vmc->vm_arch.vm_seg,
-       fullvm ? PFF_VMINHIBIT : 0)) != OK) {
+       fullvm ? PFF_VMINHIBIT : 0, &msgaddr)) != OK) {
         vm_panic("do_fork can't sys_fork", r);
   }
 
+  NOTRUNNABLE(vmp->vm_endpoint);
+  NOTRUNNABLE(vmc->vm_endpoint);
+
   if(fullvm) {
+       vir_bytes vir;
+       /* Making these message pages writable is only an optimisation,
+        * so the return value of handle_memory() needn't be checked.
+        */
+       vir = arch_vir2map(vmc, msgaddr);
+       handle_memory(vmc, vir, sizeof(message), 1);
+       vir = arch_vir2map(vmp, msgaddr);
+       handle_memory(vmp, vir, sizeof(message), 1);
        if((r=pt_bind(&vmc->vm_pt, vmc)) != OK)
                vm_panic("fork can't pt_bind", r);
   }
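
/* For illustration: do_fork() above clones the parent slot with a single
 * struct assignment and then restores the fields that must remain
 * per-slot.  A sketch of the idiom with hypothetical stand-in types:
 */

struct slot { int index; void *pagetable; long flags; /* inherited */ };

static void clone_slot(struct slot *child, const struct slot *parent)
{
	int keep_index = child->index;		/* like vmc->vm_slot */
	void *keep_pt = child->pagetable;	/* like origpt = vmc->vm_pt */

	*child = *parent;			/* like *vmc = *vmp */

	child->index = keep_index;		/* restore per-slot fields */
	child->pagetable = keep_pt;		/* child gets its own pt later */
}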
index 593fb1820ab4305d3ebcc8459fd30f3b5d3a96ad..0889a8f0666cff5b3842d6abe7d25df82904d1fc 100644 (file)
 #define EXTERN
 #endif
 
-EXTERN struct vmproc vmproc[_NR_PROCS+1];
+#define VMP_SYSTEM     _NR_PROCS
+#define VMP_EXECTMP    _NR_PROCS+1
+#define VMP_NR         _NR_PROCS+2
+
+EXTERN struct vmproc vmproc[VMP_NR];
 
 #if SANITYCHECKS
 EXTERN int nocheck;
-u32_t data1[200];
-#define CHECKADDR 0
+EXTERN int incheck;
 EXTERN long vm_sanitychecklevel;
 #endif
 
-#define VMP_SYSTEM     _NR_PROCS
-
 /* vm operation mode state and values */
 EXTERN long vm_paged;
-EXTERN phys_bytes kernel_top_bytes;
+
+EXTERN int meminit_done;
index 9ec3246391b0eab0b4397e26b460ce89649858da..83c38723be1139deba7d6a917ac0d3a143bbda2f 100644 (file)
@@ -14,6 +14,7 @@
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/safecopies.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <string.h>
index cab28038375e7d09dbbaccafd30a89661762bfa5..e6ea34bae5d33d692b58f289d31a3ded3216cf65 100644 (file)
@@ -1,5 +1,12 @@
 
 #include <archtypes.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/type.h>
+#include <minix/com.h>
+#include <minix/ipc.h>
+#include <minix/safecopies.h>
+#include <timers.h>
 
 struct vm_arch {
        struct mem_map  vm_seg[NR_LOCAL_SEGS];  /* text, data, stack */
index 3e44243b70ebd58cbca488f86a011ce57f951a74..05af779f77187b30aa44cfdc71be0d8f12c58708 100644 (file)
@@ -15,7 +15,7 @@
 #define VM_PAGE_SIZE   I386_PAGE_SIZE
 
 /* Where do processes start in linear (i.e. page table) memory? */
-#define VM_PROCSTART   (I386_BIG_PAGE_SIZE*10)
+#define VM_PROCSTART   (I386_BIG_PAGE_SIZE*100)
 
 #define CLICKSPERPAGE (I386_PAGE_SIZE/CLICK_SIZE)
 
index 8e3c827ce1f1821e3449cea2df0ee0a5d1871aad..444729d90590bae3e93fa61972a6574c4caab80f 100644 (file)
@@ -17,6 +17,7 @@
 #include <minix/syslib.h>
 #include <minix/safecopies.h>
 #include <minix/cpufeature.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <assert.h>
 
 #include "memory.h"
 
-int global_bit_ok = 0;
-int bigpage_ok = 0;
+/* PDE slots used to map in the kernel and the page directories, and their values. */
+PRIVATE int kernel_pde = -1, pagedir_pde = -1;
+PRIVATE u32_t kern_pde_val = 0, global_bit = 0, pagedir_pde_val;
 
-/* Location in our virtual address space where we can map in 
- * any physical page we want.
-*/
-static unsigned char *varmap = NULL;   /* Our address space. */
-static u32_t varmap_loc;               /* Our page table. */
+PRIVATE int proc_pde = 0;
+
+/* 4MB page size available in hardware? */
+PRIVATE int bigpage_ok = 0;
 
 /* Our process table entry. */
 struct vmproc *vmp = &vmproc[VM_PROC_NR];
@@ -52,7 +53,7 @@ struct vmproc *vmp = &vmproc[VM_PROC_NR];
  */
 #define SPAREPAGES 5
 int missing_spares = SPAREPAGES;
-static struct {
+PRIVATE struct {
        void *page;
        u32_t phys;
 } sparepages[SPAREPAGES];
@@ -78,7 +79,6 @@ static struct {
 u32_t page_directories_phys, *page_directories = NULL;
 
 #if SANITYCHECKS
-#define PT_SANE(p) { pt_sanitycheck((p), __FILE__, __LINE__); SANITYCHECK(SCL_DETAIL); }
 /*===========================================================================*
  *                             pt_sanitycheck                               *
  *===========================================================================*/
@@ -86,21 +86,37 @@ PUBLIC void pt_sanitycheck(pt_t *pt, char *file, int line)
 {
 /* Basic pt sanity check. */
        int i;
+       int slot;
 
        MYASSERT(pt);
        MYASSERT(pt->pt_dir);
        MYASSERT(pt->pt_dir_phys);
 
-       for(i = 0; i < I386_VM_DIR_ENTRIES; i++) {
+       for(slot = 0; slot < ELEMENTS(vmproc); slot++) {
+               if(pt == &vmproc[slot].vm_pt)
+                       break;
+       }
+
+       if(slot >= ELEMENTS(vmproc)) {
+               vm_panic("pt_sanitycheck: passed pt not in any proc", NO_NUM);
+       }
+
+       MYASSERT(usedpages_add(pt->pt_dir_phys, I386_PAGE_SIZE) == OK);
+
+       for(i = proc_pde; i < I386_VM_DIR_ENTRIES; i++) {
                if(pt->pt_pt[i]) {
+                       if(!(pt->pt_dir[i] & I386_VM_PRESENT)) {
+                               printf("slot %d: pt->pt_pt[%d] = 0x%lx, but pt_dir entry 0x%lx\n",
+                                       slot, i, pt->pt_pt[i], pt->pt_dir[i]);
+                       }
                        MYASSERT(pt->pt_dir[i] & I386_VM_PRESENT);
+                       MYASSERT(usedpages_add(I386_VM_PFA(pt->pt_dir[i]),
+                               I386_PAGE_SIZE) == OK);
                } else {
                        MYASSERT(!(pt->pt_dir[i] & I386_VM_PRESENT));
                }
        }
 }
-#else
-#define PT_SANE(p)
 #endif
 
 /*===========================================================================*
@@ -240,7 +256,6 @@ PRIVATE void *vm_getsparepage(u32_t *phys)
                        return sp;
                }
        }
-       vm_panic("VM: out of spare pages", NO_NUM);
        return NULL;
 }
 
@@ -255,17 +270,16 @@ PRIVATE void *vm_checkspares(void)
        for(s = 0; s < SPAREPAGES && missing_spares > 0; s++)
            if(!sparepages[s].page) {
                n++;
-               sparepages[s].page = vm_allocpages(&sparepages[s].phys, 1,
-                       VMP_SPARE);
-               missing_spares--;
-               vm_assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
+               if((sparepages[s].page = vm_allocpages(&sparepages[s].phys, 1,
+                       VMP_SPARE))) {
+                       missing_spares--;
+                       vm_assert(missing_spares >= 0);
+                       vm_assert(missing_spares <= SPAREPAGES);
+               }
        }
        if(worst < n) worst = n;
        total += n;
-#if 0
-       if(n > 0)
-               printf("VM: made %d spares, total %d, worst %d\n", n, total, worst);
-#endif
+
        return NULL;
 }
 
@@ -293,7 +307,7 @@ PUBLIC void *vm_allocpages(phys_bytes *phys, int pages, int reason)
        vm_assert(level >= 1);
        vm_assert(level <= 2);
 
-       if(level > 1 || !(vmp->vm_flags & VMF_HASPT)) {
+       if(level > 1 || !(vmp->vm_flags & VMF_HASPT) || !meminit_done) {
                int r;
                void *s;
                vm_assert(pages == 1);
@@ -336,6 +350,38 @@ PUBLIC void *vm_allocpages(phys_bytes *phys, int pages, int reason)
        return (void *) arch_map2vir(vmp, loc);
 }
 
+/*===========================================================================*
+ *                             vm_pagelock                                  *
+ *===========================================================================*/
+PUBLIC void vm_pagelock(void *vir, int lockflag)
+{
+/* Make a page allocated by vm_allocpages() read-only (lockflag set) or writable again. */
+       vir_bytes m;
+       int r;
+       u32_t flags = I386_VM_PRESENT | I386_VM_USER;
+       pt_t *pt;
+
+       pt = &vmp->vm_pt;
+       m = arch_vir2map(vmp, (vir_bytes) vir);
+
+       vm_assert(!(m % I386_PAGE_SIZE));
+
+       if(!lockflag)
+               flags |= I386_VM_WRITE;
+
+       /* Update flags. */
+       if((r=pt_writemap(pt, m, 0, I386_PAGE_SIZE,
+               flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) {
+               vm_panic("vm_lockpage: pt_writemap failed\n", NO_NUM);
+       }
+
+       if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
+               vm_panic("VMCTL_FLUSHTLB failed", r);
+       }
+
+       return;
+}
+
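
/* For illustration: vm_pagelock() above flips only the write bit of an
 * already mapped page (WMF_WRITEFLAGSONLY) and flushes the TLB.  The slab
 * sanity-check facility described in the commit message can sit on top of
 * it: slab objects live on pages that stay read-only, and every legitimate
 * write is bracketed by an unlock/lock pair.  A hedged sketch -- the macro
 * below is illustrative and assumes obj is page-aligned, not the exact
 * MINIX USE() definition:
 */

#define USE(obj, code) do {				\
	vm_pagelock((void *) (obj), 0);	/* writable */	\
	do { code } while (0);		/* mutate */	\
	vm_pagelock((void *) (obj), 1);	/* readonly */	\
} while (0)

/* Any stray write that bypasses USE() then page faults inside VM itself,
 * which is exactly what the sanity-checking mode wants to catch.
 */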
 /*===========================================================================*
  *                             pt_ptalloc                                   *
  *===========================================================================*/
@@ -347,14 +393,13 @@ PRIVATE int pt_ptalloc(pt_t *pt, int pde, u32_t flags)
 
        /* Argument must make sense. */
        vm_assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
-       vm_assert(!(flags & ~(PTF_ALLFLAGS | PTF_MAPALLOC)));
+       vm_assert(!(flags & ~(PTF_ALLFLAGS)));
 
        /* We don't expect to overwrite page directory entry, nor
         * storage for the page table.
         */
        vm_assert(!(pt->pt_dir[pde] & I386_VM_PRESENT));
        vm_assert(!pt->pt_pt[pde]);
-       PT_SANE(pt);
 
        /* Get storage for the page table. */
         if(!(pt->pt_pt[pde] = vm_allocpages(&pt_phys, 1, VMP_PAGETABLE)))
@@ -370,7 +415,6 @@ PRIVATE int pt_ptalloc(pt_t *pt, int pde, u32_t flags)
        pt->pt_dir[pde] = (pt_phys & I386_VM_ADDR_MASK) | flags
                | I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;
        vm_assert(flags & I386_VM_PRESENT);
-       PT_SANE(pt);
 
        return OK;
 }
@@ -385,10 +429,9 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
 /* Page directory and table entries for this virtual address. */
        int p, pages, pde;
        int finalpde;
-       SANITYCHECK(SCL_FUNCTIONS);
 
        vm_assert(!(bytes % I386_PAGE_SIZE));
-       vm_assert(!(flags & ~(PTF_ALLFLAGS | PTF_MAPALLOC)));
+       vm_assert(!(flags & ~(PTF_ALLFLAGS)));
 
        pages = bytes / I386_PAGE_SIZE;
 
@@ -405,8 +448,6 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
        }
 #endif
 
-       PT_SANE(pt);
-
        finalpde = I386_VM_PDE(v + I386_PAGE_SIZE * pages);
 
        /* First make sure all the necessary page tables are allocated,
@@ -417,6 +458,8 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
        for(pde = I386_VM_PDE(v); pde <= finalpde; pde++) {
                vm_assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
                if(pt->pt_dir[pde] & I386_VM_BIGPAGE) {
+                       printf("pt_writemap: trying to write 0x%lx into 0x%lx\n",
+                               physaddr, v);
                         vm_panic("pt_writemap: BIGPAGE found", NO_NUM);
                }
                if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
@@ -436,13 +479,10 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
                vm_assert(pt->pt_dir[pde] & I386_VM_PRESENT);
        }
 
-       PT_SANE(pt);
-
        /* Now write in them. */
        for(p = 0; p < pages; p++) {
                int pde = I386_VM_PDE(v);
                int pte = I386_VM_PTE(v);
-       PT_SANE(pt);
 
                vm_assert(!(v % I386_PAGE_SIZE));
                vm_assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
@@ -456,22 +496,25 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
                 */
                vm_assert((pt->pt_dir[pde] & I386_VM_PRESENT) && pt->pt_pt[pde]);
 
-       PT_SANE(pt);
 #if SANITYCHECKS
                /* We don't expect to overwrite a page. */
                if(!(writemapflags & WMF_OVERWRITE))
                        vm_assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT));
 #endif
+               if(writemapflags & WMF_WRITEFLAGSONLY) {
+                       physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK;
+               }
+
+               if(writemapflags & WMF_FREE) {
+                       printf("pt_writemap: should free 0x%lx\n", physaddr);
+               }
 
                /* Write pagetable entry. */
                pt->pt_pt[pde][pte] = (physaddr & I386_VM_ADDR_MASK) | flags;
 
                physaddr += I386_PAGE_SIZE;
                v += I386_PAGE_SIZE;
-       PT_SANE(pt);
        }
-       SANITYCHECK(SCL_FUNCTIONS);
-       PT_SANE(pt);
 
        return OK;
 }
@@ -488,7 +531,14 @@ PUBLIC int pt_new(pt_t *pt)
  */
        int i;
 
-        if(!(pt->pt_dir = vm_allocpages(&pt->pt_dir_phys, 1, VMP_PAGEDIR))) {
+       /* Never reallocate or move a process slot's page directory
+        * once it has been created. This is marginally faster, and it
+        * avoids having to invalidate the mappings in in-kernel page
+        * tables that point to the page directories (the
+        * page_directories data).
+        */
+        if(!pt->pt_dir &&
+          !(pt->pt_dir = vm_allocpages(&pt->pt_dir_phys, 1, VMP_PAGEDIR))) {
                return ENOMEM;
        }
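
/* For illustration: pt_new() above now allocates the page directory only on
 * first use and keeps it for the lifetime of the slot, so its physical
 * address -- which the kernel caches through the page_directories page --
 * never changes.  A sketch of the allocate-once idiom, using a hypothetical
 * page-sized allocation as a stand-in for vm_allocpages():
 */

#include <stdlib.h>

static int dir_init(void **dir)
{
	if (*dir == NULL && (*dir = calloc(1, 4096)) == NULL)
		return -1;	/* ENOMEM: no change to the slot */
	/* (Re)initialize the directory contents here, as pt_new() does. */
	return 0;
}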
 
@@ -520,13 +570,14 @@ PUBLIC void pt_init(void)
  */
         pt_t *newpt;
         int s, r;
-        vir_bytes v;
+        vir_bytes v, kpagedir;
         phys_bytes lo, hi; 
         vir_bytes extra_clicks;
         u32_t moveup = 0;
-
-       global_bit_ok = _cpufeature(_CPUF_I386_PGE);
-       bigpage_ok = _cpufeature(_CPUF_I386_PSE);
+       int global_bit_ok = 0;
+       int free_pde;
+       int p;
+       vir_bytes kernlimit;
 
         /* Shorthand. */
         newpt = &vmp->vm_pt;
@@ -541,12 +592,37 @@ PUBLIC void pt_init(void)
         }
 
        missing_spares = 0;
-        
-        /* Make new page table for ourselves, partly copied
-         * from the current one.
-         */     
-        if(pt_new(newpt) != OK)
-                vm_panic("pt_init: pt_new failed", NO_NUM); 
+
+       /* global bit and 4MB pages available? */
+       global_bit_ok = _cpufeature(_CPUF_I386_PGE);
+       bigpage_ok = _cpufeature(_CPUF_I386_PSE);
+
+       /* Set bit for PTE's and PDE's if available. */
+       if(global_bit_ok)
+               global_bit = I386_VM_GLOBAL;
+
+       /* Figure out kernel pde slot. */
+       {
+               int pde1, pde2;
+               pde1 = I386_VM_PDE(KERNEL_TEXT);
+               pde2 = I386_VM_PDE(KERNEL_DATA+KERNEL_DATA_LEN);
+               if(pde1 != pde2)
+                       vm_panic("pt_init: kernel too big", NO_NUM); 
+
+               /* Map in kernel with this single pde value if 4MB pages
+                * supported.
+                */
+               kern_pde_val = (KERNEL_TEXT & I386_VM_ADDR_MASK_4MB) |
+                               I386_VM_BIGPAGE|
+                               I386_VM_USER|
+                               I386_VM_PRESENT|I386_VM_WRITE|global_bit;
+               kernel_pde = pde1;
+               vm_assert(kernel_pde >= 0);
+               free_pde = kernel_pde+1;
+       }
+
+       /* First unused pde. */
+       proc_pde = free_pde;
            
         /* Initial (current) range of our virtual address space. */
         lo = CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys);
@@ -562,21 +638,27 @@ PUBLIC void pt_init(void)
                 vm_assert(!(lo % I386_PAGE_SIZE));
                 vm_assert(!(moveup % I386_PAGE_SIZE));
         }
+        
+        /* Make new page table for ourselves, partly copied
+         * from the current one.
+         */     
+        if(pt_new(newpt) != OK)
+                vm_panic("pt_init: pt_new failed", NO_NUM); 
+
+       /* Old position mapped in? */
+       pt_check(vmp);
                 
         /* Set up mappings for VM process. */
         for(v = lo; v < hi; v += I386_PAGE_SIZE)  {
                 phys_bytes addr;
                 u32_t flags; 
         
-                /* We have to write the old and new position in the PT,
+                /* We have to write the new position in the PT,
                  * so we can move our segments.
                  */ 
                 if(pt_writemap(newpt, v+moveup, v, I386_PAGE_SIZE,
                         I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK)
                         vm_panic("pt_init: pt_writemap failed", NO_NUM);
-                if(pt_writemap(newpt, v, v, I386_PAGE_SIZE,
-                        I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK)
-                        vm_panic("pt_init: pt_writemap failed", NO_NUM);
         }
        
         /* Move segments up too. */
@@ -584,21 +666,14 @@ PUBLIC void pt_init(void)
         vmp->vm_arch.vm_seg[D].mem_phys += ABS2CLICK(moveup);
         vmp->vm_arch.vm_seg[S].mem_phys += ABS2CLICK(moveup);
        
-#if 0
-        /* Map in kernel. */
-        if(pt_mapkernel(newpt) != OK)
-                vm_panic("pt_init: pt_mapkernel failed", NO_NUM);
-
        /* Allocate us a page table in which to remember page directory
         * pointers.
         */
        if(!(page_directories = vm_allocpages(&page_directories_phys,
                1, VMP_PAGETABLE)))
                 vm_panic("no virt addr for vm mappings", NO_NUM);
-#endif
-       
-        /* Give our process the new, copied, private page table. */
-        pt_bind(newpt, vmp);
+
+       memset(page_directories, 0, I386_PAGE_SIZE);
        
         /* Increase our hardware data segment to create virtual address
          * space above our stack. We want to increase it to VM_DATATOP,
@@ -614,19 +689,6 @@ PUBLIC void pt_init(void)
                 (vmp->vm_arch.vm_seg[S].mem_vir +
                 vmp->vm_arch.vm_seg[S].mem_len) << CLICK_SHIFT;
        
-        if((s=sys_newmap(VM_PROC_NR, vmp->vm_arch.vm_seg)) != OK)
-                vm_panic("VM: pt_init: sys_newmap failed", s);
-       
-        /* Back to reality - this is where the stack actually is. */
-        vmp->vm_arch.vm_seg[S].mem_len -= extra_clicks;
-       
-        /* Wipe old mappings from VM. */
-        for(v = lo; v < hi; v += I386_PAGE_SIZE)  {
-                if(pt_writemap(newpt, v, MAP_NONE, I386_PAGE_SIZE,
-                        0, WMF_OVERWRITE) != OK)
-                        vm_panic("pt_init: pt_writemap failed", NO_NUM);
-        }
-       
         /* Where our free virtual address space starts.
          * This is only a hint to the VM system.
          */
@@ -635,17 +697,49 @@ PUBLIC void pt_init(void)
         /* Let other functions know VM now has a private page table. */
         vmp->vm_flags |= VMF_HASPT;
 
-        /* Reserve a page in our virtual address space that we
-         * can use to map in arbitrary physical pages.
-         */
-        varmap_loc = findhole(newpt, I386_PAGE_SIZE,
-                arch_vir2map(vmp, vmp->vm_stacktop),
-                vmp->vm_arch.vm_data_top);
-        if(varmap_loc == NO_MEM) {
-                vm_panic("no virt addr for vm mappings", NO_NUM);
-        }
-        varmap = (unsigned char *) arch_map2vir(vmp, varmap_loc);
+       /* Find a PDE below the process range that is available for
+        * mapping in the page directories (readonly).
+        */
+       pagedir_pde = free_pde++;
+       pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) |
+                       I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;
+
+       /* Tell kernel about free pde's. */
+       while(free_pde*I386_BIG_PAGE_SIZE < VM_PROCSTART) {
+               if((r=sys_vmctl(SELF, VMCTL_I386_FREEPDE, free_pde++)) != OK) {
+                       vm_panic("VMCTL_I386_FREEPDE failed", r);
+               }
+       }
+
+       /* first pde in use by process. */
+       proc_pde = free_pde;
+
+       kernlimit = free_pde*I386_BIG_PAGE_SIZE;
+
+       /* Increase kernel segment to address this memory. */
+       if((r=sys_vmctl(SELF, VMCTL_I386_KERNELLIMIT, kernlimit)) != OK) {
+                vm_panic("VMCTL_I386_KERNELLIMIT failed", r);
+       }
 
+       kpagedir = arch_map2vir(&vmproc[VMP_SYSTEM],
+               pagedir_pde*I386_BIG_PAGE_SIZE);
+
+       /* Tell kernel how to get at the page directories. */
+       if((r=sys_vmctl(SELF, VMCTL_I386_PAGEDIRS, kpagedir)) != OK) {
+                vm_panic("VMCTL_I386_KERNELLIMIT failed", r);
+       }
+       
+        /* Give our process the new, copied, private page table. */
+       pt_mapkernel(newpt);    /* didn't know about vm_dir pages earlier */
+        pt_bind(newpt, vmp);
+       
+       /* Now actually enable paging. */
+       if(sys_vmctl_enable_paging(vmp->vm_arch.vm_seg) != OK)
+               vm_panic("pt_init: enable paging failed", NO_NUM);
+
+        /* Back to reality - this is where the stack actually is. */
+        vmp->vm_arch.vm_seg[S].mem_len -= extra_clicks;
+       
         /* All OK. */
         return;
 }
@@ -656,24 +750,28 @@ PUBLIC void pt_init(void)
  *===========================================================================*/
 PUBLIC int pt_bind(pt_t *pt, struct vmproc *who)
 {
-       int slot;
+       int slot, ispt;
+       u32_t phys;
 
        /* Basic sanity checks. */
        vm_assert(who);
        vm_assert(who->vm_flags & VMF_INUSE);
-       if(pt) PT_SANE(pt);
        vm_assert(pt);
 
-#if 0
        slot = who->vm_slot;
        vm_assert(slot >= 0);
        vm_assert(slot < ELEMENTS(vmproc));
-       vm_assert(!(pt->pt_dir_phys & ~I386_VM_ADDR_MASK));
+       vm_assert(slot < I386_VM_PT_ENTRIES);
 
-       page_directories[slot] = (pt->pt_dir_phys & I386_VM_ADDR_MASK) | 
-               (I386_VM_PRESENT|I386_VM_WRITE);
-#endif
+       phys = pt->pt_dir_phys & I386_VM_ADDR_MASK;
+       vm_assert(pt->pt_dir_phys == phys);
+
+       /* Update "page directory pagetable." */
+       page_directories[slot] = phys | I386_VM_PRESENT|I386_VM_WRITE;
 
+#if 0
+       printf("VM: slot %d has pde val 0x%lx\n", slot, page_directories[slot]);
+#endif
        /* Tell kernel about new page table root. */
        return sys_vmctl(who->vm_endpoint, VMCTL_I386_SETCR3,
                pt ? pt->pt_dir_phys : 0);
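
/* For illustration: pt_bind() above publishes every process's page
 * directory address in a single kernel-mapped page, indexed by slot, so
 * the kernel can switch address spaces without consulting VM.  A
 * self-contained sketch of that bookkeeping (PDE flag values as on i386;
 * the array size is hypothetical):
 */

#include <stdint.h>

#define PD_SLOTS	1024		/* one 4K page of 32-bit entries */
#define PD_PRESENT	0x001
#define PD_WRITE	0x002

static uint32_t pd_page[PD_SLOTS];	/* stands in for page_directories */

static void pd_bind(int slot, uint32_t dir_phys)
{
	/* The address is page-aligned, so the low bits can carry flags. */
	pd_page[slot] = (dir_phys & ~0xfffU) | PD_PRESENT | PD_WRITE;
}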
@@ -687,24 +785,10 @@ PUBLIC void pt_free(pt_t *pt)
 /* Free memory associated with this pagetable. */
        int i;
 
-       PT_SANE(pt);
-
-       for(i = 0; i < I386_VM_DIR_ENTRIES; i++) {
-               int p;
-               if(pt->pt_pt[i]) {
-                  for(p = 0; p < I386_VM_PT_ENTRIES; p++) {
-                       if((pt->pt_pt[i][p] & (PTF_MAPALLOC | I386_VM_PRESENT)) 
-                        == (PTF_MAPALLOC | I386_VM_PRESENT)) {
-                                       u32_t pa = I386_VM_PFA(pt->pt_pt[i][p]);
-                                       FREE_MEM(ABS2CLICK(pa), CLICKSPERPAGE);
-                               }
-                 }
-                 vm_freepages((vir_bytes) pt->pt_pt[i],
-                       I386_VM_PFA(pt->pt_dir[i]), 1, VMP_PAGETABLE);
-               }
-       }
-
-       vm_freepages((vir_bytes) pt->pt_dir, pt->pt_dir_phys, 1, VMP_PAGEDIR);
+       for(i = 0; i < I386_VM_DIR_ENTRIES; i++)
+               if(pt->pt_pt[i])
+                       vm_freepages((vir_bytes) pt->pt_pt[i],
+                               I386_VM_PFA(pt->pt_dir[i]), 1, VMP_PAGETABLE);
 
        return;
 }
@@ -715,77 +799,51 @@ PUBLIC void pt_free(pt_t *pt)
 PUBLIC int pt_mapkernel(pt_t *pt)
 {
        int r;
-       static int pde = -1, do_bigpage = 0;
-       u32_t global = 0;
-       static u32_t kern_phys;
        static int printed = 0;
 
-       if(global_bit_ok) global = I386_VM_GLOBAL;
-
         /* Any i386 page table needs to map in the kernel address space. */
         vm_assert(vmproc[VMP_SYSTEM].vm_flags & VMF_INUSE);
 
-       if(pde == -1 && bigpage_ok) {
-               int pde1, pde2;
-               pde1 = I386_VM_PDE(KERNEL_TEXT);
-               pde2 = I386_VM_PDE(KERNEL_DATA+KERNEL_DATA_LEN);
-               if(pde1 != pde2) {
-                       printf("VM: pt_mapkernel: kernel too big?");
-                       bigpage_ok = 0;
-               } else {
-                       kern_phys = KERNEL_TEXT & I386_VM_ADDR_MASK_4MB;
-                       pde = pde1;
-                       do_bigpage = 1;
-                       vm_assert(pde >= 0);
-               }
-       }
-
-       if(do_bigpage) {
-               pt->pt_dir[pde] = kern_phys |
-                       I386_VM_BIGPAGE|I386_VM_PRESENT|I386_VM_WRITE|global;
+       if(bigpage_ok) {
+               if(kernel_pde >= 0) {
+                       pt->pt_dir[kernel_pde] = kern_pde_val;
+               } else
+                       vm_panic("VM: pt_mapkernel: no kernel pde", NO_NUM);
        } else {
+               vm_panic("VM: pt_mapkernel: no bigpage", NO_NUM);
+
                /* Map in text. flags: don't write, supervisor only */
                if((r=pt_writemap(pt, KERNEL_TEXT, KERNEL_TEXT, KERNEL_TEXT_LEN,
-                       I386_VM_PRESENT|global, 0)) != OK)
+                       I386_VM_PRESENT|global_bit, 0)) != OK)
                        return r;
  
                /* Map in data. flags: read-write, supervisor only */
                if((r=pt_writemap(pt, KERNEL_DATA, KERNEL_DATA, KERNEL_DATA_LEN,
-                       I386_VM_PRESENT|I386_VM_WRITE|global, 0)) != OK)
+                       I386_VM_PRESENT|I386_VM_WRITE, 0)) != OK)
                        return r;
        }
 
+       if(pagedir_pde >= 0) {
+               /* Kernel also wants to know about all page directories. */
+               pt->pt_dir[pagedir_pde] = pagedir_pde_val;
+       }
+
        return OK;
 }
 
 /*===========================================================================*
- *                             pt_freerange                                 *
+ *                              pt_check                                *
  *===========================================================================*/
-PUBLIC void pt_freerange(pt_t *pt, vir_bytes low, vir_bytes high)
+PUBLIC void pt_check(struct vmproc *vmp)
 {
-/* Free memory allocated by pagetable functions in this range. */
-       int pde;
-       u32_t v;
-
-       PT_SANE(pt);
-
-       for(v = low; v < high; v += I386_PAGE_SIZE) {
-               int pte;
-               pde = I386_VM_PDE(v);
-               pte = I386_VM_PTE(v);
-               if(!(pt->pt_dir[pde] & I386_VM_PRESENT))
-                       continue;
-               if((pt->pt_pt[pde][pte] & (PTF_MAPALLOC | I386_VM_PRESENT)) 
-                == (PTF_MAPALLOC | I386_VM_PRESENT)) {
-                       u32_t pa = I386_VM_PFA(pt->pt_pt[pde][pte]);
-                       FREE_MEM(ABS2CLICK(pa), CLICKSPERPAGE);
-                       pt->pt_pt[pde][pte] = 0;
-               }
+       phys_bytes hi;
+        hi = CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_phys +
+                vmp->vm_arch.vm_seg[S].mem_len);
+       if(hi > (kernel_pde+1) * I386_BIG_PAGE_SIZE) {
+               printf("VM: %d doesn't fit in kernel range (0x%lx)\n",
+                       vmp->vm_endpoint, hi);
+               vm_panic("boot time processes too big", NO_NUM);
        }
-
-       PT_SANE(pt);
-
-       return;
 }
 
 /*===========================================================================*
@@ -796,82 +854,3 @@ PUBLIC void pt_cycle(void)
        vm_checkspares();
 }
 
-/* In sanity check mode, pages are mapped and unmapped explicitly, so
- * unexpected double mappings (overwriting a page table entry) are caught.
- * If not sanity checking, simply keep the page mapped in and overwrite
- * the mapping entry; we need WMF_OVERWRITE for that in PHYS_MAP though.
- */
-#if SANITYCHECKS
-#define MAPFLAGS       0
-#else
-#define MAPFLAGS       WMF_OVERWRITE
-#endif
-
-static u32_t ismapped = MAP_NONE;
-
-#define PHYS_MAP(a, o)                                                 \
-{      int r;                                                          \
-       u32_t wantmapped;                                               \
-       vm_assert(varmap);                                              \
-       (o) = (a) % I386_PAGE_SIZE;                                     \
-       wantmapped = (a) - (o);                                         \
-       if(wantmapped != ismapped || ismapped == MAP_NONE) {            \
-               r = pt_writemap(&vmp->vm_pt, (vir_bytes) varmap_loc,    \
-                       wantmapped, I386_PAGE_SIZE,                     \
-                       I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, \
-                               MAPFLAGS);                              \
-               if(r != OK)                                             \
-                       vm_panic("PHYS_MAP: pt_writemap", NO_NUM);      \
-               ismapped = wantmapped;                                  \
-               /* pt_bind() flushes TLB. */                            \
-               pt_bind(&vmp->vm_pt, vmp);                              \
-       }                                                               \
-}
-
-#define PHYSMAGIC 0x7b9a0590
-
-#if SANITYCHECKS
-#define PHYS_UNMAP if(OK != pt_writemap(&vmp->vm_pt, varmap_loc, MAP_NONE,\
-       I386_PAGE_SIZE, 0, WMF_OVERWRITE)) {                            \
-               vm_panic("PHYS_UNMAP: pt_writemap failed", NO_NUM); }
-       ismapped = MAP_NONE;
-#endif
-
-#define PHYS_VAL(o) (* (phys_bytes *) (varmap + (o)))
-
-
-/*===========================================================================*
- *                              phys_writeaddr                               *
- *===========================================================================*/
-PUBLIC void phys_writeaddr(phys_bytes addr, phys_bytes v1, phys_bytes v2)
-{
-       phys_bytes offset;
-
-       SANITYCHECK(SCL_DETAIL);
-       PHYS_MAP(addr, offset);
-       PHYS_VAL(offset) = v1;
-       PHYS_VAL(offset + sizeof(phys_bytes)) = v2;
-#if SANITYCHECKS
-       PHYS_VAL(offset + 2*sizeof(phys_bytes)) = PHYSMAGIC;
-       PHYS_UNMAP;
-#endif
-       SANITYCHECK(SCL_DETAIL);
-}
-
-/*===========================================================================*
- *                              phys_readaddr                                *
- *===========================================================================*/
-PUBLIC void phys_readaddr(phys_bytes addr, phys_bytes *v1, phys_bytes *v2)
-{
-       phys_bytes offset;
-
-       SANITYCHECK(SCL_DETAIL);
-       PHYS_MAP(addr, offset);
-       *v1 = PHYS_VAL(offset);
-       *v2 = PHYS_VAL(offset + sizeof(phys_bytes));
-#if SANITYCHECKS
-       vm_assert(PHYS_VAL(offset + 2*sizeof(phys_bytes)) == PHYSMAGIC);
-       PHYS_UNMAP;
-#endif
-       SANITYCHECK(SCL_DETAIL);
-}
index b26ac9cd360f7fc4045eafbf9210ac5986cf699d..42fb7d7e62efa0178e231415dbc7b6e82fdd422d 100644 (file)
@@ -5,6 +5,8 @@
 #include <stdint.h>
 #include <sys/vm_i386.h>
 
+#include "../vm.h"
+
 /* An i386 pagetable. */
 typedef struct {
        /* Directory entries in VM addr space - root of page table.  */
@@ -34,5 +36,12 @@ typedef struct {
  */
 #define PTF_ALLFLAGS   (PTF_WRITE|PTF_PRESENT|PTF_USER|PTF_GLOBAL)
 
+#if SANITYCHECKS
+#define PT_SANE(p) { pt_sanitycheck((p), __FILE__, __LINE__); }
+#else
+#define PT_SANE(p)
+#endif
+
 #endif
 
+
index 1f55874cf4b3a6e2345ba189de20efed135f0be5..aa8286c2871c7992ba40ec17c6a0c692c183dcf8 100644 (file)
@@ -13,6 +13,7 @@
 #include <minix/ipc.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/bitmap.h>
 
 #include <sys/mman.h>
 
 
 #include "memory.h"
 
-#define PAGE_SIZE      4096
-#define PAGE_DIR_SIZE  (1024*PAGE_SIZE)        
-#define PAGE_TABLE_COVER (1024*PAGE_SIZE)
-/*=========================================================================*
- *                             arch_init_vm                               *
- *=========================================================================*/
-PUBLIC void arch_init_vm(mem_chunks)
-struct memory mem_chunks[NR_MEMS];
-{
-       phys_bytes high, bytes;
-       phys_clicks clicks, base_click;
-       unsigned pages;
-       int i, r;
-
-       /* Compute the highest memory location */
-       high= 0;
-       for (i= 0; i<NR_MEMS; i++)
-       {
-               if (mem_chunks[i].size == 0)
-                       continue;
-               if (mem_chunks[i].base + mem_chunks[i].size > high)
-                       high= mem_chunks[i].base + mem_chunks[i].size;
-       }
-
-       high <<= CLICK_SHIFT;
-#if VERBOSE_VM
-       printf("do_x86_vm: found high 0x%x\n", high);
-#endif
-       
-       /* Rounding up */
-       high= (high-1+PAGE_DIR_SIZE) & ~(PAGE_DIR_SIZE-1);
-
-       /* The number of pages we need is one for the page directory, enough
-        * page tables to cover the memory, and one page for alignement.
-        */
-       pages= 1 + (high + PAGE_TABLE_COVER-1)/PAGE_TABLE_COVER + 1;
-       bytes= pages*PAGE_SIZE;
-       clicks= (bytes + CLICK_SIZE-1) >> CLICK_SHIFT;
-
-#if VERBOSE_VM
-       printf("do_x86_vm: need %d pages\n", pages);
-       printf("do_x86_vm: need %d bytes\n", bytes);
-       printf("do_x86_vm: need %d clicks\n", clicks);
-#endif
-
-       for (i= 0; i<NR_MEMS; i++)
-       {
-               if (mem_chunks[i].size <= clicks)
-                       continue;
-               break;
-       }
-       if (i >= NR_MEMS)
-               panic("VM", "not enough memory for VM page tables?", NO_NUM);
-       base_click= mem_chunks[i].base;
-       mem_chunks[i].base += clicks;
-       mem_chunks[i].size -= clicks;
-
-#if VERBOSE_VM
-       printf("do_x86_vm: using 0x%x clicks @ 0x%x\n", clicks, base_click);
-#endif
-       r= sys_vm_setbuf(base_click << CLICK_SHIFT, clicks << CLICK_SHIFT,
-               high);
-       if (r != 0)
-               printf("do_x86_vm: sys_vm_setbuf failed: %d\n", r);
-
-}
-
 /*===========================================================================*
  *                             arch_map2vir                                 *
  *===========================================================================*/
 PUBLIC vir_bytes arch_map2vir(struct vmproc *vmp, vir_bytes addr)
 {
-       vir_bytes bottom = CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys);
+       vir_bytes textstart = CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys);
+       vir_bytes datastart = CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys);
 
-       vm_assert(bottom <= addr);
+       /* Could be a text address. */
+       vm_assert(datastart <= addr || textstart <= addr);
 
-       return addr - bottom;
+       return addr - datastart;
 }
 
 /*===========================================================================*
@@ -113,3 +49,13 @@ PUBLIC vir_bytes arch_vir2map(struct vmproc *vmp, vir_bytes addr)
 
        return addr + bottom;
 }
+
+/*===========================================================================*
+ *                             arch_vir2map_text                            *
+ *===========================================================================*/
+PUBLIC vir_bytes arch_vir2map_text(struct vmproc *vmp, vir_bytes addr)
+{
+       vir_bytes bottom = CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys);
+
+       return addr + bottom;
+}
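
/* For illustration: the translation pair above simply offsets by a
 * segment's physical base, because a process's linear (page table)
 * address is its segment base plus the segment-relative virtual address.
 * A sketch with a hypothetical base value:
 */

typedef unsigned long xvir_bytes;	/* stand-in for vir_bytes */

static const xvir_bytes seg_base = 0x800000;	/* hypothetical D base */

static xvir_bytes x_vir2map(xvir_bytes addr) { return addr + seg_base; }
static xvir_bytes x_map2vir(xvir_bytes addr) { return addr - seg_base; }

/* arch_vir2map_text() is the same translation using the T segment's base,
 * needed now that text can live at a different base than data.
 */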
index 8b3b18d3ae6bc1ce13e25aa0cd43c9c9c5c7a2e7..910bc6cde13551078ba39aee88652943d035235c 100644 (file)
@@ -16,6 +16,8 @@
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/const.h>
+#include <minix/bitmap.h>
+#include <minix/crtso.h>
 
 #include <errno.h>
 #include <string.h>
@@ -45,6 +47,7 @@ typedef u32_t mask_t;
 #define MAXEPM (ANYEPM-1)
 #define EPM(e) ((1L) << ((e)-MINEPM))
 #define EPMOK(mask, ep) (((mask) & EPM(ANYEPM)) || ((ep) >= MINEPM && (ep) <= MAXEPM && (EPM(ep) & (mask))))
+#define EPMANYOK(mask, ep) ((mask) & EPM(ANYEPM))
 
 /* Table of calls and a macro to test for being in range. */
 struct {
@@ -76,10 +79,9 @@ PUBLIC int main(void)
   int result, who_e;
 
 #if SANITYCHECKS
-  nocheck = 0;
-  memcpy(data1, CHECKADDR, sizeof(data1));    
+  incheck = nocheck = 0;
+  FIXME("VM SANITYCHECKS are on");
 #endif
-       SANITYCHECK(SCL_TOP);
 
   vm_paged = 1;
   env_parse("vm_paged", "d", 0, &vm_paged, 0, 1);
@@ -87,10 +89,7 @@ PUBLIC int main(void)
   env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX);
 #endif
 
-       SANITYCHECK(SCL_TOP);
-
   vm_init();
-       SANITYCHECK(SCL_TOP);
 
   /* This is VM's main loop. */
   while (TRUE) {
@@ -100,9 +99,6 @@ PUBLIC int main(void)
        if(missing_spares > 0) {
                pt_cycle();     /* pagetable code wants to be called */
        }
-#if SANITYCHECKS
-       slabstats();
-#endif
        SANITYCHECK(SCL_DETAIL);
 
        if ((r=receive(ANY, &msg)) != OK)
@@ -114,21 +110,18 @@ PUBLIC int main(void)
                switch(msg.m_source) {
                        case SYSTEM:
                                /* Kernel wants to have memory ranges
-                                * verified.
+                                * verified, and/or pagefaults handled.
                                 */
                                do_memory();
                                break;
+                       case HARDWARE:
+                               do_pagefaults();
+                               break;
                        case PM_PROC_NR:
                                /* PM sends a notify() on shutdown, which
                                 * is OK and we ignore.
                                 */
                                break;
-                       case HARDWARE:
-                               /* This indicates a page fault has happened,
-                                * which we have to handle.
-                                */
-                               do_pagefaults();
-                               break;
                        default:
                                /* No-one else should send us notifies. */
                                printf("VM: ignoring notify() from %d\n",
@@ -147,6 +140,26 @@ PUBLIC int main(void)
                printf("VM: restricted call %s from %d instead of 0x%lx\n",
                        vm_calls[c].vmc_name, msg.m_source,
                        vm_calls[c].vmc_callers);
+       } else if (EPMANYOK(vm_calls[c].vmc_callers, who_e) &&
+                  c != VM_MMAP-VM_RQ_BASE &&
+                  c != VM_MUNMAP_TEXT-VM_RQ_BASE &&
+                  c != VM_MUNMAP-VM_RQ_BASE) {
+               /* Check the per-process VM ACL; only ANYEPM calls are
+                * checked here, and the hard-coded permission checks
+                * above are skipped.
+                */
+               int n;
+
+               if ((r = vm_isokendpt(who_e, &n)) != OK)
+                       vm_panic("VM: from strange source.", who_e);
+
+               if (!GET_BIT(vmproc[n].vm_call_priv_mask, c))
+                       printf("VM: restricted call %s from %d\n",
+                              vm_calls[c].vmc_name, who_e);
+               else {
+       SANITYCHECK(SCL_FUNCTIONS);
+                       result = vm_calls[c].vmc_func(&msg);
+       SANITYCHECK(SCL_FUNCTIONS);
+               }
        } else {
        SANITYCHECK(SCL_FUNCTIONS);
                result = vm_calls[c].vmc_func(&msg);
@@ -171,12 +184,15 @@ PUBLIC int main(void)
   return(OK);
 }
 
+extern int unmap_ok;
+
 /*===========================================================================*
  *                             vm_init                                      *
  *===========================================================================*/
 PRIVATE void vm_init(void)
 {
        int s, i;
+       int click, clicksforgotten = 0;
        struct memory mem_chunks[NR_MEMS];
        struct boot_image image[NR_BOOT_PROCS];
        struct boot_image *ip;
@@ -241,37 +257,12 @@ PRIVATE void vm_init(void)
                        vmp->vm_flags |= VMF_SEPARATE;
        }
 
-
-       /* Let architecture-dependent VM initialization use some memory. */
-       arch_init_vm(mem_chunks);
-
        /* Architecture-dependent initialization. */
        pt_init();
 
        /* Initialize tables to all physical memory. */
        mem_init(mem_chunks);
-
-       /* Bits of code need to know where a process can
-        * start in a pagetable.
-        */
-        kernel_top_bytes = find_kernel_top();
-
-       /* Can first kernel pages of code and data be (left) mapped out?
-        * If so, change the SYSTEM process' memory map to reflect this
-        * (future mappings of SYSTEM into other processes will not include
-        * first pages), and free the first pages.
-        */
-       if(vm_paged && sys_vmctl(SELF, VMCTL_NOPAGEZERO, 0) == OK) {
-               struct vmproc *vmp;
-               vmp = &vmproc[VMP_SYSTEM];
-               if(vmp->vm_arch.vm_seg[T].mem_len > 0) {
-#define DIFF CLICKSPERPAGE
-                       vmp->vm_arch.vm_seg[T].mem_phys += DIFF;
-                       vmp->vm_arch.vm_seg[T].mem_len -= DIFF;
-               }
-               vmp->vm_arch.vm_seg[D].mem_phys += DIFF;
-               vmp->vm_arch.vm_seg[D].mem_len -= DIFF;
-       }
+       meminit_done = 1;
 
        /* Give these processes their own page table. */
        for (ip = &image[0]; ip < &image[NR_BOOT_PROCS]; ip++) {
@@ -283,14 +274,22 @@ PRIVATE void vm_init(void)
 
                GETVMP(vmp, ip->proc_nr);
 
+               if(!(ip->flags & PROC_FULLVM)) {
+                       /* See if this process fits in kernel
+                        * mapping. VM has its own pagetable,
+                        * don't check it.
+                        */
+                       if(!(vmp->vm_flags & VMF_HASPT)) {
+                               pt_check(vmp);
+                       }
+                       continue;
+               }
+
                old_stack = 
                        vmp->vm_arch.vm_seg[S].mem_vir +
                        vmp->vm_arch.vm_seg[S].mem_len - 
                        vmp->vm_arch.vm_seg[D].mem_len;
 
-               if(!(ip->flags & PROC_FULLVM))
-                       continue;
-
                if(pt_new(&vmp->vm_pt) != OK)
                        vm_panic("vm_init: no new pagetable", NO_NUM);
 #define BASICSTACK VM_PAGE_SIZE
@@ -305,7 +304,7 @@ PRIVATE void vm_init(void)
                        vmp->vm_arch.vm_seg[D].mem_len,
                        old_stack);
 
-               proc_new(vmp,
+               if(proc_new(vmp,
                        VM_PROCSTART,
                        CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_len),
                        CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_len),
@@ -315,7 +314,9 @@ PRIVATE void vm_init(void)
                                vmp->vm_arch.vm_seg[D].mem_len) - BASICSTACK,
                        CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys),
                        CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys),
-                               VM_STACKTOP);
+                               VM_STACKTOP) != OK) {
+                       vm_panic("failed proc_new for boot process", NO_NUM);
+               }
        }
 
        /* Set up table of calls. */
@@ -347,6 +348,7 @@ PRIVATE void vm_init(void)
        CALLMAP(VM_DELDMA, do_deldma, PM_PROC_NR);
        CALLMAP(VM_GETDMA, do_getdma, PM_PROC_NR);
        CALLMAP(VM_ALLOCMEM, do_allocmem, PM_PROC_NR);
+       CALLMAP(VM_NOTIFY_SIG, do_notify_sig, PM_PROC_NR);
 
        /* Physical mapping requests.
         * tty (for /dev/video) does this.
@@ -359,22 +361,34 @@ PRIVATE void vm_init(void)
 
        /* Requests from userland (source unrestricted). */
        CALLMAP(VM_MMAP, do_mmap, ANYEPM);
+       CALLMAP(VM_MUNMAP, do_munmap, ANYEPM);
+       CALLMAP(VM_MUNMAP_TEXT, do_munmap, ANYEPM);
+       CALLMAP(VM_REMAP, do_remap, ANYEPM);
+       CALLMAP(VM_GETPHYS, do_get_phys, ANYEPM);
+       CALLMAP(VM_SHM_UNMAP, do_shared_unmap, ANYEPM);
+       CALLMAP(VM_GETREF, do_get_refcount, ANYEPM);
+       CALLMAP(VM_CTL, do_ctl, ANYEPM);
+
+       /* Requests from the IPC server only. */
+       CALLMAP(VM_QUERY_EXIT, do_query_exit, ANYEPM);
 
        /* Requests (actually replies) from VFS (restricted to VFS only). */
        CALLMAP(VM_VFS_REPLY_OPEN, do_vfs_reply, VFS_PROC_NR);
        CALLMAP(VM_VFS_REPLY_MMAP, do_vfs_reply, VFS_PROC_NR);
        CALLMAP(VM_VFS_REPLY_CLOSE, do_vfs_reply, VFS_PROC_NR);
 
+       /* Requests from RS */
+       CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv, RS_PROC_NR);
+
        /* Sanity checks */
        if(find_kernel_top() >= VM_PROCSTART)
                vm_panic("kernel loaded too high", NO_NUM);
-}
 
-#if 0
-void kputc(int c)
-{
-       if(c == '\n')
-               ser_putc('\r');
-       ser_putc(c);
+       /* Initialize the structures for queryexit */
+       init_query_exit();
+
+       /* Unmap our own low pages. */
+       unmap_ok = 1;
+       _minix_unmapzero();
 }
-#endif
+
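
The main-loop change above adds a second permission path for calls registered with ANYEPM: besides the hard-coded vmc_callers mask, VM now consults a per-process vm_call_priv_mask bitmap that RS fills in through VM_RS_SET_PRIV. A standalone sketch of the bit test involved; the chunk width and helper macros are assumptions here (the real GET_BIT comes from <minix/bitmap.h>):

    /* Sketch only: testing a per-process call ACL bitmap. */
    #include <stdio.h>

    typedef unsigned int bitchunk_t;
    #define BITCHUNK_BITS   (sizeof(bitchunk_t) * 8)
    #define GET_BIT(map, b) ((map)[(b) / BITCHUNK_BITS] & (1U << ((b) % BITCHUNK_BITS)))
    #define SET_BIT(map, b) ((map)[(b) / BITCHUNK_BITS] |= (1U << ((b) % BITCHUNK_BITS)))

    int main(void)
    {
            bitchunk_t mask[4] = { 0 };     /* room for 128 call numbers */
            int c = 42;                     /* hypothetical call index */

            SET_BIT(mask, c);               /* as RS would grant via VM_RS_SET_PRIV */
            printf("call %d %s\n", c,
                    GET_BIT(mask, c) ? "allowed" : "restricted");
            return 0;
    }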
index 1bbbf699e61ef2075ebc3cd28c6d85af78528ed7..9def7b482b6a0f99f53aeae5a0b281431f282e17 100644 (file)
@@ -16,6 +16,7 @@
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/safecopies.h>
+#include <minix/bitmap.h>
 
 #include <sys/mman.h>
 
@@ -47,10 +48,6 @@ PUBLIC int do_mmap(message *m)
 
        vmp = &vmproc[n];
 
-       if(m->VMM_FLAGS & MAP_LOWER16M)
-               printf("VM: warning for %d: MAP_LOWER16M not implemented\n",
-                       m->m_source);
-
        if(!(vmp->vm_flags & VMF_HASPT))
                return ENXIO;
 
@@ -66,14 +63,17 @@ PUBLIC int do_mmap(message *m)
 
                if(m->VMM_FLAGS & MAP_CONTIG) mfflags |= MF_CONTIG;
                if(m->VMM_FLAGS & MAP_PREALLOC) mfflags |= MF_PREALLOC;
+               if(m->VMM_FLAGS & MAP_LOWER16M) vrflags |= VR_LOWER16MB;
+               if(m->VMM_FLAGS & MAP_LOWER1M)  vrflags |= VR_LOWER1MB;
                if(m->VMM_FLAGS & MAP_ALIGN64K) vrflags |= VR_PHYS64K;
+               if(m->VMM_FLAGS & MAP_SHARED) vrflags |= VR_SHARED;
 
                if(len % VM_PAGE_SIZE)
                        len += VM_PAGE_SIZE - (len % VM_PAGE_SIZE);
 
                if(!(vr = map_page_region(vmp,
-                       arch_vir2map(vmp, vmp->vm_stacktop), VM_DATATOP, len, MAP_NONE,
-                       vrflags, mfflags))) {
+                       arch_vir2map(vmp, vmp->vm_stacktop),
+                       VM_DATATOP, len, MAP_NONE, vrflags, mfflags))) {
                        return ENOMEM;
                }
        } else {
@@ -84,6 +84,7 @@ PUBLIC int do_mmap(message *m)
        vm_assert(vr);
        m->VMM_RETADDR = arch_map2vir(vmp, vr->vaddr);
 
+
        return OK;
 }
 
@@ -153,9 +154,244 @@ PUBLIC int do_unmap_phys(message *m)
                return EINVAL;
        }
 
-       if(map_unmap_region(vmp, region) != OK) {
+       if(map_unmap_region(vmp, region, region->length) != OK) {
+               return EINVAL;
+       }
+
+       return OK;
+}
+
+/*===========================================================================*
+ *                             do_remap                                     *
+ *===========================================================================*/
+PUBLIC int do_remap(message *m)
+{
+       int d, dn, s, sn;
+       vir_bytes da, sa, startv;
+       size_t size;
+       struct vir_region *vr, *region;
+       struct vmproc *dvmp, *svmp;
+       int r;
+
+       d = m->VMRE_D;
+       s = m->VMRE_S;
+       da = (vir_bytes) m->VMRE_DA;
+       sa = (vir_bytes) m->VMRE_SA;
+       size = m->VMRE_SIZE;
+
+       if ((r = vm_isokendpt(d, &dn)) != OK)
+               return EINVAL;
+       if ((r = vm_isokendpt(s, &sn)) != OK)
+               return EINVAL;
+
+       dvmp = &vmproc[dn];
+       svmp = &vmproc[sn];
+
+       /* da is not translated by arch_vir2map(); it is handled a
+        * little differently, because map_remap() has to know whether
+        * the caller wants to bind to exactly that address or have
+        * the system choose one.
+        */
+       sa = arch_vir2map(svmp, sa);
+
+       if (!(region = map_lookup(svmp, sa)))
+               return EINVAL;
+
+       if ((r = map_remap(dvmp, da, size, region, &startv)) != OK)
+               return r;
+
+       m->VMRE_RETA = (char *) arch_map2vir(dvmp, startv);
+       return OK;
+}
+
+/*===========================================================================*
+ *                             do_shared_unmap                              *
+ *===========================================================================*/
+PUBLIC int do_shared_unmap(message *m)
+{
+       int r, n;
+       struct vmproc *vmp;
+       endpoint_t target;
+       struct vir_region *vr;
+       vir_bytes addr;
+
+       target = m->VMUN_ENDPT;
+
+       if ((r = vm_isokendpt(target, &n)) != OK)
+               return EINVAL;
+
+       vmp = &vmproc[n];
+
+       addr = arch_vir2map(vmp, m->VMUN_ADDR);
+
+       if(!(vr = map_lookup(vmp, addr))) {
+               printf("VM: addr 0x%lx not found.\n", m->VMUN_ADDR);
+               return EFAULT;
+       }
+
+       if(vr->vaddr != addr) {
+               printf("VM: wrong address for shared_unmap.\n");
+               return EFAULT;
+       }
+
+       if(!(vr->flags & VR_SHARED)) {
+               printf("VM: address does not point to shared region.\n");
+               return EFAULT;
+       }
+
+       if(map_unmap_region(vmp, vr, vr->length) != OK)
+               vm_panic("do_shared_unmap: map_unmap_region failed", NO_NUM);
+
+       return OK;
+}
+
+/*===========================================================================*
+ *                             do_get_phys                                  *
+ *===========================================================================*/
+PUBLIC int do_get_phys(message *m)
+{
+       int r, n;
+       struct vmproc *vmp;
+       endpoint_t target;
+       phys_bytes ret;
+       vir_bytes addr;
+
+       target = m->VMPHYS_ENDPT;
+       addr = m->VMPHYS_ADDR;
+
+       if ((r = vm_isokendpt(target, &n)) != OK)
+               return EINVAL;
+
+       vmp = &vmproc[n];
+       addr = arch_vir2map(vmp, addr);
+
+       r = map_get_phys(vmp, addr, &ret);
+
+       m->VMPHYS_RETA = ret;
+       return r;
+}
+
+/*===========================================================================*
+ *                             do_get_refcount                              *
+ *===========================================================================*/
+PUBLIC int do_get_refcount(message *m)
+{
+       int r, n;
+       struct vmproc *vmp;
+       endpoint_t target;
+       u8_t cnt;
+       vir_bytes addr;
+
+       target = m->VMREFCNT_ENDPT;
+       addr = m->VMREFCNT_ADDR;
+
+       if ((r = vm_isokendpt(target, &n)) != OK)
                return EINVAL;
+
+       vmp = &vmproc[n];
+       addr = arch_vir2map(vmp, addr);
+
+       r = map_get_ref(vmp, addr, &cnt);
+
+       m->VMREFCNT_RETC = cnt;
+       return r;
+}
+
+/*===========================================================================*
+ *                              do_munmap                                    *
+ *===========================================================================*/
+PUBLIC int do_munmap(message *m)
+{
+        int r, n;
+        struct vmproc *vmp;
+        vir_bytes addr, len;
+       struct vir_region *vr;
+        
+        if((r=vm_isokendpt(m->m_source, &n)) != OK) {
+                vm_panic("do_mmap: message from strange source", m->m_source);
+        }
+        vmp = &vmproc[n];
+
+       if(!(vmp->vm_flags & VMF_HASPT))
+               return ENXIO;
+        
+       if(m->m_type == VM_MUNMAP) {
+               addr = (vir_bytes) arch_vir2map(vmp, (vir_bytes) m->VMUM_ADDR);
+       } else if(m->m_type == VM_MUNMAP_TEXT) {
+               addr = (vir_bytes) arch_vir2map_text(vmp, (vir_bytes) m->VMUM_ADDR);
+       } else {
+               vm_panic("do_munmap: strange type", NO_NUM);
+       }
+
+        if(!(vr = map_lookup(vmp, addr))) {
+                printf("VM: unmap: virtual address 0x%lx not found in %d\n",
+                        m->VMUM_ADDR, vmp->vm_endpoint);
+                return EFAULT;
+        }
+       len = m->VMUM_LEN;
+       len -= len % VM_PAGE_SIZE;
+
+        if(addr != vr->vaddr || len > vr->length || len < VM_PAGE_SIZE) {
+                return EFAULT;
+        }       
+
+       if(map_unmap_region(vmp, vr, len) != OK)
+               vm_panic("do_munmap: map_unmap_region failed", NO_NUM);
+
+       return OK;
+}
+
+int unmap_ok = 0;
+
+/*===========================================================================*
+ *                     munmap_lin (used for overrides for VM)                *
+ *===========================================================================*/
+PRIVATE int munmap_lin(vir_bytes addr, size_t len)
+{
+       if(addr % VM_PAGE_SIZE) {
+               printf("munmap_lin: offset not page aligned\n");
+               return EFAULT;
+       }
+
+       if(len % VM_PAGE_SIZE) {
+               printf("munmap_lin: len not page aligned\n");
+               return EFAULT;
+       }
+
+       if(pt_writemap(&vmproc[VM_PROC_NR].vm_pt, addr, MAP_NONE, len, 0,
+               WMF_OVERWRITE | WMF_FREE) != OK) {
+               printf("munmap_lin: pt_writemap failed\n");
+               return EFAULT;
        }
 
        return OK;
 }
+
+/*===========================================================================*
+ *                              munmap (override for VM)                    *
+ *===========================================================================*/
+PUBLIC int munmap(void *addr, size_t len)
+{
+       vir_bytes laddr;
+       if(!unmap_ok)
+               return ENOSYS;
+       laddr = (vir_bytes) arch_vir2map(&vmproc[VM_PROC_NR], (vir_bytes) addr);
+       return munmap_lin(laddr, len);
+}
+
+/*===========================================================================*
+ *                              munmap_text (override for VM)                *
+ *===========================================================================*/
+PUBLIC int munmap_text(void *addr, size_t len)
+{
+       vir_bytes laddr;
+       if(!unmap_ok)
+               return ENOSYS;
+       laddr = (vir_bytes) arch_vir2map_text(&vmproc[VM_PROC_NR],
+               (vir_bytes) addr);
+       return munmap_lin(laddr, len);
+}
+
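
Both new entry points normalize lengths to page granularity, but in opposite directions: do_mmap() grows a request so it covers whole pages, while do_munmap() shrinks it so only fully covered pages are unmapped. A small standalone illustration, with the page size assumed:

    /* Sketch only: the two rounding styles used above. */
    #include <stdio.h>

    #define VM_PAGE_SIZE 4096

    int main(void)
    {
            unsigned long len = 5000;
            unsigned long up = len, down = len;

            if(up % VM_PAGE_SIZE)           /* do_mmap() style round-up */
                    up += VM_PAGE_SIZE - (up % VM_PAGE_SIZE);
            down -= down % VM_PAGE_SIZE;    /* do_munmap() style round-down */

            printf("len %lu -> mmap %lu, munmap %lu\n", len, up, down);
            return 0;
    }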
index 3790df238380e20f1abfc3e82fa69e77c3ce3fd5..8ba94db4ad296f18e6bd9a9e4090c4e3ac004817 100644 (file)
@@ -14,6 +14,7 @@
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/safecopies.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <string.h>
@@ -61,12 +62,21 @@ PUBLIC void do_pagefaults(void)
                vir_bytes offset;
                int p, wr = PFERR_WRITE(err);
 
+#if 0
+               printf("VM: pagefault: ep %d 0x%lx %s\n", 
+                       ep, arch_map2vir(vmp, addr), pf_errstr(err));
+#endif
+
                if(vm_isokendpt(ep, &p) != OK)
                        vm_panic("do_pagefaults: endpoint wrong", ep);
 
                vmp = &vmproc[p];
                vm_assert(vmp->vm_flags & VMF_INUSE);
 
+#if 0
+               map_printmap(vmp);
+#endif
+
                /* See if address is valid at all. */
                if(!(region = map_lookup(vmp, addr))) {
                        vm_assert(PFERR_NOPAGE(err));
@@ -75,6 +85,8 @@ PUBLIC void do_pagefaults(void)
                        sys_sysctl_stacktrace(vmp->vm_endpoint);
                        if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
                                vm_panic("sys_kill failed", s);
+                       if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
+                               vm_panic("do_pagefaults: sys_vmctl failed", ep);
                        continue;
                }
 
@@ -83,6 +95,11 @@ PUBLIC void do_pagefaults(void)
                 */
                vm_assert(!(region->flags & VR_NOPF));
 
+               /* We do not allow shared memory to cause pagefaults.
+                * These pages have to be pre-allocated.
+                */
+               vm_assert(!(region->flags & VR_SHARED));
+
                /* If process was writing, see if it's writable. */
                if(!(region->flags & VR_WRITABLE) && wr) {
                        printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n", 
@@ -90,6 +107,8 @@ PUBLIC void do_pagefaults(void)
                        sys_sysctl_stacktrace(vmp->vm_endpoint);
                        if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
                                vm_panic("sys_kill failed", s);
+                       if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
+                               vm_panic("do_pagefaults: sys_vmctl failed", ep);
                        continue;
                }
 
@@ -102,13 +121,23 @@ PUBLIC void do_pagefaults(void)
                        sys_sysctl_stacktrace(vmp->vm_endpoint);
                        if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
                                vm_panic("sys_kill failed", s);
+                       if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
+                               vm_panic("do_pagefaults: sys_vmctl failed", ep);
                        continue;
                }
+#if 0
+               printf("VM: map_pf done; ep %d 0x%lx %s\n", 
+                       ep, arch_map2vir(vmp, addr), pf_errstr(err));
+
 
+               printf("VM: handling pagefault OK: %d addr 0x%lx %s\n", 
+                       ep, arch_map2vir(vmp, addr), pf_errstr(err));
+#endif
 
                /* Pagefault is handled, so now reactivate the process. */
                if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
                        vm_panic("do_pagefaults: sys_vmctl failed", ep);
+
        }
 
        return;
@@ -120,55 +149,73 @@ PUBLIC void do_pagefaults(void)
 PUBLIC void do_memory(void)
 {
        int r, s;
-       endpoint_t who;
+       endpoint_t who, requestor;
        vir_bytes mem;
        vir_bytes len;
        int wrflag;
 
-       while((r=sys_vmctl_get_memreq(&who, &mem, &len, &wrflag)) == OK) {
+       while((r=sys_vmctl_get_memreq(&who, &mem, &len, &wrflag, &requestor))
+         == OK) {
                int p, r = OK;
-               struct vir_region *region;
                struct vmproc *vmp;
-               vir_bytes o;
 
                if(vm_isokendpt(who, &p) != OK)
                        vm_panic("do_memory: endpoint wrong", who);
                vmp = &vmproc[p];
 
-               /* Page-align memory and length. */
-               o = mem % VM_PAGE_SIZE;
-               mem -= o;
-               len += o;
-               o = len % VM_PAGE_SIZE;
-               if(o > 0) len += VM_PAGE_SIZE - o;
-
-               if(!(region = map_lookup(vmp, mem))) {
-                       printf("VM: do_memory: memory doesn't exist\n");
-                       r = EFAULT;
-               } else if(mem + len > region->vaddr + region->length) {
-                       vm_assert(region->vaddr <= mem);
-                       vm_panic("do_memory: not contained", NO_NUM);
-               } else if(!(region->flags & VR_WRITABLE) && wrflag) {
-                       printf("VM: do_memory: write to unwritable map\n");
-                       r = EFAULT;
-               } else {
-                       vir_bytes offset;
-                       vm_assert(region->vaddr <= mem);
-                       vm_assert(!(region->flags & VR_NOPF));
-                       vm_assert(!(region->vaddr % VM_PAGE_SIZE));
-                       offset = mem - region->vaddr;
-
-                       r = map_handle_memory(vmp, region, offset, len, wrflag);
-               }
-
-               if(r != OK) {
-                       printf("VM: memory range 0x%lx-0x%lx not available in %d\n",
-                               arch_map2vir(vmp, mem), arch_map2vir(vmp, mem+len),
-                               vmp->vm_endpoint);
-               }
+               r = handle_memory(vmp, mem, len, wrflag);
 
-               if(sys_vmctl(who, VMCTL_MEMREQ_REPLY, r) != OK)
+               if(sys_vmctl(requestor, VMCTL_MEMREQ_REPLY, r) != OK)
                        vm_panic("do_memory: sys_vmctl failed", r);
+
+#if 0
+               printf("VM: handling memory request %d done OK\n",
+                       who);
+#endif
        }
 }
 
+/*===========================================================================*
+ *                             handle_memory                                *
+ *===========================================================================*/
+PUBLIC int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len,
+       int wrflag)
+{
+       struct vir_region *region;
+       vir_bytes o;
+       int r;
+
+#if 0
+       printf("VM: handling memory request: %d, 0x%lx-0x%lx, wr %d\n",
+               vmp->vm_endpoint, mem, mem+len, wrflag);
+#endif
+
+       /* Page-align memory and length. */
+       o = mem % VM_PAGE_SIZE;
+       mem -= o;
+       len += o;
+       o = len % VM_PAGE_SIZE;
+       if(o > 0) len += VM_PAGE_SIZE - o;
+
+       if(!(region = map_lookup(vmp, mem))) {
+               map_printmap(vmp);
+               printf("VM: do_memory: memory doesn't exist\n");
+               r = EFAULT;
+       } else if(mem + len > region->vaddr + region->length) {
+               vm_assert(region->vaddr <= mem);
+               vm_panic("do_memory: not contained", NO_NUM);
+       } else if(!(region->flags & VR_WRITABLE) && wrflag) {
+               printf("VM: do_memory: write to unwritable map\n");
+               r = EFAULT;
+       } else {
+               vir_bytes offset;
+               vm_assert(region->vaddr <= mem);
+               vm_assert(!(region->flags & VR_NOPF));
+               vm_assert(!(region->vaddr % VM_PAGE_SIZE));
+               offset = mem - region->vaddr;
+
+               r = map_handle_memory(vmp, region, offset, len, wrflag);
+       }
+
+       if(r != OK) {
+               printf("VM: memory range 0x%lx-0x%lx not available in %d\n",
+                       arch_map2vir(vmp, mem), arch_map2vir(vmp, mem+len),
+                       vmp->vm_endpoint);
+       }
+
+       return r;
+}
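
handle_memory(), split out of do_memory() so the page-fault path can reuse it, first widens the requested range to page boundaries: the start is aligned down, the slack is added to the length, and the length is then rounded up. A standalone sketch of that arithmetic, values and page size assumed:

    /* Sketch only: aligning a (start, length) pair to whole pages. */
    #include <stdio.h>

    #define VM_PAGE_SIZE 4096

    int main(void)
    {
            unsigned long mem = 0x12345, len = 0x100, o;

            o = mem % VM_PAGE_SIZE; mem -= o; len += o;     /* align start down */
            o = len % VM_PAGE_SIZE;
            if(o > 0) len += VM_PAGE_SIZE - o;              /* round length up */

            printf("aligned range: 0x%lx + 0x%lx\n", mem, len);
            return 0;
    }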
diff --git a/servers/vm/pagerange.h b/servers/vm/pagerange.h
new file mode 100644 (file)
index 0000000..edf84e9
--- /dev/null
@@ -0,0 +1,24 @@
+
+
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <minix/keymap.h>
+#include <minix/minlib.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include <minix/const.h>
+
+typedef struct pagerange {
+       phys_bytes      addr;   /* in pages */
+       phys_bytes      size;   /* in pages */
+
+       /* AVL fields */
+       struct pagerange *less, *greater;       /* children */
+       int             factor; /* AVL balance factor */
+} pagerange_t;
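
With this commit the allocator's free list is kept in an AVL tree of pagerange nodes instead of a linked list, which keeps searching for a fitting range logarithmic in the number of free chunks. A standalone sketch of the node itself; the tree operations are generated from the cavl headers and are not reproduced here:

    /* Sketch only: a free range of physical pages, as tracked above. */
    #include <stdio.h>

    typedef unsigned long phys_bytes;

    typedef struct pagerange {
            phys_bytes addr;                        /* in pages */
            phys_bytes size;                        /* in pages */
            struct pagerange *less, *greater;       /* AVL children */
            int factor;                             /* AVL balance factor */
    } pagerange_t;

    int main(void)
    {
            pagerange_t free_range = { 0x100, 16, NULL, NULL, 0 };

            printf("free: %lu pages starting at page 0x%lx\n",
                    free_range.size, free_range.addr);
            return 0;
    }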
diff --git a/servers/vm/physravl.c b/servers/vm/physravl.c
new file mode 100644 (file)
index 0000000..5788f34
--- /dev/null
@@ -0,0 +1,8 @@
+
+#include "sanitycheck.h"
+#include "region.h"
+#include "physravl.h"
+#include "util.h"
+#include "proto.h"
+#include "cavl_impl.h"
+
diff --git a/servers/vm/physravl.h b/servers/vm/physravl.h
new file mode 100644 (file)
index 0000000..9690964
--- /dev/null
@@ -0,0 +1,24 @@
+
+#ifndef _PHYSRAVL_H 
+#define _PHYSRAVL_H 
+
+#define AVL_UNIQUE(id) physr_ ## id
+#define AVL_HANDLE phys_region_t *
+#define AVL_KEY phys_bytes
+#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
+#define AVL_NULL NULL
+#define AVL_GET_LESS(h, a) (h)->less
+#define AVL_GET_GREATER(h, a) (h)->greater
+#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
+#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
+#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
+#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
+#define AVL_SET_ROOT(h, v) USE((h), (h)->root = v;);
+#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
+#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->offset)
+#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->offset, (h2)->offset)
+#define AVL_INSIDE_STRUCT char pad[4];   
+
+#include "cavl_if.h"
+
+#endif
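
physravl.h instantiates the generic cavl AVL template for phys_region nodes keyed by their offset: AVL_UNIQUE pastes the physr_ prefix onto every generated symbol, which is why region.c below can call physr_init(), physr_remove() and the physr_*_iter() functions. A toy standalone illustration of the token-pasting idea (this is not the cavl implementation, just the naming mechanism):

    /* Sketch only: one template, many prefixed instantiations. */
    #include <stdio.h>

    #define AVL_UNIQUE(id) physr_ ## id

    typedef struct tree { void *root; } tree_t;

    static void AVL_UNIQUE(init)(tree_t *t)   /* expands to physr_init */
    {
            t->root = NULL;
    }

    int main(void)
    {
            tree_t t;

            physr_init(&t);                   /* the generated, prefixed name */
            printf("tree initialised, root=%p\n", t.root);
            return 0;
    }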
index 3e7158a0d627e608fb6bf957c98ee84da161746a..388397aebbe7d8bd96952fce22b097e2757bca3d 100644 (file)
@@ -21,14 +21,18 @@ _PROTOTYPE( int do_deldma, (message *msg)                              );
 _PROTOTYPE( int do_getdma, (message *msg)                              );
 _PROTOTYPE( int do_allocmem, (message *msg)                              );
 _PROTOTYPE( void release_dma, (struct vmproc *vmp)                     );
-
+_PROTOTYPE( void memstats, (int *nodes, int *pages, int *largest)      );
+_PROTOTYPE( void printmemstats, (void)                                 );
+_PROTOTYPE( void usedpages_reset, (void)                               );
+_PROTOTYPE( int usedpages_add_f, (phys_bytes phys, phys_bytes len,
+       char *file, int line)   );
 _PROTOTYPE( void free_mem_f, (phys_clicks base, phys_clicks clicks)    );
+#define usedpages_add(a, l) usedpages_add_f(a, l, __FILE__, __LINE__)
 
 #define ALLOC_MEM(clicks, flags) alloc_mem_f(clicks, flags)
 #define FREE_MEM(base, clicks) free_mem_f(base, clicks)
 
 _PROTOTYPE( void mem_init, (struct memory *chunks)                     );
-_PROTOTYPE( void memstats, (void)                                      );
 
 /* utility.c */
 _PROTOTYPE( int get_mem_map, (int proc_nr, struct mem_map *mem_map)     );
@@ -37,6 +41,7 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks,
         struct mem_map *map_ptr));
 _PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc)            );
 _PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp)             );
+_PROTOTYPE( int do_ctl, (message *)             );
 
 /* exit.c */
 _PROTOTYPE( void clear_proc, (struct vmproc *vmp)                      );
@@ -74,16 +79,24 @@ _PROTOTYPE( int vfs_close, (struct vmproc *for_who, callback_t callback,
 
 /* mmap.c */
 _PROTOTYPE(int do_mmap, (message *msg)                                 );
+_PROTOTYPE(int do_munmap, (message *msg)                               );
 _PROTOTYPE(int do_map_phys, (message *msg)                              );
 _PROTOTYPE(int do_unmap_phys, (message *msg)                            );
+_PROTOTYPE(int do_remap, (message *m)                                   );
+_PROTOTYPE(int do_get_phys, (message *m)                                );
+_PROTOTYPE(int do_shared_unmap, (message *m)                            );
+_PROTOTYPE(int do_get_refcount, (message *m)                            );
 
 /* pagefaults.c */
 _PROTOTYPE( void do_pagefaults, (void)                         );
 _PROTOTYPE( void do_memory, (void)                             );
 _PROTOTYPE( char *pf_errstr, (u32_t err));
+_PROTOTYPE( int handle_memory, (struct vmproc *vmp, vir_bytes mem,
+       vir_bytes len, int wrflag));
 
 /* $(ARCH)/pagetable.c */
 _PROTOTYPE( void pt_init, (void)                                       );
+_PROTOTYPE( void pt_check, (struct vmproc *vmp)                                );
 _PROTOTYPE( int pt_new, (pt_t *pt)                                     );
 _PROTOTYPE( void pt_free, (pt_t *pt)                                   );
 _PROTOTYPE( void pt_freerange, (pt_t *pt, vir_bytes lo, vir_bytes hi)  );
@@ -93,8 +106,8 @@ _PROTOTYPE( int pt_bind, (pt_t *pt, struct vmproc *who)                      );
 _PROTOTYPE( void *vm_allocpages, (phys_bytes *p, int pages, int cat));
 _PROTOTYPE( void pt_cycle, (void));
 _PROTOTYPE( int pt_mapkernel, (pt_t *pt));
-_PROTOTYPE( void phys_readaddr, (phys_bytes addr, phys_bytes *v1, phys_bytes *v2));
-_PROTOTYPE( void phys_writeaddr, (phys_bytes addr, phys_bytes v1, phys_bytes v2));
+_PROTOTYPE( void vm_pagelock, (void *vir, int lockflag)                );
+
 #if SANITYCHECKS
 _PROTOTYPE( void pt_sanitycheck, (pt_t *pt, char *file, int line)      );
 #endif
@@ -106,18 +119,14 @@ _PROTOTYPE( int arch_get_pagefault, (endpoint_t *who, vir_bytes *addr, u32_t *er
 _PROTOTYPE(void *slaballoc,(int bytes));
 _PROTOTYPE(void slabfree,(void *mem, int bytes));
 _PROTOTYPE(void slabstats,(void));
+_PROTOTYPE(void slab_sanitycheck, (char *file, int line));
 #define SLABALLOC(var) (var = slaballoc(sizeof(*var)))
 #define SLABFREE(ptr) slabfree(ptr, sizeof(*(ptr)))
 #if SANITYCHECKS
-_PROTOTYPE(int slabsane,(void *mem, int bytes));
-#define SLABSANE(ptr) { \
-       if(!slabsane(ptr, sizeof(*(ptr)))) { \
-               printf("VM:%s:%d: SLABSANE(%s)\n", __FILE__, __LINE__, #ptr); \
-               vm_panic("SLABSANE failed", NO_NUM);    \
-       } \
-}
-#else
-#define SLABSANE(ptr)
+
+_PROTOTYPE(void slabunlock,(void *mem, int bytes));
+_PROTOTYPE(void slablock,(void *mem, int bytes));
+_PROTOTYPE(int slabsane_f,(char *file, int line, void *mem, int bytes));
 #endif
 
 /* region.c */
@@ -127,7 +136,7 @@ _PROTOTYPE(struct vir_region * map_page_region,(struct vmproc *vmp, \
 _PROTOTYPE(struct vir_region * map_proc_kernel,(struct vmproc *dst));
 _PROTOTYPE(int map_region_extend,(struct vmproc *vmp, struct vir_region *vr, vir_bytes delta));
 _PROTOTYPE(int map_region_shrink,(struct vir_region *vr, vir_bytes delta));
-_PROTOTYPE(int map_unmap_region,(struct vmproc *vmp, struct vir_region *vr));
+_PROTOTYPE(int map_unmap_region,(struct vmproc *vmp, struct vir_region *vr, vir_bytes len));
 _PROTOTYPE(int map_free_proc,(struct vmproc *vmp));
 _PROTOTYPE(int map_proc_copy,(struct vmproc *dst, struct vmproc *src));
 _PROTOTYPE(struct vir_region *map_lookup,(struct vmproc *vmp, vir_bytes addr));
@@ -135,11 +144,17 @@ _PROTOTYPE(int map_pf,(struct vmproc *vmp,
        struct vir_region *region, vir_bytes offset, int write));
 _PROTOTYPE(int map_handle_memory,(struct vmproc *vmp,
        struct vir_region *region, vir_bytes offset, vir_bytes len, int write));
+_PROTOTYPE(void map_printmap, (struct vmproc *vmp));
+_PROTOTYPE(int map_writept, (struct vmproc *vmp));
+_PROTOTYPE(void printregionstats, (struct vmproc *vmp));
 
 _PROTOTYPE(struct vir_region * map_region_lookup_tag, (struct vmproc *vmp, u32_t tag));
 _PROTOTYPE(void map_region_set_tag, (struct vir_region *vr, u32_t tag));
 _PROTOTYPE(u32_t map_region_get_tag, (struct vir_region *vr));
-
+_PROTOTYPE(int map_remap, (struct vmproc *dvmp, vir_bytes da, size_t size,
+       struct vir_region *region, vir_bytes *r));
+_PROTOTYPE(int map_get_phys, (struct vmproc *vmp, vir_bytes addr, phys_bytes *r));
+_PROTOTYPE(int map_get_ref, (struct vmproc *vmp, vir_bytes addr, u8_t *cnt));
 
 #if SANITYCHECKS
 _PROTOTYPE(void map_sanitycheck,(char *file, int line));
@@ -149,4 +164,12 @@ _PROTOTYPE(void map_sanitycheck,(char *file, int line));
 _PROTOTYPE( void arch_init_vm, (struct memory mem_chunks[NR_MEMS]));
 _PROTOTYPE( vir_bytes, arch_map2vir(struct vmproc *vmp, vir_bytes addr));
 _PROTOTYPE( vir_bytes, arch_vir2map(struct vmproc *vmp, vir_bytes addr));
+_PROTOTYPE( vir_bytes, arch_vir2map_text(struct vmproc *vmp, vir_bytes addr));
+
+/* rs.c */
+_PROTOTYPE(int do_rs_set_priv, (message *m));
 
+/* queryexit.c */
+_PROTOTYPE(int do_query_exit, (message *m));
+_PROTOTYPE(int do_notify_sig, (message *m));
+_PROTOTYPE(void init_query_exit, (void));
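
Several of the new sanity-check hooks in this header follow one convention: the function proper carries an _f suffix and takes the calling file and line, while a macro of the plain name injects __FILE__ and __LINE__ automatically (usedpages_add() above, slabsane_f() likewise). A standalone sketch of the pattern:

    /* Sketch only: recording the call site through a wrapper macro. */
    #include <stdio.h>

    static int usedpages_add_f(unsigned long phys, unsigned long len,
            char *file, int line)
    {
            printf("range 0x%lx+0x%lx recorded at %s:%d\n",
                    phys, len, file, line);
            return 0;       /* OK */
    }

    #define usedpages_add(a, l) usedpages_add_f(a, l, __FILE__, __LINE__)

    int main(void)
    {
            return usedpages_add(0x1000, 0x2000);
    }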
diff --git a/servers/vm/queryexit.c b/servers/vm/queryexit.c
new file mode 100644 (file)
index 0000000..339d537
--- /dev/null
@@ -0,0 +1,123 @@
+
+#define _SYSTEM 1
+
+#define VERBOSE 0
+
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <minix/keymap.h>
+#include <minix/minlib.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include <minix/safecopies.h>
+#include <minix/bitmap.h>
+#include <minix/vm.h>
+#include <minix/ds.h>
+
+#include <errno.h>
+#include <string.h>
+#include <env.h>
+#include <stdio.h>
+
+#include "glo.h"
+#include "proto.h"
+#include "util.h"
+
+struct query_exit_struct {
+       int avail;
+       endpoint_t ep;
+};
+static struct query_exit_struct array[NR_PROCS];
+
+/*===========================================================================*
+ *                             do_query_exit                                *
+ *===========================================================================*/
+PUBLIC int do_query_exit(message *m)
+{
+       int i, nr;
+       endpoint_t ep = 0;      /* 0 means no exited process queued */
+
+       for (i = 0; i < NR_PROCS; i++) {
+               if (!array[i].avail) {
+                       array[i].avail = 1;
+                       ep = array[i].ep;
+                       array[i].ep = 0;
+
+                       break;
+               }
+       }
+
+       nr = 0;
+       for (i = 0; i < NR_PROCS; i++) {
+               if (!array[i].avail)
+                       nr++;
+       }
+       m->VM_QUERY_RET_PT = ep;
+       m->VM_QUERY_IS_MORE = (nr > 0);
+
+       return OK;
+}
+
+/*===========================================================================*
+ *                             do_notify_sig                                *
+ *===========================================================================*/
+PUBLIC int do_notify_sig(message *m)
+{
+       int i, avails = 0;
+       endpoint_t ep = m->VM_NOTIFY_SIG_ENDPOINT;
+       endpoint_t ipc_ep = m->VM_NOTIFY_SIG_IPC;
+       int r;
+
+       for (i = 0; i < NR_PROCS; i++) {
+               /* Its signal is already queued. */
+               if (!array[i].avail && array[i].ep == ep)
+                       goto out;
+               if (array[i].avail)
+                       avails++;
+       }
+       if (!avails) {
+               /* No free slot for signals; this should not happen. */
+               printf("VM: no slot for signals!\n");
+               return ENOMEM;
+       }
+
+       for (i = 0; i < NR_PROCS; i++) {
+               if (array[i].avail) {
+                       array[i].avail = 0;
+                       array[i].ep = ep;
+
+                       break;
+               }
+       }
+
+out:
+       /* Only notify once the IPC server is up, and skip the
+        * notification when the process being signalled is the IPC
+        * server itself.
+        */
+       if (ipc_ep != 0 && ep != ipc_ep) {
+               r = notify(ipc_ep);
+               if (r != OK)
+                       printf("VM: notify IPC error!\n");
+       }
+       return OK;
+}
+
+/*===========================================================================*
+ *                             init_query_exit                              *
+ *===========================================================================*/
+PUBLIC void init_query_exit(void)
+{
+       int i;
+
+       for (i = 0; i < NR_PROCS; i++) {
+               array[i].avail = 1;
+               array[i].ep = 0;
+       }
+}
+
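
The file above implements a small mailbox between VM and the IPC server: do_notify_sig() parks an exited endpoint in a free slot, and do_query_exit() hands one endpoint back per call while reporting whether more are pending. A condensed standalone sketch of that slot protocol; the endpoint values are invented, and the real code uses endpoint_t and notify():

    /* Sketch only: park-and-drain queue over a fixed slot array. */
    #include <stdio.h>

    #define NR_PROCS 4

    static struct { int avail; int ep; } array[NR_PROCS];

    static void notify_sig(int ep)          /* cf. do_notify_sig() */
    {
            int i;
            for(i = 0; i < NR_PROCS; i++)
                    if(array[i].avail) { array[i].avail = 0; array[i].ep = ep; return; }
    }

    static int query_exit(int *more)        /* cf. do_query_exit() */
    {
            int i, ep = 0, left = 0;
            for(i = 0; i < NR_PROCS; i++) {
                    if(!array[i].avail && !ep) { array[i].avail = 1; ep = array[i].ep; }
                    else if(!array[i].avail) left++;
            }
            *more = (left > 0);
            return ep;
    }

    int main(void)
    {
            int i, more;

            for(i = 0; i < NR_PROCS; i++) array[i].avail = 1;
            notify_sig(70);
            notify_sig(71);
            do printf("exited: %d\n", query_exit(&more)); while(more);
            return 0;
    }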
index e327c455ac623ef6f93b10f60bbf02e48f65c203..d71bfe459725f6b5f8d8c21e823db8cdc91e53d6 100644 (file)
@@ -8,6 +8,8 @@
 #include <minix/const.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/debug.h>
+#include <minix/bitmap.h>
 
 #include <sys/mman.h>
 
 #include "glo.h"
 #include "region.h"
 #include "sanitycheck.h"
+#include "physravl.h"
 
-FORWARD _PROTOTYPE(int map_new_physblock, (struct vmproc *vmp,
+/* Should a physblock be mapped writable? */
+#define WRITABLE(r, pb) \
+       (((r)->flags & (VR_DIRECT | VR_SHARED)) ||      \
+        (((r)->flags & VR_WRITABLE) && (pb)->refcount == 1))
+
+FORWARD _PROTOTYPE(struct phys_region *map_new_physblock, (struct vmproc *vmp,
        struct vir_region *region, vir_bytes offset, vir_bytes length,
-       phys_bytes what, struct phys_region *physhint));
+       phys_bytes what));
+
+FORWARD _PROTOTYPE(int map_ph_writept, (struct vmproc *vmp, struct vir_region *vr,
+       struct phys_region *pr));
 
 FORWARD _PROTOTYPE(int map_copy_ph_block, (struct vmproc *vmp, struct vir_region *region, struct phys_region *ph));
-FORWARD _PROTOTYPE(struct vir_region *map_copy_region, (struct vir_region *));
 
-FORWARD _PROTOTYPE(void map_printmap, (struct vmproc *vmp));
+FORWARD _PROTOTYPE(struct vir_region *map_copy_region, (struct vmproc *vmp, struct vir_region *vr));
 
 PRIVATE char *map_name(struct vir_region *vr)
 {
@@ -52,25 +62,30 @@ PRIVATE char *map_name(struct vir_region *vr)
 /*===========================================================================*
  *                             map_printmap                                 *
  *===========================================================================*/
-PRIVATE void map_printmap(vmp)
+PUBLIC void map_printmap(vmp)
 struct vmproc *vmp;
 {
        struct vir_region *vr;
+       physr_iter iter;
+
        printf("memory regions in process %d:\n", vmp->vm_endpoint);
        for(vr = vmp->vm_regions; vr; vr = vr->next) {
                struct phys_region *ph;
                int nph = 0;
+               printf("map_printmap: map_name: %s\n", map_name(vr));
                printf("\t0x%lx - 0x%lx (len 0x%lx), %s\n",
                        vr->vaddr, vr->vaddr + vr->length, vr->length,
-                       vr->vaddr + vr->length, map_name(vr));
+                       map_name(vr));
                printf("\t\tphysical: ");
-               for(ph = vr->first; ph; ph = ph->next) {
+               physr_start_iter_least(vr->phys, &iter);
+               while((ph = physr_get_iter(&iter))) {
                        printf("0x%lx-0x%lx (refs %d): phys 0x%lx ",
-                               vr->vaddr + ph->ph->offset,
-                               vr->vaddr + ph->ph->offset + ph->ph->length,
+                               vr->vaddr + ph->offset,
+                               vr->vaddr + ph->offset + ph->ph->length,
                                ph->ph->refcount,
                                ph->ph->phys);
                        nph++;
+                       physr_incr_iter(&iter);
                }
                printf(" (phregions %d)\n", nph);
        }
@@ -91,27 +106,35 @@ PUBLIC void map_sanitycheck(char *file, int line)
  * all processes.
  */
 #define ALLREGIONS(regioncode, physcode)                       \
-       for(vmp = vmproc; vmp <= &vmproc[_NR_PROCS]; vmp++) {   \
+       for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {       \
                struct vir_region *vr;                          \
                if(!(vmp->vm_flags & VMF_INUSE))                \
                        continue;                               \
                for(vr = vmp->vm_regions; vr; vr = vr->next) {  \
+                       physr_iter iter;                        \
                        struct phys_region *pr;                 \
                        regioncode;                             \
-                       for(pr = vr->first; pr; pr = pr->next) { \
+                       physr_start_iter_least(vr->phys, &iter); \
+                       while((pr = physr_get_iter(&iter))) {   \
                                physcode;                       \
+                               physr_incr_iter(&iter);         \
                        }                                       \
                }                                               \
        }
 
-#define MYSLABSANE(s) MYASSERT(slabsane(s, sizeof(*(s))))
+#define MYSLABSANE(s) MYASSERT(slabsane_f(__FILE__, __LINE__, s, sizeof(*(s))))
        /* Basic pointers check. */
        ALLREGIONS(MYSLABSANE(vr),MYSLABSANE(pr); MYSLABSANE(pr->ph);MYSLABSANE(pr->parent));
-       ALLREGIONS(MYASSERT(vr->parent == vmp),MYASSERT(pr->parent == vr););
+       ALLREGIONS(/* MYASSERT(vr->parent == vmp) */,MYASSERT(pr->parent == vr););
 
        /* Do counting for consistency check. */
-       ALLREGIONS(;,pr->ph->seencount = 0;);
-       ALLREGIONS(;,pr->ph->seencount++;);
+       ALLREGIONS(;,USE(pr->ph, pr->ph->seencount = 0;););
+       ALLREGIONS(;,USE(pr->ph, pr->ph->seencount++;);
+               if(pr->ph->seencount == 1) {
+                       MYASSERT(usedpages_add(pr->ph->phys,
+                               pr->ph->length) == OK);
+               }
+       );
 
        /* Do consistency check. */
        ALLREGIONS(if(vr->next) {
@@ -123,8 +146,8 @@ PUBLIC void map_sanitycheck(char *file, int line)
                        map_printmap(vmp);
                        printf("ph in vr 0x%lx: 0x%lx-0x%lx  refcount %d "
                                "but seencount %lu\n", 
-                               vr, pr->ph->offset,
-                               pr->ph->offset + pr->ph->length,
+                               vr, pr->offset,
+                               pr->offset + pr->ph->length,
                                pr->ph->refcount, pr->ph->seencount);
                }
                {
@@ -147,8 +170,9 @@ PUBLIC void map_sanitycheck(char *file, int line)
                        MYASSERT(pr->ph->refcount == n_others);
                }
                MYASSERT(pr->ph->refcount == pr->ph->seencount);
-               MYASSERT(!(pr->ph->offset % VM_PAGE_SIZE));
+               MYASSERT(!(pr->offset % VM_PAGE_SIZE));
                MYASSERT(!(pr->ph->length % VM_PAGE_SIZE)););
+
 }
 #endif
 
@@ -156,34 +180,23 @@ PUBLIC void map_sanitycheck(char *file, int line)
 /*=========================================================================*
  *                             map_ph_writept                          *
  *=========================================================================*/
-PUBLIC int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
-       struct phys_block *pb, int *ropages, int *rwpages)
+PRIVATE int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
+       struct phys_region *pr)
 {
        int rw;
+       struct phys_block *pb = pr->ph;
 
        vm_assert(!(vr->vaddr % VM_PAGE_SIZE));
        vm_assert(!(pb->length % VM_PAGE_SIZE));
-       vm_assert(!(pb->offset % VM_PAGE_SIZE));
+       vm_assert(!(pr->offset % VM_PAGE_SIZE));
        vm_assert(pb->refcount > 0);
 
-       if((vr->flags & VR_WRITABLE)
-       && (pb->refcount == 1 || (vr->flags & VR_DIRECT)))
+       if(WRITABLE(vr, pb))
                rw = PTF_WRITE;
        else
                rw = 0;
 
-#if SANITYCHECKS
-       if(rwpages && ropages && (vr->flags & VR_ANON)) {
-               int pages;
-               pages = pb->length / VM_PAGE_SIZE;
-               if(rw)
-                       (*rwpages) += pages;
-               else
-                       (*ropages) += pages;
-       }
-#endif
-
-       if(pt_writemap(&vmp->vm_pt, vr->vaddr + pb->offset,
+       if(pt_writemap(&vmp->vm_pt, vr->vaddr + pr->offset,
          pb->phys, pb->length, PTF_PRESENT | PTF_USER | rw,
                WMF_OVERWRITE) != OK) {
            printf("VM: map_writept: pt_writemap failed\n");
@@ -194,20 +207,13 @@ PUBLIC int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
 }
 
 /*===========================================================================*
- *                             map_page_region                              *
+ *                             region_find_slot                             *
  *===========================================================================*/
-PUBLIC struct vir_region *map_page_region(vmp, minv, maxv, length,
-       what, flags, mapflags)
-struct vmproc *vmp;
-vir_bytes minv;
-vir_bytes maxv;
-vir_bytes length;
-vir_bytes what;
-u32_t flags;
-int mapflags;
+PRIVATE vir_bytes region_find_slot(struct vmproc *vmp,
+               vir_bytes minv, vir_bytes maxv, vir_bytes length,
+               struct vir_region **prev)
 {
-       struct vir_region *vr, *prevregion = NULL, *newregion,
-               *firstregion = vmp->vm_regions;
+       struct vir_region *firstregion = vmp->vm_regions, *prevregion = NULL;
        vir_bytes startv;
        int foundflag = 0;
 
@@ -227,10 +233,10 @@ int mapflags;
 
                 /* Sanity check. */
                 if(maxv <= minv) {
-                        printf("map_page_region: minv 0x%lx and bytes 0x%lx\n",
+                        printf("region_find_slot: minv 0x%lx and bytes 0x%lx\n",
                                 minv, length);
                        map_printmap(vmp);
-                        return NULL;
+                        return (vir_bytes) -1;
                 }
         }
 
@@ -257,6 +263,7 @@ int mapflags;
        FREEVRANGE(0, firstregion ? firstregion->vaddr : VM_DATATOP, ;);
 
        if(!foundflag) {
+               struct vir_region *vr;
                for(vr = vmp->vm_regions; vr && !foundflag; vr = vr->next) {
                        FREEVRANGE(vr->vaddr + vr->length,
                          vr->next ? vr->next->vaddr : VM_DATATOP,
@@ -265,10 +272,10 @@ int mapflags;
        }
 
        if(!foundflag) {
-               printf("VM: map_page_region: no 0x%lx bytes found for %d between 0x%lx and 0x%lx\n",
+               printf("VM: region_find_slot: no 0x%lx bytes found for %d between 0x%lx and 0x%lx\n",
                        length, vmp->vm_endpoint, minv, maxv);
                map_printmap(vmp);
-               return NULL;
+               return (vir_bytes) -1;
        }
 
 #if SANITYCHECKS
@@ -280,6 +287,35 @@ int mapflags;
        vm_assert(startv < maxv);
        vm_assert(startv + length <= maxv);
 
+       if (prev)
+               *prev = prevregion;
+       return startv;
+}
+
+/*===========================================================================*
+ *                             map_page_region                              *
+ *===========================================================================*/
+PUBLIC struct vir_region *map_page_region(vmp, minv, maxv, length,
+       what, flags, mapflags)
+struct vmproc *vmp;
+vir_bytes minv;
+vir_bytes maxv;
+vir_bytes length;
+vir_bytes what;
+u32_t flags;
+int mapflags;
+{
+       struct vir_region *prevregion = NULL, *newregion;
+       vir_bytes startv;
+       struct phys_region *ph;
+       physr_avl *phavl;
+
+       SANITYCHECK(SCL_FUNCTIONS);
+
+       startv = region_find_slot(vmp, minv, maxv, length, &prevregion);
+       if (startv == (vir_bytes) -1)
+               return NULL;
+
        /* Now we want a new region. */
        if(!SLABALLOC(newregion)) {
                printf("VM: map_page_region: allocating region failed\n");
@@ -287,28 +323,37 @@ int mapflags;
        }
 
        /* Fill in node details. */
+USE(newregion,
        newregion->vaddr = startv;
        newregion->length = length;
-       newregion->first = NULL;
        newregion->flags = flags;
        newregion->tag = VRT_NONE;
-       newregion->parent = vmp;
+       newregion->parent = vmp;);
+
+       SLABALLOC(phavl);
+       if(!phavl) {
+               printf("VM: map_page_region: allocating phys avl failed\n");
+               SLABFREE(newregion);
+               return NULL;
+       }
+       USE(newregion, newregion->phys = phavl;);
+
+       physr_init(newregion->phys);
 
        /* If we know what we're going to map to, map it right away. */
        if(what != MAP_NONE) {
+               struct phys_region *pr;
                vm_assert(!(what % VM_PAGE_SIZE));
                vm_assert(!(length % VM_PAGE_SIZE));
                vm_assert(!(startv % VM_PAGE_SIZE));
-               vm_assert(!newregion->first);
                vm_assert(!(mapflags & MF_PREALLOC));
-               if(map_new_physblock(vmp, newregion, 0, length, what, NULL) != OK) {
+               if(!(pr=map_new_physblock(vmp, newregion, 0, length, what))) {
                        printf("VM: map_new_physblock failed\n");
+                       SLABFREE(newregion->phys);
                        SLABFREE(newregion);
                        return NULL;
                }
-               vm_assert(newregion->first);
-               vm_assert(!newregion->first->next);
-               if(map_ph_writept(vmp, newregion, newregion->first->ph, NULL, NULL) != OK) {
+               if(map_ph_writept(vmp, newregion, pr) != OK) {
                        printf("VM: map_region_writept failed\n");
+                       SLABFREE(newregion->phys);
                        SLABFREE(newregion);
                        return NULL;
@@ -317,7 +362,8 @@ int mapflags;
 
        if((flags & VR_ANON) && (mapflags & MF_PREALLOC)) {
                if(map_handle_memory(vmp, newregion, 0, length, 1) != OK) {
-                       printf("VM:map_page_region: prealloc failed\n");
+                       printf("VM: map_page_region: prealloc failed\n");
+                       SLABFREE(newregion->phys);
                        SLABFREE(newregion);
                        return NULL;
                }
@@ -326,10 +372,10 @@ int mapflags;
        /* Link it. */
        if(prevregion) {
                vm_assert(prevregion->vaddr < newregion->vaddr);
-               newregion->next = prevregion->next;
-               prevregion->next = newregion;
+               USE(newregion, newregion->next = prevregion->next;);
+               USE(prevregion, prevregion->next = newregion;);
        } else {
-               newregion->next = vmp->vm_regions;
+               USE(newregion, newregion->next = vmp->vm_regions;);
                vmp->vm_regions = newregion;
        }
 
@@ -353,28 +399,21 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr)
        struct phys_block *pb;
        int remap = 0;
 
-       SLABSANE(pr);
        pb = pr->ph;
-       SLABSANE(pb);
        vm_assert(pb->refcount > 0);
-       pb->refcount--;
+       USE(pb, pb->refcount--;);
        vm_assert(pb->refcount >= 0);
 
-       SLABSANE(pb->firstregion);
        if(pb->firstregion == pr) {
-               pb->firstregion = pr->next_ph_list;
-               if(pb->firstregion) {
-                       SLABSANE(pb->firstregion);
-               }
+               USE(pb, pb->firstregion = pr->next_ph_list;);
        } else {
                struct phys_region *others;
 
                for(others = pb->firstregion; others;
                        others = others->next_ph_list) {
-                       SLABSANE(others);
                        vm_assert(others->ph == pb);
                        if(others->next_ph_list == pr) {
-                               others->next_ph_list = pr->next_ph_list;
+                               USE(others, others->next_ph_list = pr->next_ph_list;);
                                break;
                        }
                }
@@ -393,62 +432,95 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr)
                        vm_panic("strange phys flags", NO_NUM);
                }
                SLABFREE(pb);
-       } else {
-               SLABSANE(pb->firstregion);
+       } else if(WRITABLE(region, pb)) {
                /* If a writable piece of physical memory is now only
                 * referenced once, map it writable right away instead of
                 * waiting for a page fault.
                 */
-               if(pb->refcount == 1 && (region->flags & VR_WRITABLE)) {
                        vm_assert(pb);
                        vm_assert(pb->firstregion);
                        vm_assert(!pb->firstregion->next_ph_list);
                        vm_assert(pb->firstregion->ph == pb);
-                       SLABSANE(pb);
-                       SLABSANE(pb->firstregion);
-                       SLABSANE(pb->firstregion->parent);
                        if(map_ph_writept(pb->firstregion->parent->parent,
-                               pb->firstregion->parent, pb, NULL, NULL) != OK) {
+                               pb->firstregion->parent, pb->firstregion) != OK) {
                                vm_panic("pb_unreferenced: writept", NO_NUM);
                        }
-               }
        }
 }
 
 /*===========================================================================*
- *                             map_free                                     *
+ *                             map_subfree                                  *
  *===========================================================================*/
-PRIVATE int map_free(struct vir_region *region)
+PRIVATE int map_subfree(struct vmproc *vmp,
+       struct vir_region *region, vir_bytes len)
 {
        struct phys_region *pr, *nextpr;
+       physr_iter iter;
 
 #if SANITYCHECKS
-       for(pr = region->first; pr; pr = pr->next) {
+       {
+       physr_start_iter_least(region->phys, &iter);
+       while((pr = physr_get_iter(&iter))) {
                struct phys_region *others;
                struct phys_block *pb;
 
-               SLABSANE(pr);
                pb = pr->ph;
-               SLABSANE(pb);
-               SLABSANE(pb->firstregion);
 
                for(others = pb->firstregion; others;
                        others = others->next_ph_list) {
-                       SLABSANE(others);
                        vm_assert(others->ph == pb);
                }
+               physr_incr_iter(&iter);
+       }
        }
 #endif
 
-       for(pr = region->first; pr; pr = nextpr) {
-               SANITYCHECK(SCL_DETAIL);
-               pb_unreferenced(region, pr);
-               nextpr = pr->next;
-               region->first = nextpr; /* For sanity checks. */
-               SLABFREE(pr);
+       physr_start_iter_least(region->phys, &iter);
+       while((pr = physr_get_iter(&iter))) {
+               physr_incr_iter(&iter);
+               if(pr->offset >= len)
+                       break;
+               if(pr->offset + pr->ph->length <= len) {
+                       pb_unreferenced(region, pr);
+                       physr_remove(region->phys, pr->offset);
+                       physr_start_iter_least(region->phys, &iter);
+                       SLABFREE(pr);
+               } else {
+                       vir_bytes sublen;
+                       vm_assert(len > pr->offset);
+                       vm_assert(len < pr->offset + pr->ph->length);
+                       vm_assert(pr->ph->refcount > 0);
+                       sublen = len - pr->offset;
+                       if(pr->ph->refcount > 1) {
+                               int r;
+                               r = map_copy_ph_block(vmp, region, pr);
+                               if(r != OK)
+                                       return r;
+                       }
+                       vm_assert(pr->ph->refcount == 1);
+                       FREE_MEM(ABS2CLICK(pr->ph->phys), ABS2CLICK(sublen));
+                       USE(pr, pr->offset += sublen;);
+                       USE(pr->ph,
+                               pr->ph->phys += sublen;
+                               pr->ph->length -= sublen;);
+               }
        }
 
+       return OK;
+}
+
+/*===========================================================================*
+ *                             map_free                                     *
+ *===========================================================================*/
+PRIVATE int map_free(struct vmproc *vmp, struct vir_region *region)
+{
+       int r;
+
+       if((r=map_subfree(vmp, region, region->length)) != OK)
+               return r;
+
+       SLABFREE(region->phys);
        SLABFREE(region);
 
        return OK;
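
map_unmap_region() now takes a length, so a region can be shrunk from the front instead of only being freed whole; when the cut lands inside a physical block, map_subfree() above frees just the covered prefix and slides the block forward by adjusting its offset, physical address and length. A standalone sketch of that adjustment, with all values invented:

    /* Sketch only: chopping the freed prefix off a physical block. */
    #include <stdio.h>

    int main(void)
    {
            unsigned long offset = 0x2000, phys = 0x90000, length = 0x4000;
            unsigned long len = 0x3000;             /* unmap up to this offset */
            unsigned long sublen = len - offset;    /* bytes to free: 0x1000 */

            offset += sublen;                       /* block now starts later */
            phys   += sublen;
            length -= sublen;

            printf("block now: offset 0x%lx phys 0x%lx len 0x%lx\n",
                    offset, phys, length);
            return 0;
    }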
@@ -470,7 +542,7 @@ struct vmproc *vmp;
 #if SANITYCHECKS
                nocheck++;
 #endif
-               map_free(r);
+               map_free(vmp, r);
                vmp->vm_regions = nextr;        /* For sanity checks. */
 #if SANITYCHECKS
                nocheck--;
@@ -513,13 +585,12 @@ vir_bytes offset;
 /*===========================================================================*
  *                             map_new_physblock                            *
  *===========================================================================*/
-PRIVATE int map_new_physblock(vmp, region, offset, length, what_mem, physhint)
+PRIVATE struct phys_region *map_new_physblock(vmp, region, offset, length, what_mem)
 struct vmproc *vmp;
 struct vir_region *region;
 vir_bytes offset;
 vir_bytes length;
 phys_bytes what_mem;
-struct phys_region *physhint;
 {
        struct phys_region *newphysr;
        struct phys_block *newpb;
@@ -529,14 +600,15 @@ struct phys_region *physhint;
        SANITYCHECK(SCL_FUNCTIONS);
 
        vm_assert(!(length % VM_PAGE_SIZE));
-       if(!physhint) physhint = region->first;
+
+       NOTRUNNABLE(vmp->vm_endpoint);
 
        /* Allocate things necessary for this chunk of memory. */
        if(!SLABALLOC(newphysr))
-               return ENOMEM;
+               return NULL;
        if(!SLABALLOC(newpb)) {
                SLABFREE(newphysr);
-               return ENOMEM;
+               return NULL;
        }
 
        /* Memory for new physical block. */
@@ -545,10 +617,15 @@ struct phys_region *physhint;
                u32_t af = PAF_CLEAR;
                if(region->flags & VR_PHYS64K)
                        af |= PAF_ALIGN64K;
+               if(region->flags & VR_LOWER16MB)
+                       af |= PAF_LOWER16MB;
+               if(region->flags & VR_LOWER1MB)
+                       af |= PAF_LOWER1MB;
                if((mem_clicks = ALLOC_MEM(clicks, af)) == NO_MEM) {
                        SLABFREE(newpb);
                        SLABFREE(newphysr);
-                       return ENOMEM;
+                       printf("map_new_physblock: couldn't allocate\n");
+                       return NULL;
                }
                mem = CLICK2ABS(mem_clicks);
        } else {
@@ -557,54 +634,37 @@ struct phys_region *physhint;
        SANITYCHECK(SCL_DETAIL);
 
        /* New physical block. */
+       USE(newpb,
        newpb->phys = mem;
        newpb->refcount = 1;
-       newpb->offset = offset;
        newpb->length = length;
-       newpb->firstregion = newphysr;
-       SLABSANE(newpb->firstregion);
+       newpb->firstregion = newphysr;);
 
        /* New physical region. */
+       USE(newphysr,
+       newphysr->offset = offset;
        newphysr->ph = newpb;
        newphysr->parent = region;
-       newphysr->next_ph_list = NULL;  /* No other references to this block. */
+       newphysr->next_ph_list = NULL;  /* No other references to this block. */);
 
        /* Update pagetable. */
        vm_assert(!(length % VM_PAGE_SIZE));
        vm_assert(!(newpb->length % VM_PAGE_SIZE));
        SANITYCHECK(SCL_DETAIL);
-       if(map_ph_writept(vmp, region, newpb, NULL, NULL) != OK) {
+       if(map_ph_writept(vmp, region, newphysr) != OK) {
                if(what_mem == MAP_NONE)
                        FREE_MEM(mem_clicks, clicks);
                SLABFREE(newpb);
                SLABFREE(newphysr);
-               return ENOMEM;
+               printf("map_new_physblock: map_ph_writept failed\n");
+               return NULL;
        }
 
-       if(!region->first || offset < region->first->ph->offset) {
-               /* Special case: offset is before start. */
-               if(region->first) {
-                       vm_assert(offset + length <= region->first->ph->offset);
-               }
-               newphysr->next = region->first;
-               region->first = newphysr;
-       } else {
-               struct phys_region *physr;
-               for(physr = physhint; physr; physr = physr->next) {
-                       if(!physr->next || physr->next->ph->offset > offset) {
-                               newphysr->next = physr->next;
-                               physr->next = newphysr;
-                               break;
-                       }
-               }
-
-               /* Loop must have put the node somewhere. */
-               vm_assert(physr->next == newphysr);
-       }
+       physr_insert(region->phys, newphysr);
 
        SANITYCHECK(SCL_FUNCTIONS);
 
-       return OK;
+       return newphysr;
 }
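
map_new_physblock() now returns the new phys_region (or NULL) instead of OK/ENOMEM, so callers test the pointer and supply their own error code. A minimal caller sketch, using names as they appear elsewhere in this file:

	struct phys_region *newphysr;

	if(!(newphysr = map_new_physblock(vmp, region, virpage,
		VM_PAGE_SIZE, MAP_NONE))) {
		printf("map_new_physblock failed\n");
		return ENOMEM;
	}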
 
 
@@ -637,7 +697,11 @@ struct phys_region *ph;
        vm_assert(CLICK2ABS(clicks) == ph->ph->length);
        if(region->flags & VR_PHYS64K)
                af |= PAF_ALIGN64K;
+
+       NOTRUNNABLE(vmp->vm_endpoint);
+
        if((newmem_cl = ALLOC_MEM(clicks, af)) == NO_MEM) {
+               printf("VM: map_copy_ph_block: couldn't allocate new block\n");
                SLABFREE(newpb);
                return ENOMEM;
        }
@@ -645,15 +709,17 @@ struct phys_region *ph;
        vm_assert(ABS2CLICK(newmem) == newmem_cl);
 
        pb_unreferenced(region, ph);
-       SLABSANE(ph);
-       SLABSANE(ph->ph);
        vm_assert(ph->ph->refcount > 0);
+
+USE(newpb,
        newpb->length = ph->ph->length;
-       newpb->offset = ph->ph->offset;
        newpb->refcount = 1;
        newpb->phys = newmem;
-       newpb->firstregion = ph;
-       ph->next_ph_list = NULL;
+       newpb->firstregion = ph;);
+
+       USE(ph, ph->next_ph_list = NULL;);
+
+       NOTRUNNABLE(vmp->vm_endpoint);
 
        /* Copy old memory to new memory. */
        if((r=sys_abscopy(ph->ph->phys, newpb->phys, newpb->length)) != OK) {
@@ -667,7 +733,7 @@ struct phys_region *ph;
 #endif
 
        /* Reference new block. */
-       ph->ph = newpb;
+       USE(ph, ph->ph = newpb;);
 
        /* Check reference counts. */
        SANITYCHECK(SCL_DETAIL);
@@ -675,7 +741,7 @@ struct phys_region *ph;
        /* Update pagetable with new address.
         * This will also make it writable.
         */
-       r = map_ph_writept(vmp, region, ph->ph, NULL, NULL);
+       r = map_ph_writept(vmp, region, ph);
        if(r != OK)
                vm_panic("map_copy_ph_block: map_ph_writept failed", r);
 
@@ -695,7 +761,7 @@ int write;
 {
        vir_bytes virpage;
        struct phys_region *ph;
-       int r;
+       int r = OK;
 
        vm_assert(offset >= 0);
        vm_assert(offset < region->length);
@@ -707,30 +773,30 @@ int write;
 
        SANITYCHECK(SCL_FUNCTIONS);
 
-       for(ph = region->first; ph; ph = ph->next)
-               if(ph->ph->offset <= offset && offset < ph->ph->offset + ph->ph->length)
-                       break;
+       NOTRUNNABLE(vmp->vm_endpoint);
 
-       if(ph) {
+       if((ph = physr_search(region->phys, offset, AVL_LESS_EQUAL)) &&
+          (ph->offset <= offset && offset < ph->offset + ph->ph->length)) {
                /* Pagefault in existing block. Do copy-on-write. */
                vm_assert(write);
                vm_assert(region->flags & VR_WRITABLE);
                vm_assert(ph->ph->refcount > 0);
 
-               if(ph->ph->refcount == 1)
-                       r = map_ph_writept(vmp, region, ph->ph, NULL, NULL);
-               else
+               if(WRITABLE(region, ph->ph)) {
+                       r = map_ph_writept(vmp, region, ph);
+                       if(r != OK)
+                               printf("map_ph_writept failed\n");
+               } else {
                        r = map_copy_ph_block(vmp, region, ph);
+                       if(r != OK)
+                               printf("map_copy_ph_block failed\n");
+               }
        } else {
                /* Pagefault in non-existing block. Map in new block. */
-#if 0
-               if(!write) {
-                       printf("VM: read from uninitialized memory by %d\n",
-                               vmp->vm_endpoint);
+               if(!map_new_physblock(vmp, region, virpage, VM_PAGE_SIZE, MAP_NONE)) {
+                       printf("map_new_physblock failed\n");
+                       r = ENOMEM;
                }
-#endif
-               r = map_new_physblock(vmp, region, virpage, VM_PAGE_SIZE,
-                       MAP_NONE, region->first);
        }
 
        if(r != OK)
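
The pagefault path above now chooses between upgrading the mapping in place and copying the block via the WRITABLE() predicate rather than testing refcount == 1 directly; a sketch of the two outcomes (assuming WRITABLE() holds only when the block may be mapped writable as-is):

	if(WRITABLE(region, ph->ph))
		r = map_ph_writept(vmp, region, ph);	/* make page writable */
	else
		r = map_copy_ph_block(vmp, region, ph);	/* private copy first */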
@@ -750,21 +816,26 @@ struct vir_region *region;
 vir_bytes offset, length;
 int write;
 {
-       struct phys_region *physr;
+       struct phys_region *physr, *nextphysr;
        int changes = 0;
+       physr_iter iter;
+
+       NOTRUNNABLE(vmp->vm_endpoint);
 
 #define FREE_RANGE_HERE(er1, er2) {                                    \
        struct phys_region *r1 = (er1), *r2 = (er2);                    \
        vir_bytes start = offset, end = offset + length;                \
-       if(r1) { start = MAX(start, r1->ph->offset + r1->ph->length); } \
-       if(r2) { end   = MIN(end, r2->ph->offset); }                    \
+       if(r1) {                                                        \
+               start = MAX(start, r1->offset + r1->ph->length); }      \
+       if(r2) {                                                        \
+               end   = MIN(end, r2->offset); }                         \
        if(start < end) {                                               \
                int r;                                                  \
                SANITYCHECK(SCL_DETAIL);                                \
-               if((r=map_new_physblock(vmp, region, start,             \
-                       end-start, MAP_NONE, r1 ? r1 : r2)) != OK) {    \
+               if(!map_new_physblock(vmp, region, start,               \
+                       end-start, MAP_NONE)) {                         \
                        SANITYCHECK(SCL_DETAIL);                        \
-                       return r;                                       \
+                       return ENOMEM;                                  \
                }                                                       \
                changes++;                                              \
        } }
@@ -777,16 +848,29 @@ int write;
        vm_assert(!(length % VM_PAGE_SIZE));
        vm_assert(!write || (region->flags & VR_WRITABLE));
 
-       FREE_RANGE_HERE(NULL, region->first);
+       physr_start_iter(region->phys, &iter, offset, AVL_LESS_EQUAL);
+       physr = physr_get_iter(&iter);
+
+       if(!physr || offset < physr->offset) {
+               physr_iter previter;
+               struct phys_region *prevphysr;
+               previter = iter;
+               physr_decr_iter(&iter);
+               prevphysr = physr_get_iter(&iter);
+
+               FREE_RANGE_HERE(prevphysr, physr);
+
+               iter = previter;
+       }
 
-       for(physr = region->first; physr; physr = physr->next) {
+       while(physr) {
                int r;
 
                SANITYCHECK(SCL_DETAIL);
 
                if(write) {
                  vm_assert(physr->ph->refcount > 0);
-                 if(physr->ph->refcount > 1) {
+                 if(!WRITABLE(region, physr->ph)) {
                        SANITYCHECK(SCL_DETAIL);
                        r = map_copy_ph_block(vmp, region, physr);
                        if(r != OK) {
@@ -797,7 +881,7 @@ int write;
                        SANITYCHECK(SCL_DETAIL);
                  } else {
                        SANITYCHECK(SCL_DETAIL);
-                       if((r=map_ph_writept(vmp, region, physr->ph, NULL, NULL)) != OK) {
+                       if((r=map_ph_writept(vmp, region, physr)) != OK) {
                                printf("VM: map_ph_writept failed\n");
                                return r;
                        }
@@ -807,17 +891,20 @@ int write;
                }
 
                SANITYCHECK(SCL_DETAIL);
-               FREE_RANGE_HERE(physr, physr->next);
+               physr_incr_iter(&iter);
+               nextphysr = physr_get_iter(&iter);
+               FREE_RANGE_HERE(physr, nextphysr);
                SANITYCHECK(SCL_DETAIL);
+               if(nextphysr) {
+                       if(nextphysr->offset >= offset + length)
+                               break;
+               }
+               physr = nextphysr;
        }
 
        SANITYCHECK(SCL_FUNCTIONS);
 
-#if SANITYCHECKS
-       if(changes == 0)  {
-               vm_panic("no changes?!", changes);
-       }
-#endif
+       vm_assert(changes > 0);
 
        return OK;
 }
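
FREE_RANGE_HERE(r1, r2) maps fresh, zeroed memory into any hole between two neighbouring phys_regions, clamped to the faulting range. The bounds it computes:

	start = MAX(offset, r1->offset + r1->ph->length);	/* end of r1   */
	end   = MIN(offset + length, r2->offset);		/* start of r2 */
	/* e.g. r1 covers [0x1000,0x3000), r2 starts at 0x6000 and the
	 * request is [0x0000,0x8000): a new block is mapped for
	 * [0x3000,0x6000). With r1 == NULL the range starts at 'offset';
	 * with r2 == NULL it ends at offset + length.
	 */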
@@ -827,8 +914,12 @@ static int countregions(struct vir_region *vr)
 {
        int n = 0;
        struct phys_region *ph;
-       for(ph = vr->first; ph; ph = ph->next)
+       physr_iter iter;
+       physr_start_iter_least(vr->phys, &iter);
+       while((ph = physr_get_iter(&iter))) {
                n++;
+               physr_incr_iter(&iter);
+       }
        return n;
 }
 #endif
@@ -836,7 +927,7 @@ static int countregions(struct vir_region *vr)
 /*===========================================================================*
  *                             map_copy_region                         *
  *===========================================================================*/
-PRIVATE struct vir_region *map_copy_region(struct vir_region *vr)
+PRIVATE struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr)
 {
        /* map_copy_region creates a complete copy of the vir_region
         * data structure, linking in the same phys_blocks directly,
@@ -847,34 +938,46 @@ PRIVATE struct vir_region *map_copy_region(struct vir_region *vr)
         * within this function.
         */
        struct vir_region *newvr;
-       struct phys_region *ph, *prevph = NULL;
+       struct phys_region *ph;
+       physr_iter iter;
+       physr_avl *phavl;
 #if SANITYCHECKS
        int cr;
        cr = countregions(vr);
 #endif
+
        if(!SLABALLOC(newvr))
                return NULL;
-       *newvr = *vr;
-       newvr->first = NULL;
-       newvr->next = NULL;
+       SLABALLOC(phavl);
+       if(!phavl) {
+               SLABFREE(newvr);
+               return NULL;
+       }
+       USE(newvr,
+               *newvr = *vr;
+               newvr->next = NULL;
+               newvr->phys = phavl;
+       );
+       physr_init(newvr->phys);
 
        SANITYCHECK(SCL_FUNCTIONS);
 
-       for(ph = vr->first; ph; ph = ph->next) {
+       physr_start_iter_least(vr->phys, &iter);
+       while((ph = physr_get_iter(&iter))) {
                struct phys_region *newph;
                if(!SLABALLOC(newph)) {
-                       map_free(newvr);
+                       map_free(vmp, newvr);
                        return NULL;
                }
-               newph->next = NULL;
+               USE(newph,
                newph->ph = ph->ph;
                newph->next_ph_list = NULL;
                newph->parent = newvr;
-               if(prevph) prevph->next = newph;
-               else newvr->first = newph;
-               prevph = newph;
+               newph->offset = ph->offset;);
+               physr_insert(newvr->phys, newph);
                SANITYCHECK(SCL_DETAIL);
                vm_assert(countregions(vr) == cr);
+               physr_incr_iter(&iter);
        }
 
        vm_assert(countregions(vr) == countregions(newvr));
@@ -891,12 +994,19 @@ PUBLIC int map_writept(struct vmproc *vmp)
 {
        struct vir_region *vr;
        struct phys_region *ph;
-       int ropages = 0, rwpages = 0;
+       int r;
 
-       for(vr = vmp->vm_regions; vr; vr = vr->next)
-               for(ph = vr->first; ph; ph = ph->next) {
-                       map_ph_writept(vmp, vr, ph->ph, &ropages, &rwpages);
+       for(vr = vmp->vm_regions; vr; vr = vr->next) {
+               physr_iter iter;
+               physr_start_iter_least(vr->phys, &iter);
+               while((ph = physr_get_iter(&iter))) {
+                       if((r=map_ph_writept(vmp, vr, ph)) != OK) {
+                               printf("VM: map_writept: failed\n");
+                               return r;
+                       }
+                       physr_incr_iter(&iter);
                }
+       }
 
        return OK;
 }
@@ -912,54 +1022,64 @@ struct vmproc *src;
        dst->vm_regions = NULL;
 
        SANITYCHECK(SCL_FUNCTIONS);
+
+       PT_SANE(&src->vm_pt);
+
        for(vr = src->vm_regions; vr; vr = vr->next) {
+               physr_iter iter_orig, iter_new;
                struct vir_region *newvr;
                struct phys_region *orig_ph, *new_ph;
        SANITYCHECK(SCL_DETAIL);
-               if(!(newvr = map_copy_region(vr))) {
+               if(!(newvr = map_copy_region(dst, vr))) {
                        map_free_proc(dst);
        SANITYCHECK(SCL_FUNCTIONS);
                        return ENOMEM;
                }
                SANITYCHECK(SCL_DETAIL);
-               newvr->parent = dst;
-               if(prevvr) { prevvr->next = newvr; }
+               USE(newvr, newvr->parent = dst;);
+               if(prevvr) { USE(prevvr, prevvr->next = newvr;); }
                else { dst->vm_regions = newvr; }
-               new_ph = newvr->first;
-               for(orig_ph = vr->first; orig_ph; orig_ph = orig_ph->next) {
+               physr_start_iter_least(vr->phys, &iter_orig);
+               physr_start_iter_least(newvr->phys, &iter_new);
+               while((orig_ph = physr_get_iter(&iter_orig))) {
                        struct phys_block *pb;
+                       new_ph = physr_get_iter(&iter_new);
                        /* Check two physregions both are nonnull,
                         * are different, and match physblocks.
                         */
-                       vm_assert(orig_ph && new_ph);
+                       vm_assert(new_ph);
+                       vm_assert(orig_ph);
                        vm_assert(orig_ph != new_ph);
                        pb = orig_ph->ph;
                        vm_assert(pb == new_ph->ph);
 
                        /* Link in new physregion. */
                        vm_assert(!new_ph->next_ph_list);
-                       new_ph->next_ph_list = pb->firstregion;
-                       pb->firstregion = new_ph;
-                       SLABSANE(new_ph);
-                       SLABSANE(new_ph->next_ph_list);
+                       USE(new_ph, new_ph->next_ph_list = pb->firstregion;);
+                       USE(pb, pb->firstregion = new_ph;);
 
                        /* Increase phys block refcount */
                        vm_assert(pb->refcount > 0);
-                       pb->refcount++;
+                       USE(pb, pb->refcount++;);
                        vm_assert(pb->refcount > 1);
 
                        /* Get next new physregion */
-                       new_ph = new_ph->next;
+                       physr_incr_iter(&iter_orig);
+                       physr_incr_iter(&iter_new);
                }
-               vm_assert(!new_ph);
+               vm_assert(!physr_get_iter(&iter_new));
                SANITYCHECK(SCL_DETAIL);
                prevvr = newvr;
        SANITYCHECK(SCL_DETAIL);
        }
        SANITYCHECK(SCL_DETAIL);
 
+       PT_SANE(&src->vm_pt);
+
        map_writept(src);
+       PT_SANE(&src->vm_pt);
        map_writept(dst);
+       PT_SANE(&dst->vm_pt);
 
        SANITYCHECK(SCL_FUNCTIONS);
        return OK;
@@ -1015,7 +1135,7 @@ PUBLIC int map_region_extend(struct vmproc *vmp, struct vir_region *vr,
        }
 
        if(!vr->next || end + delta <= vr->next->vaddr) {
-               vr->length += delta;
+               USE(vr, vr->length += delta;);
                return OK;
        }
 
@@ -1055,7 +1175,7 @@ u32_t tag;
 
 PUBLIC void map_region_set_tag(struct vir_region *vr, u32_t tag)
 {
-       vr->tag = tag;
+       USE(vr, vr->tag = tag;);
 }
 
 PUBLIC u32_t map_region_get_tag(struct vir_region *vr)
@@ -1066,9 +1186,14 @@ PUBLIC u32_t map_region_get_tag(struct vir_region *vr)
 /*========================================================================*
  *                             map_unmap_region                        *
  *========================================================================*/
-PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region)
+PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region,
+       vir_bytes len)
 {
+/* Shrink the region by 'len' bytes from the start, unreferencing
+ * any memory it mapped there.
+ */
        struct vir_region *r, *nextr, *prev = NULL;
+       vir_bytes regionstart;
 
        SANITYCHECK(SCL_FUNCTIONS);
 
@@ -1084,16 +1209,53 @@ PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region)
        if(r == NULL)
                vm_panic("map_unmap_region: region not found\n", NO_NUM);
 
-       if(!prev)
-               vmp->vm_regions = r->next;
-       else
-               prev->next = r->next;
-       map_free(r);
+       if(len > r->length || (len % VM_PAGE_SIZE)) {
+               printf("VM: bogus length 0x%lx\n", len);
+               return EINVAL;
+       }
+
+       if(!(r->flags & VR_ANON)) {
+               printf("VM: only unmap anonymous memory\n");
+               return EINVAL;
+       }
+
+       regionstart = r->vaddr;
+
+       if(len == r->length) {
+               /* Whole region disappears. Unlink and free it. */
+               if(!prev) {
+                       vmp->vm_regions = r->next;
+               } else {
+                       USE(prev, prev->next = r->next;);
+               }
+               map_free(vmp, r);
+       } else {
+               struct phys_region *pr;
+               physr_iter iter;
+               /* Region shrinks. First unreference its memory
+                * and then shrink the region.
+                */
+               map_subfree(vmp, r, len);
+               USE(r,
+               r->vaddr += len;
+               r->length -= len;);
+               physr_start_iter_least(r->phys, &iter);
+
+               /* vaddr has increased; decrease each phys_region's
+                * offset by the same amount, so that vaddr + offset
+                * still names the same virtual address.
+                */
+               while((pr = physr_get_iter(&iter))) {
+                       vm_assert(pr->offset >= len);
+                       USE(pr, pr->offset -= len;);
+                       physr_incr_iter(&iter);
+               }
+       }
 
        SANITYCHECK(SCL_DETAIL);
 
-       if(pt_writemap(&vmp->vm_pt, r->vaddr,
-         MAP_NONE, r->length, 0, WMF_OVERWRITE) != OK) {
+       if(pt_writemap(&vmp->vm_pt, regionstart,
+         MAP_NONE, len, 0, WMF_OVERWRITE) != OK) {
            printf("VM: map_unmap_region: pt_writemap failed\n");
            return ENOMEM;
        }
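
After a partial unmap the region's start moves up, so every remaining phys_region offset is rebased to keep vaddr + offset naming the same virtual address. With hypothetical numbers:

	/* Region [vaddr=0x10000, length=0x8000), map_unmap_region(len=0x2000):
	 *   r->vaddr:  0x10000 -> 0x12000
	 *   r->length: 0x8000  -> 0x6000
	 *   a phys_region at offset 0x3000 -> 0x1000, still virtual 0x13000.
	 */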
@@ -1102,3 +1264,159 @@ PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region)
 
        return OK;
 }
+
+/*========================================================================*
+ *                             map_remap                                 *
+ *========================================================================*/
+PUBLIC int map_remap(struct vmproc *dvmp, vir_bytes da, size_t size,
+               struct vir_region *region, vir_bytes *r)
+{
+       struct vir_region *vr, *prev;
+       struct phys_region *ph;
+       vir_bytes startv, dst_addr;
+       physr_iter iter;
+
+       SANITYCHECK(SCL_FUNCTIONS);
+
+       /* If no destination address is given, map above the stack top. */
+       if (!da)
+               dst_addr = dvmp->vm_stacktop;
+       else
+               dst_addr = da;
+       dst_addr = arch_vir2map(dvmp, dst_addr);
+
+       prev = NULL;
+       /* round up to page size */
+       if (size % I386_PAGE_SIZE)
+               size += I386_PAGE_SIZE - size % I386_PAGE_SIZE;
+       startv = region_find_slot(dvmp, dst_addr, VM_DATATOP, size, &prev);
+       if (startv == (vir_bytes) -1) {
+               printf("map_remap: search %x...\n", dst_addr);
+               map_printmap(dvmp);
+               return ENOMEM;
+       }
+       /* when the user specifies the address, we cannot change it */
+       if (da && (startv != dst_addr))
+               return EINVAL;
+
+       vr = map_copy_region(dvmp, region);
+       if(!vr)
+               return ENOMEM;
+
+       USE(vr,
+       vr->vaddr = startv;
+       vr->length = size;
+       vr->flags = region->flags;
+       vr->tag = VRT_NONE;
+       vr->parent = dvmp;);
+       vm_assert(vr->flags & VR_SHARED);
+
+       if (prev) {
+               USE(vr,
+               vr->next = prev->next;);
+               USE(prev, prev->next = vr;);
+       } else {
+               USE(vr,
+               vr->next = dvmp->vm_regions;);
+               dvmp->vm_regions = vr;
+       }
+
+       physr_start_iter_least(vr->phys, &iter);
+       while((ph = physr_get_iter(&iter))) {
+               struct phys_block *pb = ph->ph;
+               USE(pb, pb->refcount++;);
+               if(map_ph_writept(dvmp, vr, ph) != OK) {
+                       vm_panic("map_remap: map_ph_writept failed", NO_NUM);
+               }
+
+               physr_incr_iter(&iter);
+       }
+
+       *r = startv;
+
+       SANITYCHECK(SCL_FUNCTIONS);
+
+       return OK;
+}
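+
+A hedged caller sketch for map_remap(), which mirrors an existing VR_SHARED
+region into another address space, bumping each phys_block refcount and
+writing the destination page table (the remap call handler itself is not
+part of this hunk):
+
+	vir_bytes addr;
+	int r;
+
+	/* da == 0 lets VM pick a slot above dvmp's stack top. */
+	if((r = map_remap(dvmp, 0, region->length, region, &addr)) != OK)
+		return r;
+	/* 'addr' now holds the chosen destination virtual address. */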
+
+/*========================================================================*
+ *                             map_get_phys                              *
+ *========================================================================*/
+PUBLIC int map_get_phys(struct vmproc *vmp, vir_bytes addr, phys_bytes *r)
+{
+       struct vir_region *vr;
+       struct phys_region *ph;
+       physr_iter iter;
+
+       if (!(vr = map_lookup(vmp, addr)) ||
+               (vr->vaddr != addr))
+               return EINVAL;
+
+       if (!(vr->flags & VR_SHARED))
+               return EINVAL;
+
+       physr_start_iter_least(vr->phys, &iter);
+       ph = physr_get_iter(&iter);
+
+       vm_assert(ph);
+       vm_assert(ph->ph);
+       if (r)
+               *r = ph->ph->phys;
+
+       return OK;
+}
+
+/*========================================================================*
+ *                             map_get_ref                               *
+ *========================================================================*/
+PUBLIC int map_get_ref(struct vmproc *vmp, vir_bytes addr, u8_t *cnt)
+{
+       struct vir_region *vr;
+       struct phys_region *ph;
+       physr_iter iter;
+
+       if (!(vr = map_lookup(vmp, addr)) ||
+               (vr->vaddr != addr))
+               return EINVAL;
+
+       if (!(vr->flags & VR_SHARED))
+               return EINVAL;
+
+       physr_start_iter_least(vr->phys, &iter);
+       ph = physr_get_iter(&iter);
+
+       vm_assert(ph);
+       vm_assert(ph->ph);
+       if (cnt)
+               *cnt = ph->ph->refcount;
+
+       return OK;
+}
+
+
+/*========================================================================*
+ *                             printregionstats                          *
+ *========================================================================*/
+PUBLIC void printregionstats(struct vmproc *vmp)
+{
+       struct vir_region *vr;
+       struct phys_region *pr;
+       physr_iter iter;
+       vir_bytes used = 0, weighted = 0;
+
+       for(vr = vmp->vm_regions; vr; vr = vr->next) {
+               if(vr->flags & VR_DIRECT)
+                       continue;
+               physr_start_iter_least(vr->phys, &iter);
+               while((pr = physr_get_iter(&iter))) {
+                       physr_incr_iter(&iter);
+                       used += pr->ph->length;
+                       weighted += pr->ph->length / pr->ph->refcount;
+               }
+       }
+
+       printf("%6ukB  %6ukB\n", used/1024, weighted/1024);
+
+       return;
+}
+
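
In printregionstats(), 'used' counts every mapped byte while 'weighted' divides each block by its refcount, splitting shared memory among its sharers. For example:

	/* A 16kB block with refcount 4 adds 16kB to 'used' in each of the
	 * four processes, but only 4kB to each 'weighted' figure, so the
	 * weighted column sums to the real physical footprint.
	 */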
index 9e4cf9f026cc524a47e05f7a2089d182a0530493..9eaf1d41a749b124ff084a92d77d542c42536a91 100644 (file)
@@ -2,11 +2,24 @@
 #ifndef _REGION_H
 #define _REGION_H 1
 
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <minix/keymap.h>
+#include <minix/minlib.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include <minix/const.h>
+
 struct phys_block {
 #if SANITYCHECKS
        u32_t                   seencount;
 #endif
-       vir_bytes               offset; /* offset from start of vir region */
        vir_bytes               length; /* no. of contiguous bytes */
        phys_bytes              phys;   /* physical memory */
        u8_t                    refcount;       /* Refcount of these pages */
@@ -15,33 +28,42 @@ struct phys_block {
        struct phys_region      *firstregion;   
 };
 
-struct phys_region {
-       struct phys_region      *next;  /* next contiguous block */
+typedef struct phys_region {
        struct phys_block       *ph;
-       struct vir_region       *parent; /* Region that owns this phys_region. */
+       struct vir_region       *parent; /* parent vir_region. */
+       vir_bytes               offset; /* offset from start of vir region */
 
        /* list of phys_regions that reference the same phys_block */
        struct phys_region      *next_ph_list;  
-};
+
+       /* AVL fields */
+       struct phys_region      *less, *greater;
+       int                     factor;
+} phys_region_t;
+
+#include "physravl.h"
 
 struct vir_region {
        struct vir_region *next; /* next virtual region in this process */
        vir_bytes       vaddr;  /* virtual address, offset from pagetable */
        vir_bytes       length; /* length in bytes */
-       struct  phys_region *first; /* phys regions in vir region */
+       physr_avl       *phys;  /* avl tree of physical memory blocks */
        u16_t           flags;
        u32_t tag;              /* Opaque to mapping code. */
        struct vmproc *parent;  /* Process that owns this vir_region. */
 };
 
 /* Mapping flags: */
-#define VR_WRITABLE    0x01    /* Process may write here. */
-#define VR_NOPF                0x02    /* May not generate page faults. */
-#define VR_PHYS64K     0x04    /* Physical memory must be 64k aligned. */
+#define VR_WRITABLE    0x001   /* Process may write here. */
+#define VR_NOPF                0x002   /* May not generate page faults. */
+#define VR_PHYS64K     0x004   /* Physical memory must be 64k aligned. */
+#define VR_LOWER16MB   0x008
+#define VR_LOWER1MB    0x010
 
 /* Mapping type: */
-#define VR_ANON                0x10    /* Memory to be cleared and allocated */
-#define VR_DIRECT      0x20    /* Mapped, but not managed by VM */
+#define VR_ANON                0x100   /* Memory to be cleared and allocated */
+#define VR_DIRECT      0x200   /* Mapped, but not managed by VM */
+#define VR_SHARED      0x40
 
 /* Tag values: */
 #define VRT_NONE       0xBEEF0000
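
With phys_regions now keyed by offset in a per-region AVL tree (the less/greater/factor fields above), a covering-block lookup replaces the old list walk. The pattern used by the pagefault code:

	struct phys_region *ph;

	if((ph = physr_search(region->phys, offset, AVL_LESS_EQUAL)) &&
		offset < ph->offset + ph->ph->length) {
		/* 'offset' falls inside *ph */
	}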
diff --git a/servers/vm/rs.c b/servers/vm/rs.c
new file mode 100644 (file)
index 0000000..4473c67
--- /dev/null
@@ -0,0 +1,56 @@
+
+#define _SYSTEM 1
+
+#define VERBOSE 0
+
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <minix/keymap.h>
+#include <minix/minlib.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include <minix/safecopies.h>
+#include <minix/bitmap.h>
+
+#include <errno.h>
+#include <string.h>
+#include <env.h>
+#include <stdio.h>
+
+#include "glo.h"
+#include "proto.h"
+#include "util.h"
+
+/*===========================================================================*
+ *                             do_rs_set_priv                               *
+ *===========================================================================*/
+PUBLIC int do_rs_set_priv(message *m)
+{
+       int r, n, nr;
+       struct vmproc *vmp;
+
+       nr = m->VM_RS_NR;
+
+       if ((r = vm_isokendpt(nr, &n)) != OK) {
+               printf("do_rs_set_priv: message from strange source %d\n", nr);
+               return EINVAL;
+       }
+
+       vmp = &vmproc[n];
+
+       if (m->VM_RS_BUF) {
+               r = sys_datacopy(m->m_source, (vir_bytes) m->VM_RS_BUF,
+                                SELF, (vir_bytes) vmp->vm_call_priv_mask,
+                                sizeof(vmp->vm_call_priv_mask));
+               if (r != OK)
+                       return r;
+       }
+       return OK;
+}
+
index dac7b83e7bfa63a466d9c09a26bfa52cc79509f0..a0a67729e90a2bc0a551c750983b20a2d97b8a6b 100644 (file)
         printf("VM:%s:%d: %s failed\n", file, line, #c); \
        vm_panic("sanity check failed", NO_NUM); } } while(0)
 
+#define SLABSANITYCHECK(l) if((l) <= vm_sanitychecklevel) { \
+       slab_sanitycheck(__FILE__, __LINE__); }
+
 #define SANITYCHECK(l) if(!nocheck && ((l) <= vm_sanitychecklevel)) {  \
-               int failflag = 0; \
-               u32_t *origptr = CHECKADDR;\
-               int _sanep; \
                struct vmproc *vmp;     \
-                                       \
-                for(_sanep = 0; _sanep < sizeof(data1) / sizeof(*origptr); \
-                       _sanep++) {    \
-                        if(origptr[_sanep] != data1[_sanep]) {    \
-                                printf("%d: %08lx != %08lx  ", \
-               _sanep, origptr[_sanep], data1[_sanep]); failflag = 1;   \
-                        }                       \
-                }                               \
-        if(failflag) {                         \
-               printf("%s:%d: memory corruption test failed\n", \
-                       __FILE__, __LINE__);            \
-               vm_panic("memory corruption", NO_NUM);  \
-       }  \
-       for(vmp = vmproc; vmp <= &vmproc[_NR_PROCS]; vmp++) { \
+               vm_assert(incheck == 0);        \
+               incheck = 1;            \
+               usedpages_reset();      \
+       slab_sanitycheck(__FILE__, __LINE__);   \
+       for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) { \
                if((vmp->vm_flags & (VMF_INUSE | VMF_HASPT)) == \
                        (VMF_INUSE | VMF_HASPT)) { \
-                       pt_sanitycheck(&vmp->vm_pt, __FILE__, __LINE__); \
+                       PT_SANE(&vmp->vm_pt); \
                } \
        } \
        map_sanitycheck(__FILE__, __LINE__); \
+       vm_assert(incheck == 1);        \
+       incheck = 0;            \
        } 
+
+#include "../../kernel/proc.h"
+
+#define USE(obj, code) do {            \
+       slabunlock(obj, sizeof(*obj));  \
+       do {                            \
+               code                    \
+       } while(0);                     \
+       slablock(obj, sizeof(*obj));    \
+} while(0)
+
+#define SLABSANE(ptr) { \
+       if(!slabsane_f(__FILE__, __LINE__, ptr, sizeof(*(ptr)))) { \
+               printf("VM:%s:%d: SLABSANE(%s)\n", __FILE__, __LINE__, #ptr); \
+               vm_panic("SLABSANE failed", NO_NUM);    \
+       } \
+}
+
+#define NOTRUNNABLE(ep) {                      \
+       struct proc pr;                         \
+       if(sys_getproc(&pr, ep) != OK) {        \
+               vm_panic("VM: sys_getproc failed", ep); \
+       }                                       \
+       if(!pr.p_rts_flags) {                   \
+               vm_panic("VM: runnable", ep);   \
+       }                                       \
+}
+
 #else
 #define SANITYCHECK 
+#define SLABSANITYCHECK(l)
+#define USE(obj, code) do { code } while(0)
+#define SLABSANE(ptr)
+#define NOTRUNNABLE(ep)
 #endif
 
 #endif
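
With slab sanity checking on, objects come back from SLABALLOC() read-only, so every field update in this commit is funnelled through USE(), which unlocks the object's page, runs the statements, and locks it again. Usage as seen in region.c:

	USE(vr, vr->tag = tag;);		/* single assignment */
	USE(newpb,
		newpb->phys = mem;
		newpb->refcount = 1;
		newpb->length = length;);	/* several, one unlock/lock */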
index a43e76962fb6e4c9ab7df6eab4ffab967a9a676a..bc7555bb5b94f830a4a30ee614b40401627134ad 100644 (file)
@@ -13,6 +13,7 @@
 #include <minix/ipc.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/bitmap.h>
 #include <sys/sigcontext.h>
 
 #include <errno.h>
index 2789303a83e9dff79214e150f617f79824b7b62a..2cc8c2e65191664dc49f7fa3d1fdc53ad46e7abd 100644 (file)
@@ -13,6 +13,8 @@
 #include <minix/ipc.h>
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
+#include <minix/bitmap.h>
+#include <minix/debug.h>
 
 #include <errno.h>
 #include <string.h>
@@ -27,7 +29,7 @@
 
 #define SLABSIZES 60
 
-#define ITEMSPERPAGE(s, bytes) (DATABYTES / (bytes))
+#define ITEMSPERPAGE(bytes) (DATABYTES / (bytes))
 
 #define ELBITS         (sizeof(element_t)*8)
 #define BITPAT(b)      (1UL << ((b) %  ELBITS))
 #define OFF(f, b) vm_assert(!GETBIT(f, b))
 #define ON(f, b)  vm_assert(GETBIT(f, b))
 
+#if SANITYCHECKS
+#define SLABDATAWRITABLE(data, wr) do {                        \
+       vm_assert(data->sdh.writable == WRITABLE_NONE); \
+       vm_assert(wr != WRITABLE_NONE);                 \
+       vm_pagelock(data, 0);                           \
+       data->sdh.writable = wr;                        \
+} while(0)
+
+#define SLABDATAUNWRITABLE(data) do {                  \
+       vm_assert(data->sdh.writable != WRITABLE_NONE); \
+       data->sdh.writable = WRITABLE_NONE;             \
+       vm_pagelock(data, 1);                           \
+} while(0)
+
+#define SLABDATAUSE(data, code) do {                   \
+       SLABDATAWRITABLE(data, WRITABLE_HEADER);        \
+       code                                            \
+       SLABDATAUNWRITABLE(data);                       \
+} while(0)
+
+#else
+
+#define SLABDATAWRITABLE(data, wr)
+#define SLABDATAUNWRITABLE(data)
+#define SLABDATAUSE(data, code) do { code } while(0)
+
+#endif
+
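
SLABDATAUSE() applies the same unlock/write/lock discipline to the slab page headers themselves, with sdh.writable tracking which item (or the header) is currently writable. As used by the allocator below:

	SLABDATAUSE(firstused, firstused->sdh.freeguess = i+1;);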
 #define GETBIT(f, b)     (BITEL(f,b) &   BITPAT(b))
-#define SETBIT(f, b)   {OFF(f,b); (BITEL(f,b)|= BITPAT(b)); (f)->sdh.nused++; }
-#define CLEARBIT(f, b) {ON(f, b); (BITEL(f,b)&=~BITPAT(b)); (f)->sdh.nused--; (f)->sdh.freeguess = (b); }
+#define SETBIT(f, b)   {OFF(f,b); SLABDATAUSE(f, BITEL(f,b)|= BITPAT(b); (f)->sdh.nused++;); }
+#define CLEARBIT(f, b) {ON(f, b); SLABDATAUSE(f, BITEL(f,b)&=~BITPAT(b); (f)->sdh.nused--; (f)->sdh.freeguess = (b);); }
 
 #define MINSIZE 8
 #define MAXSIZE (SLABSIZES-1+MINSIZE)
@@ -56,28 +86,32 @@ typedef element_t elements_t[USEELEMENTS];
  * inconsistent state during a slaballoc() / slabfree(). So only do
  * our own sanity checks here, with SLABSANITYCHECK.
  */
-#if SANITYCHECKS
-#define SLABSANITYCHECK(l) if((l) <= vm_sanitychecklevel) { \
-       slab_sanitycheck(__FILE__, __LINE__); }
-#else
-#define SLABSANITYCHECK(l)
-#endif
+
+
+/* Special writable values. */
+#define WRITABLE_NONE  -2
+#define WRITABLE_HEADER        -1
 
 struct sdh {
-       u8_t list;
-       u16_t nused;    /* Number of data items used in this slab. */
 #if SANITYCHECKS
-       u32_t magic;
+       u32_t magic1;
 #endif
+       u8_t list;
+       u16_t nused;    /* Number of data items used in this slab. */
        int freeguess;
        struct slabdata *next, *prev;
        elements_t usebits;
        phys_bytes phys;
+#if SANITYCHECKS
+       int writable;   /* data item number or WRITABLE_* */
+       u32_t magic2;
+#endif
 };
 
 #define DATABYTES      (VM_PAGE_SIZE-sizeof(struct sdh))
 
-#define MAGIC 0x1f5b842f
+#define MAGIC1 0x1f5b842f
+#define MAGIC2 0x8bb5a420
 #define JUNK  0xdeadbeef
 #define NOJUNK 0xc0ffee
 
@@ -107,6 +141,7 @@ FORWARD _PROTOTYPE( int objstats, (void *, int, struct slabheader **, struct sla
 
 #define LH(sl, l) (sl)->list_head[l]
 
+/* Move the head of list l1 to list l2 in slabheader sl. */
 #define MOVEHEAD(sl, l1, l2) {         \
        struct slabdata *t;             \
        vm_assert(LH(sl,l1));           \
@@ -114,28 +149,35 @@ FORWARD _PROTOTYPE( int objstats, (void *, int, struct slabheader **, struct sla
        ADDHEAD(t, sl, l2);             \
 }
 
+/* Remove the head of list 'list' in sl and assign it, unlinked, to 'to'. */
 #define REMOVEHEAD(sl, list, to) {     \
-       (to) = LH(sl, list);            \
-       vm_assert(to);                  \
-       LH(sl, list) = (to)->sdh.next;  \
-       if(LH(sl, list)) LH(sl, list) = NULL; \
-       vm_assert((to)->sdh.magic == MAGIC);\
-       vm_assert(!(to)->sdh.prev);             \
+       struct slabdata *dat;           \
+       dat = (to) = LH(sl, list);      \
+       vm_assert(dat);                 \
+       LH(sl, list) = dat->sdh.next;   \
+       UNLINKNODE(dat);                \
 }
 
+/* Add slabdata nw to the head of list l in slabheader sl. */
 #define ADDHEAD(nw, sl, l) {                   \
-       vm_assert((nw)->sdh.magic == MAGIC);    \
-       (nw)->sdh.next = LH(sl, l);                     \
-       (nw)->sdh.prev = NULL;                  \
-       (nw)->sdh.list = l;                             \
+       SLABDATAUSE(nw,                         \
+               (nw)->sdh.next = LH(sl, l);     \
+               (nw)->sdh.prev = NULL;          \
+               (nw)->sdh.list = l;);           \
        LH(sl, l) = (nw);                       \
-       if((nw)->sdh.next) (nw)->sdh.next->sdh.prev = (nw);     \
+       if((nw)->sdh.next) {                    \
+               SLABDATAUSE((nw)->sdh.next, \
+                       (nw)->sdh.next->sdh.prev = (nw););      \
+       } \
 }
 
-#define UNLINKNODE(n)  {                               \
-       if((f)->sdh.prev) (f)->sdh.prev->sdh.next = (f)->sdh.next;      \
-       if((f)->sdh.next) (f)->sdh.next->sdh.prev = (f)->sdh.prev;      \
-       }
+#define UNLINKNODE(node)       {                               \
+       struct slabdata *next, *prev;                           \
+       prev = (node)->sdh.prev;                                \
+       next = (node)->sdh.next;                                \
+       if(prev) { SLABDATAUSE(prev, prev->sdh.next = next;); } \
+       if(next) { SLABDATAUSE(next, next->sdh.prev = prev;); } \
+}
 
 struct slabdata *newslabdata(int list)
 {
@@ -151,12 +193,18 @@ struct slabdata *newslabdata(int list)
 
        n->sdh.phys = p;
 #if SANITYCHECKS
-       n->sdh.magic = MAGIC;
+       n->sdh.magic1 = MAGIC1;
+       n->sdh.magic2 = MAGIC2;
 #endif
        n->sdh.nused = 0;
        n->sdh.freeguess = 0;
        n->sdh.list = list;
 
+#if SANITYCHECKS
+       n->sdh.writable = WRITABLE_HEADER;
+       SLABDATAUNWRITABLE(n);
+#endif
+
        return n;
 }
 
@@ -173,15 +221,17 @@ PRIVATE int checklist(char *file, int line,
 
        while(n) {
                int count = 0, i;
+               MYASSERT(n->sdh.magic1 == MAGIC1);
+               MYASSERT(n->sdh.magic2 == MAGIC2);
                MYASSERT(n->sdh.list == l);
-               MYASSERT(n->sdh.magic == MAGIC);
+               MYASSERT(usedpages_add(n->sdh.phys, VM_PAGE_SIZE) == OK);
                if(n->sdh.prev)
                        MYASSERT(n->sdh.prev->sdh.next == n);
                else
                        MYASSERT(s->list_head[l] == n);
                if(n->sdh.next) MYASSERT(n->sdh.next->sdh.prev == n);
                for(i = 0; i < USEELEMENTS*8; i++)
-                       if(i >= ITEMSPERPAGE(s, bytes))
+                       if(i >= ITEMSPERPAGE(bytes))
                                MYASSERT(!GETBIT(n, i));
                        else
                                if(GETBIT(n,i))
@@ -211,21 +261,25 @@ PUBLIC void slab_sanitycheck(char *file, int line)
 /*===========================================================================*
  *                             int slabsane                                 *
  *===========================================================================*/
-PUBLIC int slabsane(void *mem, int bytes)
+PUBLIC int slabsane_f(char *file, int line, void *mem, int bytes)
 {
        struct slabheader *s;
        struct slabdata *f;
        int i;
+
        return (objstats(mem, bytes, &s, &f, &i) == OK);
 }
 #endif
 
+static int nojunkwarning = 0;
+
 /*===========================================================================*
  *                             void *slaballoc                              *
  *===========================================================================*/
 PUBLIC void *slaballoc(int bytes)
 {
-       int i, n = 0;
+       int i;
+       int count = 0;
        struct slabheader *s;
        struct slabdata *firstused;
 
@@ -242,10 +296,10 @@ PUBLIC void *slaballoc(int bytes)
                /* Make sure there is something on the freelist. */
        SLABSANITYCHECK(SCL_DETAIL);
                if(!LH(s, LIST_FREE)) {
-                       struct slabdata *n = newslabdata(LIST_FREE);
+                       struct slabdata *nd = newslabdata(LIST_FREE);
        SLABSANITYCHECK(SCL_DETAIL);
-                       if(!n) return NULL;
-                       ADDHEAD(n, s, LIST_FREE);
+                       if(!nd) return NULL;
+                       ADDHEAD(nd, s, LIST_FREE);
        SLABSANITYCHECK(SCL_DETAIL);
                }
 
@@ -260,18 +314,21 @@ PUBLIC void *slaballoc(int bytes)
        vm_assert(s);
        firstused = LH(s, LIST_USED);
        vm_assert(firstused);
-       vm_assert(firstused->sdh.magic == MAGIC);
+       vm_assert(firstused->sdh.magic1 == MAGIC1);
+       vm_assert(firstused->sdh.magic2 == MAGIC2);
+       vm_assert(firstused->sdh.nused < ITEMSPERPAGE(bytes));
 
-       for(i = firstused->sdh.freeguess; n < ITEMSPERPAGE(s, bytes); n++, i++) {
+       for(i = firstused->sdh.freeguess;
+               count < ITEMSPERPAGE(bytes); count++, i++) {
        SLABSANITYCHECK(SCL_DETAIL);
-               i = i % ITEMSPERPAGE(s, bytes);
+               i = i % ITEMSPERPAGE(bytes);
 
                if(!GETBIT(firstused, i)) {
                        struct slabdata *f;
                        char *ret;
                        SETBIT(firstused, i);
        SLABSANITYCHECK(SCL_DETAIL);
-                       if(firstused->sdh.nused == ITEMSPERPAGE(s, bytes)) {
+                       if(firstused->sdh.nused == ITEMSPERPAGE(bytes)) {
        SLABSANITYCHECK(SCL_DETAIL);
                                MOVEHEAD(s, LIST_USED, LIST_FULL);
        SLABSANITYCHECK(SCL_DETAIL);
@@ -280,20 +337,21 @@ PUBLIC void *slaballoc(int bytes)
                        ret = ((char *) firstused->data) + i*bytes;
 
 #if SANITYCHECKS
-                       f = (struct slabdata *) ((char *) ret - (vir_bytes) ret % VM_PAGE_SIZE);
-                       if(f->sdh.magic != MAGIC) {
-                               printf("slaballoc bogus pointer 0x%lx, "
-                                       "rounded 0x%lx, bad magic 0x%lx\n",
-                                       ret, f, f->sdh.magic);
-                               vm_panic("slaballoc check failed", NO_NUM);
-                       }
+                       nojunkwarning++;
+                       slabunlock(ret, bytes);
+                       nojunkwarning--;
+                       vm_assert(!nojunkwarning);
                        *(u32_t *) ret = NOJUNK;
+                       slablock(ret, bytes);
 #endif
                        SLABSANITYCHECK(SCL_FUNCTIONS);
-                       firstused->sdh.freeguess = i+1;
+                       SLABDATAUSE(firstused, firstused->sdh.freeguess = i+1;);
 
 #if SANITYCHECKS
-                       if(!slabsane(ret, bytes))
+                       if(bytes >= SLABSIZES+MINSIZE) {
+                               printf("slaballoc: odd, bytes %d?\n", bytes);
+                       }
+                       if(!slabsane_f(__FILE__, __LINE__, ret, bytes))
                                vm_panic("slaballoc: slabsane failed", NO_NUM);
 #endif
 
@@ -317,12 +375,16 @@ PUBLIC void *slaballoc(int bytes)
 PRIVATE int objstats(void *mem, int bytes,
        struct slabheader **sp, struct slabdata **fp, int *ip)
 {
+#if SANITYCHECKS
 #define OBJSTATSCHECK(cond) \
        if(!(cond)) { \
-               printf("VM:objstats: %s failed for ptr 0x%p, %d bytes\n", \
+               printf("VM: objstats: %s failed for ptr 0x%p, %d bytes\n", \
                        #cond, mem, bytes); \
                return EINVAL; \
        }
+#else
+#define OBJSTATSCHECK(cond)
+#endif
 
        struct slabheader *s;
        struct slabdata *f;
@@ -331,21 +393,19 @@ PRIVATE int objstats(void *mem, int bytes,
        OBJSTATSCHECK((char *) mem >= (char *) VM_PAGE_SIZE);
 
 #if SANITYCHECKS
-       if(*(u32_t *) mem == JUNK) {
+       if(*(u32_t *) mem == JUNK && !nojunkwarning) {
                util_stacktrace();
                printf("VM: WARNING: JUNK seen in slab object\n");
        }
 #endif
-
        /* Retrieve entry in slabs[]. */
        GETSLAB(bytes, s);
 
        /* Round address down to VM_PAGE_SIZE boundary to get header. */
        f = (struct slabdata *) ((char *) mem - (vir_bytes) mem % VM_PAGE_SIZE);
 
-#if SANITYCHECKS
-       OBJSTATSCHECK(f->sdh.magic == MAGIC);
-#endif
+       OBJSTATSCHECK(f->sdh.magic1 == MAGIC1);
+       OBJSTATSCHECK(f->sdh.magic2 == MAGIC2);
        OBJSTATSCHECK(f->sdh.list == LIST_USED || f->sdh.list == LIST_FULL);
 
        /* Make sure it's in range. */
@@ -379,22 +439,26 @@ PUBLIC void slabfree(void *mem, int bytes)
 
        SLABSANITYCHECK(SCL_FUNCTIONS);
 
+       if(objstats(mem, bytes, &s, &f, &i) != OK) {
+               vm_panic("slabfree objstats failed", NO_NUM);
+       }
+
 #if SANITYCHECKS
        if(*(u32_t *) mem == JUNK) {
                printf("VM: WARNING: likely double free, JUNK seen\n");
        }
+
+       slabunlock(mem, bytes);
+       *(u32_t *) mem = JUNK;
+       nojunkwarning++;
+       slablock(mem, bytes);
+       nojunkwarning--;
+       vm_assert(!nojunkwarning);
 #endif
-       if(objstats(mem, bytes, &s, &f, &i) != OK) {
-               vm_panic("slabfree objstats failed", NO_NUM);
-       }
 
        /* Free this data. */
        CLEARBIT(f, i);
 
-#if SANITYCHECKS
-       *(u32_t *) mem = JUNK;
-#endif
-
        /* Check if this slab changes lists. */
        if(f->sdh.nused == 0) {
                /* Now become FREE; must've been USED */
@@ -404,7 +468,7 @@ PUBLIC void slabfree(void *mem, int bytes)
                        LH(s, LIST_USED) = f->sdh.next;
                ADDHEAD(f, s, LIST_FREE);
                SLABSANITYCHECK(SCL_DETAIL);
-       } else if(f->sdh.nused == ITEMSPERPAGE(s, bytes)-1) {
+       } else if(f->sdh.nused == ITEMSPERPAGE(bytes)-1) {
                /* Now become USED; must've been FULL */
                vm_assert(f->sdh.list == LIST_FULL);
                UNLINKNODE(f);
@@ -422,6 +486,42 @@ PUBLIC void slabfree(void *mem, int bytes)
        return;
 }
 
+/*===========================================================================*
+ *                             void *slablock                               *
+ *===========================================================================*/
+PUBLIC void slablock(void *mem, int bytes)
+{
+       int i;
+       struct slabheader *s;
+       struct slabdata *f;
+
+       if(objstats(mem, bytes, &s, &f, &i) != OK)
+               vm_panic("slablock objstats failed", NO_NUM);
+
+       SLABDATAUNWRITABLE(f);
+
+       FIXME("verify new contents");
+
+       return;
+}
+
+/*===========================================================================*
+ *                             void *slabunlock                             *
+ *===========================================================================*/
+PUBLIC void slabunlock(void *mem, int bytes)
+{
+       int i;
+       struct slabheader *s;
+       struct slabdata *f;
+
+       if(objstats(mem, bytes, &s, &f, &i) != OK)
+               vm_panic("slabunlock objstats failed", NO_NUM);
+
+       SLABDATAWRITABLE(f, i);
+
+       return;
+}
+
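+
+slablock()/slabunlock() are the runtime halves of this scheme: objstats()
+locates the slab page header for the object, and the data page is then made
+read-only or writable via vm_pagelock(). USE() expands to exactly this
+pairing; a direct use would look like:
+
+	slabunlock(obj, sizeof(*obj));
+	obj->field = value;		/* hypothetical field and value */
+	slablock(obj, sizeof(*obj));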
 #if SANITYCHECKS
 /*===========================================================================*
  *                             void slabstats                               *
index 7d90821560996117e83ef741e37b2f5a2af71c99..2b5bd6374e4aac4c4905597e1fe72ead5f3d07a4 100644 (file)
@@ -8,15 +8,15 @@
 #define ELEMENTS(a) (sizeof(a)/sizeof((a)[0]))
 
 #if SANITYCHECKS
-#define vm_assert(cond) do {                           \
+#define vm_assert(cond) {                              \
        if(vm_sanitychecklevel > 0 && !(cond)) {        \
                printf("VM:%s:%d: assert failed: %s\n", \
                        __FILE__, __LINE__, #cond);     \
                panic("VM", "assert failed", NO_NUM);   \
        }                                               \
-       } while(0)
+       }
 #else
-#define vm_assert(cond)
+#define vm_assert(cond)        ;
 #endif
 
 #define vm_panic(str, n) { char _pline[100]; \
index 47ed47a40fab18014635cd0710c36edabc8d370b..09b4dbbc05cff2912a0ca4c6fcb60dd1d796b959 100644 (file)
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/type.h>
+#include <minix/bitmap.h>
 #include <string.h>
 #include <errno.h>
 #include <env.h>
 #include <unistd.h>
+#include <memory.h>
 
 #include "proto.h"
 #include "glo.h"
@@ -119,8 +121,8 @@ struct mem_map *map_ptr;                        /* memory to remove */
 PUBLIC int vm_isokendpt(endpoint_t endpoint, int *proc)
 {
         *proc = _ENDPOINT_P(endpoint);
-        if(*proc < -NR_TASKS || *proc >= NR_PROCS)
-                return EINVAL;
+        if(*proc < 0 || *proc >= NR_PROCS)
+               vm_panic("crazy slot number", *proc); 
         if(*proc >= 0 && endpoint != vmproc[*proc].vm_endpoint)
                 return EDEADSRCDST;
         if(*proc >= 0 && !(vmproc[*proc].vm_flags & VMF_INUSE))
@@ -163,3 +165,28 @@ char *brk_addr;
         return 0;
 }
 
+/*===========================================================================*
+ *                              do_ctl                                        *
+ *===========================================================================*/
+PUBLIC int do_ctl(message *m)
+{
+       int pr;
+
+       switch(m->VCTL_WHAT) {
+               case VCTLP_STATS_MEM:
+                       printmemstats();
+                       break;
+               case VCTLP_STATS_EP:
+                       if(vm_isokendpt(m->VCTL_PARAM, &pr) != OK)
+                               return EINVAL;
+                       printregionstats(&vmproc[pr]);
+                       break;
+               default:
+                       return EINVAL;
+       }
+
+       return OK;
+}
+
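+
+do_ctl() dispatches on VCTL_WHAT; a hedged sketch of the message a client
+would fill in to request per-endpoint region statistics (transport to VM not
+shown, and the call number lies outside this hunk):
+
+	message m;
+
+	m.VCTL_WHAT = VCTLP_STATS_EP;
+	m.VCTL_PARAM = ep;	/* endpoint of the process to report on */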
index 62f89fea9ca765ef4b2b0d1da3fa47e8ab9dfe31..9f8d34899838fc7a44c73810f56d0d9684b4ddbc 100644 (file)
@@ -16,6 +16,7 @@
 #include <minix/sysutil.h>
 #include <minix/syslib.h>
 #include <minix/safecopies.h>
+#include <minix/bitmap.h>
 
 #include <errno.h>
 #include <string.h>
index 0f4040db7160e284826e6d58dde0f4ee3ee85106..53a9aac0aa272829fe79f1aa059d50088ea3611c 100644 (file)
@@ -5,6 +5,8 @@
 #define PAF_CLEAR      0x01    /* Clear physical memory. */
 #define PAF_CONTIG     0x02    /* Physically contiguous. */
 #define PAF_ALIGN64K   0x04    /* Aligned to 64k boundary. */
+#define PAF_LOWER16MB  0x08
+#define PAF_LOWER1MB   0x10
 
 /* special value for v in pt_allocmap */
 #define AM_AUTO         ((u32_t) -1)
 
 /* Compile in asserts and custom sanity checks at all? */
 #define SANITYCHECKS   0
-#define VMSTATS                1
+#define VMSTATS                0
+
+/* Minimum stack region size - 64MB. */
+#define MINSTACKREGION (64*1024*1024)
 
 /* If so, this level: */
 #define SCL_NONE       0       /* No sanity checks - vm_assert()s only. */
@@ -31,7 +36,9 @@
 #define VMP_CATEGORIES 4
 
 /* Flags to pt_writemap(). */
-#define WMF_OVERWRITE  0x01    /* Caller knows map may overwrite. */
+#define WMF_OVERWRITE          0x01    /* Caller knows map may overwrite. */
+#define WMF_WRITEFLAGSONLY     0x02    /* Copy physaddr and update flags. */
+#define WMF_FREE               0x04    /* Free pages overwritten. */
 
-/* Special value of 'what' to map_page_region meaning: unknown. */
 #define MAP_NONE       0xFFFFFFFE
+
index 7b9d6a18ea0252ecafc30078b9c51971f51e9e4c..47beedaa7687ec722a2c0f1a876db1e873f20185 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <pagetable.h>
 #include <arch_vmproc.h>
+#include <minix/bitmap.h>
 
 #include "vm.h"
 
@@ -31,6 +32,9 @@ struct vmproc {
        /* Heap for brk() to extend. */
        struct vir_region *vm_heap;
 
+#define VM_CALL_PRIV_MASK_SIZE BITMAP_CHUNKS(VM_NCALLS)
+       bitchunk_t vm_call_priv_mask[VM_CALL_PRIV_MASK_SIZE];
+
        /* State for requests pending to be done to vfs on behalf of
         * this process.
         */