From: Ben Gras
Date: Mon, 21 Sep 2009 14:49:49 +0000 (+0000)
Subject: - a page that points to the page directory values of all processes,
X-Git-Tag: v3.1.5~117

- a page that points to the page directory values of all processes,
  shared with the kernel and mapped into the kernel address space;
  the kernel is notified of its location, and the kernel segment size
  is increased to make it fit.
- map in the kernel, and other processes that don't have their own
  page table, using a single 4MB (global) mapping.
- new sanity check facility: objects that are allocated with the
  slab allocator are, when running with sanity checking on, marked
  read-only until they are explicitly unlocked using the USE() macro.
- another sanity check facility: collect all uses of memory and
  check that they don't overlap with (a) each other or (b) free memory.
- own munmap() and munmap_text() functions.
- exec() recovers from out-of-memory conditions properly now; this
  solves some weird exec() behaviour.
- chew off memory from the same side of the chunk where we start
  scanning, solving some memory fragmentation issues.
- use AVL trees for the freelist and for phys_ranges in regions.
- implement the most useful part of munmap().
- remap() stuff is GQ's for shared memory.
---
diff --git a/servers/vm/Makefile b/servers/vm/Makefile
index bf3c6bfe3..e108cc8cc 100644
--- a/servers/vm/Makefile
+++ b/servers/vm/Makefile
@@ -4,7 +4,8 @@ SERVER = vm
 include /etc/make.conf
 OBJ = main.o alloc.o utility.o exec.o exit.o fork.o break.o \
-	signal.o vfs.o mmap.o slaballoc.o region.o pagefaults.o
+	signal.o vfs.o mmap.o slaballoc.o region.o pagefaults.o addravl.o \
+	physravl.o rs.o queryexit.o
 ARCHOBJ = $(ARCH)/vm.o $(ARCH)/pagetable.o $(ARCH)/arch_pagefaults.o $(ARCH)/util.o
 CPPFLAGS=-I../../kernel/arch/$(ARCH)/include -I$(ARCH)
@@ -13,7 +14,7 @@ CFLAGS = $(CPROFILE) $(CPPFLAGS)
 # build local binary
 all build install: $(SERVER)
-	#install $(SERVER)
+	install -S 100k $(SERVER)
 $(SERVER): $(OBJ) phony
 	cd $(ARCH) && $(MAKE)
diff --git a/servers/vm/addravl.c b/servers/vm/addravl.c
new file mode 100644
index 000000000..72c66c8b9
--- /dev/null
+++ b/servers/vm/addravl.c
@@ -0,0 +1,8 @@
+
+#include "sanitycheck.h"
+#include "pagerange.h"
+#include "addravl.h"
+#include "proto.h"
+#include "util.h"
+#include "cavl_impl.h"
+
diff --git a/servers/vm/addravl.h b/servers/vm/addravl.h
new file mode 100644
index 000000000..1024ae8f7
--- /dev/null
+++ b/servers/vm/addravl.h
@@ -0,0 +1,24 @@
+
+#ifndef ADDRAVL
+#define ADDRAVL 1
+
+#define AVL_UNIQUE(id) addr_ ## id
+#define AVL_HANDLE pagerange_t *
+#define AVL_KEY phys_bytes
+#define AVL_MAX_DEPTH 30	/* good for 2 million nodes */
+#define AVL_NULL NULL
+#define AVL_GET_LESS(h, a) (h)->less
+#define AVL_GET_GREATER(h, a) (h)->greater
+#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
+#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
+#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
+#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
+#define AVL_SET_ROOT(h, v) (h)->root = v;
+#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ?
-1 : 0)) +#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->addr) +#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->addr, (h2)->addr) +#define AVL_INSIDE_STRUCT char pad[4]; + +#include "cavl_if.h" + +#endif diff --git a/servers/vm/alloc.c b/servers/vm/alloc.c index 7ab0fd488..f9387a200 100644 --- a/servers/vm/alloc.c +++ b/servers/vm/alloc.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -36,9 +38,12 @@ #include "proto.h" #include "util.h" #include "glo.h" +#include "pagerange.h" +#include "addravl.h" +#include "sanitycheck.h" -/* Initially, no free pages are known. */ -PRIVATE phys_bytes free_pages_head = NO_MEM; /* Physical address in bytes. */ +/* AVL tree of free pages. */ +addr_avl addravl; /* Used for sanity check. */ PRIVATE phys_bytes mem_low, mem_high; @@ -54,6 +59,8 @@ struct hole { int holelist; }; +static int startpages; + #define NIL_HOLE (struct hole *) 0 #define _NR_HOLES (_NR_PROCS*2) /* No. of memory holes maintained by VM */ @@ -71,6 +78,11 @@ FORWARD _PROTOTYPE( phys_bytes alloc_pages, (int pages, int flags) ); #if SANITYCHECKS FORWARD _PROTOTYPE( void holes_sanity_f, (char *fn, int line) ); #define CHECKHOLES holes_sanity_f(__FILE__, __LINE__) + +#define MAXPAGES (1024*1024*1024/VM_PAGE_SIZE) /* 1GB of memory */ +#define CHUNKS BITMAP_CHUNKS(MAXPAGES) +PRIVATE bitchunk_t pagemap[CHUNKS]; + #else #define CHECKHOLES #endif @@ -102,26 +114,6 @@ FORWARD _PROTOTYPE( void holes_sanity_f, (char *fn, int line) ); } -void availbytes(vir_bytes *bytes, vir_bytes *chunks) -{ - phys_bytes p, nextp; - *bytes = 0; - *chunks = 0; - for(p = free_pages_head; p != NO_MEM; p = nextp) { - phys_bytes thissize, ret; - GET_PARAMS(p, thissize, nextp); - (*bytes) += thissize; - (*chunks)++; - if(nextp != NO_MEM) { - vm_assert(nextp > p); - vm_assert(nextp > p + thissize); - } - } - - return; -} - - #if SANITYCHECKS /*===========================================================================* @@ -400,6 +392,7 @@ struct memory *chunks; /* list of free memory chunks */ */ int i, first = 0; register struct hole *hp; + int nodes, largest; /* Put all holes on the free list. */ for (hp = &hole[0]; hp < &hole[_NR_HOLES]; hp++) { @@ -410,6 +403,8 @@ struct memory *chunks; /* list of free memory chunks */ hole_head = NIL_HOLE; free_slots = &hole[0]; + addr_init(&addravl); + /* Use the chunks of physical memory to allocate holes. 
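Each free chunk also ends up in the address-ordered AVL tree (addravl) initialised above, which replaces the old free-pages linked list; the memstats() call after this loop reports how many nodes and pages it received.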
*/ for (i=NR_MEMS-1; i>=0; i--) { if (chunks[i].size > 0) { @@ -422,217 +417,226 @@ struct memory *chunks; /* list of free memory chunks */ } } + memstats(&nodes, &startpages, &largest); + + printf("VM: %d nodes, %d pages, largest chunk %d\n", + nodes, startpages, largest); + CHECKHOLES; } +#if SANITYCHECKS +PRIVATE void sanitycheck(void) +{ + pagerange_t *p, *prevp = NULL; + addr_iter iter; + addr_start_iter_least(&addravl, &iter); + while((p=addr_get_iter(&iter))) { + SLABSANE(p); + vm_assert(p->size > 0); + if(prevp) { + vm_assert(prevp->addr < p->addr); + vm_assert(prevp->addr + p->addr < p->addr); + } + addr_incr_iter(&iter); + } +} +#endif + +PUBLIC void memstats(int *nodes, int *pages, int *largest) +{ + pagerange_t *p, *prevp = NULL; + addr_iter iter; + addr_start_iter_least(&addravl, &iter); + *nodes = 0; + *pages = 0; + *largest = 0; +#if SANITYCHECKS + sanitycheck(); +#endif + while((p=addr_get_iter(&iter))) { + SLABSANE(p); + (*nodes)++; + (*pages)+= p->size; + if(p->size > *largest) + *largest = p->size; + addr_incr_iter(&iter); + } +} + /*===========================================================================* * alloc_pages * *===========================================================================*/ PRIVATE PUBLIC phys_bytes alloc_pages(int pages, int memflags) { - phys_bytes bytes, p, nextp, prevp = NO_MEM; - phys_bytes prevsize = 0; - + addr_iter iter; + pagerange_t *pr; + int incr; + phys_bytes boundary16 = 16 * 1024 * 1024 / VM_PAGE_SIZE; + phys_bytes boundary1 = 1 * 1024 * 1024 / VM_PAGE_SIZE; + phys_bytes mem; #if SANITYCHECKS - vir_bytes avail1, avail2, chunks1, chunks2; - availbytes(&avail1, &chunks1); + int firstnodes, firstpages, wantnodes, wantpages; + int finalnodes, finalpages; + int largest; + + memstats(&firstnodes, &firstpages, &largest); + sanitycheck(); + wantnodes = firstnodes; + wantpages = firstpages - pages; #endif - vm_assert(pages > 0); - bytes = CLICK2ABS(pages); - vm_assert(ABS2CLICK(bytes) == pages); + if(memflags & (PAF_LOWER16MB|PAF_LOWER1MB)) { + addr_start_iter_least(&addravl, &iter); + incr = 1; + } else { + addr_start_iter_greatest(&addravl, &iter); + incr = 0; + } + + while((pr = addr_get_iter(&iter))) { + SLABSANE(pr); + if(pr->size >= pages) { + if(memflags & PAF_LOWER16MB) { + if(pr->addr + pages > boundary16) + return NO_MEM; + } + + if(memflags & PAF_LOWER1MB) { + if(pr->addr + pages > boundary1) + return NO_MEM; + } + + /* good block found! */ + break; + } + if(incr) + addr_incr_iter(&iter); + else + addr_decr_iter(&iter); + } + if(!pr) { + printf("VM: alloc_pages: alloc failed of %d pages\n", pages); + util_stacktrace(); + printmemstats(); #if SANITYCHECKS -#define ALLOCRETURNCHECK \ - availbytes(&avail2, &chunks2); \ - vm_assert(avail1 - bytes == avail2); \ - vm_assert(chunks1 == chunks2 || chunks1-1 == chunks2); -#else -#define ALLOCRETURNCHECK + if(largest >= pages) { + vm_panic("no memory but largest was enough", NO_NUM); + } #endif + return NO_MEM; + } + SLABSANE(pr); - for(p = free_pages_head; p != NO_MEM; p = nextp) { - phys_bytes thissize, ret; - GET_PARAMS(p, thissize, nextp); - if(thissize >= bytes) { - /* We found a chunk that's big enough. */ - - ret = p + thissize - bytes; - thissize -= bytes; - - if(thissize == 0) { - /* Special case: remove this link entirely. */ - if(prevp == NO_MEM) - free_pages_head = nextp; - else { - vm_assert(prevsize > 0); - SET_PARAMS(prevp, prevsize, nextp); - } - } else { - /* Remove memory from this chunk. */ - SET_PARAMS(p, thissize, nextp); - } + /* Allocated chunk is off the end. 
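Pages are carved off the top of the range, so the remaining free range keeps its start address and only its size shrinks.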
*/ + mem = pr->addr + pr->size - pages; - /* Clear memory if requested. */ - if(memflags & PAF_CLEAR) { - int s; - if ((s= sys_memset(0, ret, bytes)) != OK) { - vm_panic("alloc_pages: sys_memset failed", s); - } - } + vm_assert(pr->size >= pages); + if(pr->size == pages) { + pagerange_t *prr; + prr = addr_remove(&addravl, pr->addr); + vm_assert(prr); + vm_assert(prr == pr); + SLABFREE(pr); +#if SANITYCHECKS + wantnodes--; +#endif + } else { + USE(pr, pr->size -= pages;); + } - /* Check if returned range is actual good memory. */ - vm_assert_range(ret, bytes); + if(memflags & PAF_CLEAR) { + int s; + if ((s= sys_memset(0, CLICK_SIZE*mem, + VM_PAGE_SIZE*pages)) != OK) + vm_panic("alloc_mem: sys_memset failed", s); + } - ALLOCRETURNCHECK; +#if SANITYCHECKS + memstats(&finalnodes, &finalpages, &largest); + sanitycheck(); - /* Return it in clicks. */ - return ABS2CLICK(ret); - } - prevp = p; - prevsize = thissize; - } - return NO_MEM; + vm_assert(finalnodes == wantnodes); + vm_assert(finalpages == wantpages); +#endif + + return mem; } /*===========================================================================* * free_pages * *===========================================================================*/ -PRIVATE PUBLIC void free_pages(phys_bytes pageno, int npages) +PRIVATE void free_pages(phys_bytes pageno, int npages) { - phys_bytes p, origsize, - size, nextaddr, thissize, prevp = NO_MEM, pageaddr; - + pagerange_t *pr, *p; + addr_iter iter; #if SANITYCHECKS - vir_bytes avail1, avail2, chunks1, chunks2; - availbytes(&avail1, &chunks1); -#endif + int firstnodes, firstpages, wantnodes, wantpages; + int finalnodes, finalpages, largest; -#if SANITYCHECKS -#define FREERETURNCHECK \ - availbytes(&avail2, &chunks2); \ - vm_assert(avail1 + origsize == avail2); \ - vm_assert(chunks1 == chunks2 || chunks1+1 == chunks2 || chunks1-1 == chunks2); -#else -#define FREERETURNCHECK + memstats(&firstnodes, &firstpages, &largest); + sanitycheck(); + + wantnodes = firstnodes; + wantpages = firstpages + npages; #endif - /* Basic sanity check. */ - vm_assert(npages > 0); - vm_assert(pageno != NO_MEM); /* Page number must be reasonable. */ - - /* Convert page and pages to bytes. */ - pageaddr = CLICK2ABS(pageno); - origsize = size = npages * VM_PAGE_SIZE; /* Size in bytes. */ - vm_assert(pageaddr != NO_MEM); - vm_assert(ABS2CLICK(pageaddr) == pageno); - vm_assert_range(pageaddr, size); - - /* More sanity checks. */ - vm_assert(ABS2CLICK(size) == npages); /* Sanity. */ - vm_assert(pageaddr + size > pageaddr); /* Must not overflow. */ - - /* Special case: no free pages. */ - if(free_pages_head == NO_MEM) { - free_pages_head = pageaddr; - SET_PARAMS(pageaddr, size, NO_MEM); - FREERETURNCHECK; - return; - } + vm_assert(!addr_search(&addravl, pageno, AVL_EQUAL)); - /* Special case: the free block is before the current head. */ - if(pageaddr < free_pages_head) { - phys_bytes newsize, newnext, headsize, headnext; - vm_assert(pageaddr + size <= free_pages_head); - GET_PARAMS(free_pages_head, headsize, headnext); - newsize = size; - if(pageaddr + size == free_pages_head) { - /* Special case: contiguous. 
*/ - newsize += headsize; - newnext = headnext; - } else { - newnext = free_pages_head; - } - SET_PARAMS(pageaddr, newsize, newnext); - free_pages_head = pageaddr; - FREERETURNCHECK; - return; - } + /* try to merge with higher neighbour */ + if((pr=addr_search(&addravl, pageno+npages, AVL_EQUAL))) { + USE(pr, pr->addr -= npages; + pr->size += npages;); + } else { + if(!SLABALLOC(pr)) + vm_panic("alloc_pages: can't alloc", NO_NUM); +#if SANITYCHECKS + memstats(&firstnodes, &firstpages, &largest); - /* Find where to put the block in the free list. */ - for(p = free_pages_head; p < pageaddr; p = nextaddr) { - GET_PARAMS(p, thissize, nextaddr); - - if(nextaddr == NO_MEM) { - /* Special case: page is at the end of the list. */ - if(p + thissize == pageaddr) { - /* Special case: contiguous. */ - SET_PARAMS(p, thissize + size, NO_MEM); - FREERETURNCHECK; - } else { - SET_PARAMS(p, thissize, pageaddr); - SET_PARAMS(pageaddr, size, NO_MEM); - FREERETURNCHECK; - } - return; - } + wantnodes = firstnodes; + wantpages = firstpages + npages; - prevp = p; + sanitycheck(); +#endif + vm_assert(npages > 0); + USE(pr, pr->addr = pageno; + pr->size = npages;); + addr_insert(&addravl, pr); +#if SANITYCHECKS + wantnodes++; +#endif } - /* Normal case: insert page block between two others. - * The first block starts at 'prevp' and is 'thissize'. - * The second block starts at 'p' and is 'nextsize'. - * The block that has to come in between starts at - * 'pageaddr' and is size 'size'. - */ - vm_assert(p != NO_MEM); - vm_assert(prevp != NO_MEM); - vm_assert(prevp < p); - vm_assert(p == nextaddr); - + addr_start_iter(&addravl, &iter, pr->addr, AVL_EQUAL); + p = addr_get_iter(&iter); + vm_assert(p); + vm_assert(p == pr); + + addr_decr_iter(&iter); + if((p = addr_get_iter(&iter))) { + SLABSANE(p); + if(p->addr + p->size == pr->addr) { + USE(p, p->size += pr->size;); + addr_remove(&addravl, pr->addr); + SLABFREE(pr); #if SANITYCHECKS - { - vir_bytes prevpsize, prevpnext; - GET_PARAMS(prevp, prevpsize, prevpnext); - vm_assert(prevpsize == thissize); - vm_assert(prevpnext == p); - - availbytes(&avail2, &chunks2); - vm_assert(avail1 == avail2); - } + wantnodes--; #endif - - if(prevp + thissize == pageaddr) { - /* Special case: first block is contiguous with freed one. */ - phys_bytes newsize = thissize + size; - SET_PARAMS(prevp, newsize, p); - pageaddr = prevp; - size = newsize; - } else { - SET_PARAMS(prevp, thissize, pageaddr); + } } - /* The block has been inserted (and possibly merged with the - * first one). Check if it has to be merged with the second one. - */ - if(pageaddr + size == p) { - phys_bytes nextsize, nextnextaddr; - /* Special case: freed block is contiguous with next one. 
*/ - GET_PARAMS(p, nextsize, nextnextaddr); - SET_PARAMS(pageaddr, size+nextsize, nextnextaddr); - FREERETURNCHECK; - } else { - SET_PARAMS(pageaddr, size, p); - FREERETURNCHECK; - } +#if SANITYCHECKS + memstats(&finalnodes, &finalpages, &largest); + sanitycheck(); - return; + vm_assert(finalnodes == wantnodes); + vm_assert(finalpages == wantpages); +#endif } - #define NR_DMA 16 PRIVATE struct dmatab @@ -850,3 +854,65 @@ PUBLIC int do_allocmem(message *m) return OK; } +/*===========================================================================* + * do_allocmem * + *===========================================================================*/ +void printmemstats(void) +{ + int nodes, pages, largest; + memstats(&nodes, &pages, &largest); + printf("%d blocks, %d pages (%ukB) free, largest %d pages (%ukB)\n", + nodes, pages, (u32_t) pages * (VM_PAGE_SIZE/1024), + largest, (u32_t) largest * (VM_PAGE_SIZE/1024)); +} + + +#if SANITYCHECKS + +/*===========================================================================* + * usedpages_reset * + *===========================================================================*/ +void usedpages_reset(void) +{ + memset(pagemap, 0, sizeof(pagemap)); +} + +/*===========================================================================* + * usedpages_add * + *===========================================================================*/ +int usedpages_add_f(phys_bytes addr, phys_bytes len, char *file, int line) +{ + pagerange_t *pr; + u32_t pagestart, pages; + + if(!incheck) + return OK; + + vm_assert(!(addr % VM_PAGE_SIZE)); + vm_assert(!(len % VM_PAGE_SIZE)); + vm_assert(len > 0); + vm_assert_range(addr, len); + + pagestart = addr / VM_PAGE_SIZE; + pages = len / VM_PAGE_SIZE; + + while(pages > 0) { + phys_bytes thisaddr; + vm_assert(pagestart > 0); + vm_assert(pagestart < MAXPAGES); + thisaddr = pagestart * VM_PAGE_SIZE; + if(GET_BIT(pagemap, pagestart)) { + int i; + printf("%s:%d: usedpages_add: addr 0x%lx reused.\n", + file, line, thisaddr); + return EFAULT; + } + SET_BIT(pagemap, pagestart); + pages--; + pagestart++; + } + + return OK; +} + +#endif diff --git a/servers/vm/break.c b/servers/vm/break.c index d392096c4..f2fadb504 100644 --- a/servers/vm/break.c +++ b/servers/vm/break.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/servers/vm/cavl_if.h b/servers/vm/cavl_if.h new file mode 100755 index 000000000..a2df08304 --- /dev/null +++ b/servers/vm/cavl_if.h @@ -0,0 +1,216 @@ +/* Abstract AVL Tree Generic C Package. +** Interface generation header file. +** +** This code is in the public domain. See cavl_tree.html for interface +** documentation. +** +** Version: 1.5 Author: Walt Karas +*/ + +/* This header contains the definition of CHAR_BIT (number of bits in a +** char). */ +#include + +#undef L__ +#undef L__EST_LONG_BIT +#undef L__SIZE +#undef L__SC +#undef L__LONG_BIT +#undef L__BIT_ARR_DEFN + +#ifndef AVL_SEARCH_TYPE_DEFINED_ +#define AVL_SEARCH_TYPE_DEFINED_ + +typedef enum + { + AVL_EQUAL = 1, + AVL_LESS = 2, + AVL_GREATER = 4, + AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS, + AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER + } +avl_search_type; + +#endif + +#ifdef AVL_UNIQUE + +#define L__ AVL_UNIQUE + +#else + +#define L__(X) X + +#endif + +/* Determine storage class for function prototypes. 
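Defining AVL_PRIVATE makes every generated function static; otherwise the prototypes get external linkage.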
*/ +#ifdef AVL_PRIVATE + +#define L__SC static + +#else + +#define L__SC extern + +#endif + +#ifdef AVL_SIZE + +#define L__SIZE AVL_SIZE + +#else + +#define L__SIZE unsigned long + +#endif + +typedef struct + { + #ifdef AVL_INSIDE_STRUCT + + AVL_INSIDE_STRUCT + + #endif + + AVL_HANDLE root; + } +L__(avl); + +/* Function prototypes. */ + +L__SC void L__(init)(L__(avl) *tree); + +L__SC int L__(is_empty)(L__(avl) *tree); + +L__SC AVL_HANDLE L__(insert)(L__(avl) *tree, AVL_HANDLE h); + +L__SC AVL_HANDLE L__(search)(L__(avl) *tree, AVL_KEY k, avl_search_type st); + +L__SC AVL_HANDLE L__(search_least)(L__(avl) *tree); + +L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *tree); + +L__SC AVL_HANDLE L__(remove)(L__(avl) *tree, AVL_KEY k); + +L__SC AVL_HANDLE L__(subst)(L__(avl) *tree, AVL_HANDLE new_node); + +#ifdef AVL_BUILD_ITER_TYPE + +L__SC int L__(build)( + L__(avl) *tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes); + +#endif + +/* ANSI C/ISO C++ require that a long have at least 32 bits. Set +** L__EST_LONG_BIT to be the greatest multiple of 8 in the range +** 32 - 64 (inclusive) that is less than or equal to the number of +** bits in a long. +*/ + +#if (((LONG_MAX >> 31) >> 7) == 0) + +#define L__EST_LONG_BIT 32 + +#elif (((LONG_MAX >> 31) >> 15) == 0) + +#define L__EST_LONG_BIT 40 + +#elif (((LONG_MAX >> 31) >> 23) == 0) + +#define L__EST_LONG_BIT 48 + +#elif (((LONG_MAX >> 31) >> 31) == 0) + +#define L__EST_LONG_BIT 56 + +#else + +#define L__EST_LONG_BIT 64 + +#endif + +/* Number of bits in a long. */ +#define L__LONG_BIT (sizeof(long) * CHAR_BIT) + +/* The macro L__BIT_ARR_DEFN defines a bit array whose index is a (0-based) +** node depth. The definition depends on whether the maximum depth is more +** or less than the number of bits in a single long. +*/ + +#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT) + +/* Maximum depth may be more than number of bits in a long. */ + +#define L__BIT_ARR_DEFN(NAME) \ + unsigned long NAME[((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT]; + +#else + +/* Maximum depth is definitely less than number of bits in a long. */ + +#define L__BIT_ARR_DEFN(NAME) unsigned long NAME; + +#endif + +/* Iterator structure. */ +typedef struct + { + /* Tree being iterated over. */ + L__(avl) *tree_; + + /* Records a path into the tree. If bit n is true, indicates + ** take greater branch from the nth node in the path, otherwise + ** take the less branch. bit 0 gives branch from root, and + ** so on. */ + L__BIT_ARR_DEFN(branch) + + /* Zero-based depth of path into tree. */ + unsigned depth; + + /* Handles of nodes in path from root to current node (returned by *). */ + AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1]; + } +L__(iter); + +/* Iterator function prototypes. 
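The start_iter* functions position an iterator, get_iter returns the
node it points at, and incr_iter/decr_iter step through the tree in
key order. With AVL_UNIQUE defined as addr_ (as addravl.h does), a
full in-order walk looks like the one memstats() in alloc.c performs:

    pagerange_t *p;
    addr_iter iter;
    addr_start_iter_least(&addravl, &iter);
    while((p = addr_get_iter(&iter))) {
        ... visit p ...
        addr_incr_iter(&iter);
    }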
*/ + +L__SC void L__(start_iter)( + L__(avl) *tree, L__(iter) *iter, AVL_KEY k, avl_search_type st); + +L__SC void L__(start_iter_least)(L__(avl) *tree, L__(iter) *iter); + +L__SC void L__(start_iter_greatest)(L__(avl) *tree, L__(iter) *iter); + +L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter); + +L__SC void L__(incr_iter)(L__(iter) *iter); + +L__SC void L__(decr_iter)(L__(iter) *iter); + +L__SC void L__(init_iter)(L__(iter) *iter); + +#define AVL_IMPL_INIT 1 +#define AVL_IMPL_IS_EMPTY (1 << 1) +#define AVL_IMPL_INSERT (1 << 2) +#define AVL_IMPL_SEARCH (1 << 3) +#define AVL_IMPL_SEARCH_LEAST (1 << 4) +#define AVL_IMPL_SEARCH_GREATEST (1 << 5) +#define AVL_IMPL_REMOVE (1 << 6) +#define AVL_IMPL_BUILD (1 << 7) +#define AVL_IMPL_START_ITER (1 << 8) +#define AVL_IMPL_START_ITER_LEAST (1 << 9) +#define AVL_IMPL_START_ITER_GREATEST (1 << 10) +#define AVL_IMPL_GET_ITER (1 << 11) +#define AVL_IMPL_INCR_ITER (1 << 12) +#define AVL_IMPL_DECR_ITER (1 << 13) +#define AVL_IMPL_INIT_ITER (1 << 14) +#define AVL_IMPL_SUBST (1 << 15) + +#define AVL_IMPL_ALL (~0) + +#undef L__ +#undef L__EST_LONG_BIT +#undef L__SIZE +#undef L__SC +#undef L__LONG_BIT +#undef L__BIT_ARR_DEFN diff --git a/servers/vm/cavl_impl.h b/servers/vm/cavl_impl.h new file mode 100755 index 000000000..ccf2e2184 --- /dev/null +++ b/servers/vm/cavl_impl.h @@ -0,0 +1,1187 @@ +/* Abstract AVL Tree Generic C Package. +** Implementation generation header file. +** +** This code is in the public domain. See cavl_tree.html for interface +** documentation. +** +** Version: 1.5 Author: Walt Karas +*/ + +#undef L__ +#undef L__EST_LONG_BIT +#undef L__SIZE +#undef L__tree +#undef L__MASK_HIGH_BIT +#undef L__LONG_BIT +#undef L__BIT_ARR_DEFN +#undef L__BIT_ARR_VAL +#undef L__BIT_ARR_0 +#undef L__BIT_ARR_1 +#undef L__BIT_ARR_ALL +#undef L__BIT_ARR_LONGS +#undef L__IMPL_MASK +#undef L__CHECK_READ_ERROR +#undef L__CHECK_READ_ERROR_INV_DEPTH +#undef L__SC +#undef L__BALANCE_PARAM_PREFIX + +#ifdef AVL_UNIQUE + +#define L__ AVL_UNIQUE + +#else + +#define L__(X) X + +#endif + +/* Determine correct storage class for functions */ +#ifdef AVL_PRIVATE + +#define L__SC static + +#else + +#define L__SC + +#endif + +#ifdef AVL_SIZE + +#define L__SIZE AVL_SIZE + +#else + +#define L__SIZE unsigned long + +#endif + +#define L__MASK_HIGH_BIT ((int) ~ ((~ (unsigned) 0) >> 1)) + +/* ANSI C/ISO C++ require that a long have at least 32 bits. Set +** L__EST_LONG_BIT to be the greatest multiple of 8 in the range +** 32 - 64 (inclusive) that is less than or equal to the number of +** bits in a long. +*/ + +#if (((LONG_MAX >> 31) >> 7) == 0) + +#define L__EST_LONG_BIT 32 + +#elif (((LONG_MAX >> 31) >> 15) == 0) + +#define L__EST_LONG_BIT 40 + +#elif (((LONG_MAX >> 31) >> 23) == 0) + +#define L__EST_LONG_BIT 48 + +#elif (((LONG_MAX >> 31) >> 31) == 0) + +#define L__EST_LONG_BIT 56 + +#else + +#define L__EST_LONG_BIT 64 + +#endif + +#define L__LONG_BIT (sizeof(long) * CHAR_BIT) + +#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT) + +/* The maximum depth may be greater than the number of bits in a long, +** so multiple longs are needed to hold a bit array indexed by node +** depth. 
*/ + +#define L__BIT_ARR_LONGS (((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT) + +#define L__BIT_ARR_DEFN(NAME) unsigned long NAME[L__BIT_ARR_LONGS]; + +#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) \ + ((BIT_ARR)[(BIT_NUM) / L__LONG_BIT] & (1L << ((BIT_NUM) % L__LONG_BIT))) + +#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) \ + (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] &= ~(1L << ((BIT_NUM) % L__LONG_BIT)); + +#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) \ + (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] |= 1L << ((BIT_NUM) % L__LONG_BIT); + +#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) \ + { int i = L__BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0L - (BIT_VAL); while(i); } + +#else /* The bit array can definitely fit in one long */ + +#define L__BIT_ARR_DEFN(NAME) unsigned long NAME; + +#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) ((BIT_ARR) & (1L << (BIT_NUM))) + +#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) (BIT_ARR) &= ~(1L << (BIT_NUM)); + +#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) (BIT_ARR) |= 1L << (BIT_NUM); + +#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) (BIT_ARR) = 0L - (BIT_VAL); + +#endif + +#ifdef AVL_READ_ERRORS_HAPPEN + +#define L__CHECK_READ_ERROR(ERROR_RETURN) \ +{ if (AVL_READ_ERROR) return(ERROR_RETURN); } + +#else + +#define L__CHECK_READ_ERROR(ERROR_RETURN) + +#endif + +/* The presumed reason that an instantiation places additional fields +** inside the AVL tree structure is that the SET_ and GET_ macros +** need these fields. The "balance" function does not explicitly use +** any fields in the AVL tree structure, so only pass an AVL tree +** structure pointer to "balance" if it has instantiation-specific +** fields that are (presumably) needed by the SET_/GET_ calls within +** "balance". +*/ +#ifdef AVL_INSIDE_STRUCT + +#define L__BALANCE_PARAM_CALL_PREFIX L__tree, +#define L__BALANCE_PARAM_DECL_PREFIX L__(avl) *L__tree, + +#else + +#define L__BALANCE_PARAM_CALL_PREFIX +#define L__BALANCE_PARAM_DECL_PREFIX + +#endif + +#ifdef AVL_IMPL_MASK + +#define L__IMPL_MASK (AVL_IMPL_MASK) + +#else + +/* Define all functions. */ +#define L__IMPL_MASK AVL_IMPL_ALL + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_INIT) + +L__SC void L__(init)(L__(avl) *L__tree) { AVL_SET_ROOT(L__tree, AVL_NULL); } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_IS_EMPTY) + +L__SC int L__(is_empty)(L__(avl) *L__tree) + { return(L__tree->root == AVL_NULL); } + +#endif + +/* Put the private balance function in the same compilation module as +** the insert function. */ +#if (L__IMPL_MASK & AVL_IMPL_INSERT) + +/* Balances subtree, returns handle of root node of subtree after balancing. +*/ +L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h) + { + AVL_HANDLE deep_h; + + /* Either the "greater than" or the "less than" subtree of + ** this node has to be 2 levels deeper (or else it wouldn't + ** need balancing). + */ + if (AVL_GET_BALANCE_FACTOR(bal_h) > 0) + { + /* "Greater than" subtree is deeper. 
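If that deeper child itself leans towards "less" (negative balance factor), a double rotation is needed; otherwise a single rotation restores balance.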
*/ + + deep_h = AVL_GET_GREATER(bal_h, 1); + + L__CHECK_READ_ERROR(AVL_NULL) + + if (AVL_GET_BALANCE_FACTOR(deep_h) < 0) + { + int bf; + + AVL_HANDLE old_h = bal_h; + bal_h = AVL_GET_LESS(deep_h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 1)) + AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 1)) + AVL_SET_LESS(bal_h, old_h) + AVL_SET_GREATER(bal_h, deep_h) + + bf = AVL_GET_BALANCE_FACTOR(bal_h); + if (bf != 0) + { + if (bf > 0) + { + AVL_SET_BALANCE_FACTOR(old_h, -1) + AVL_SET_BALANCE_FACTOR(deep_h, 0) + } + else + { + AVL_SET_BALANCE_FACTOR(deep_h, 1) + AVL_SET_BALANCE_FACTOR(old_h, 0) + } + AVL_SET_BALANCE_FACTOR(bal_h, 0) + } + else + { + AVL_SET_BALANCE_FACTOR(old_h, 0) + AVL_SET_BALANCE_FACTOR(deep_h, 0) + } + } + else + { + AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0)) + AVL_SET_LESS(deep_h, bal_h) + if (AVL_GET_BALANCE_FACTOR(deep_h) == 0) + { + AVL_SET_BALANCE_FACTOR(deep_h, -1) + AVL_SET_BALANCE_FACTOR(bal_h, 1) + } + else + { + AVL_SET_BALANCE_FACTOR(deep_h, 0) + AVL_SET_BALANCE_FACTOR(bal_h, 0) + } + bal_h = deep_h; + } + } + else + { + /* "Less than" subtree is deeper. */ + + deep_h = AVL_GET_LESS(bal_h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + + if (AVL_GET_BALANCE_FACTOR(deep_h) > 0) + { + int bf; + AVL_HANDLE old_h = bal_h; + bal_h = AVL_GET_GREATER(deep_h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0)) + AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0)) + AVL_SET_GREATER(bal_h, old_h) + AVL_SET_LESS(bal_h, deep_h) + + bf = AVL_GET_BALANCE_FACTOR(bal_h); + if (bf != 0) + { + if (bf < 0) + { + AVL_SET_BALANCE_FACTOR(old_h, 1) + AVL_SET_BALANCE_FACTOR(deep_h, 0) + } + else + { + AVL_SET_BALANCE_FACTOR(deep_h, -1) + AVL_SET_BALANCE_FACTOR(old_h, 0) + } + AVL_SET_BALANCE_FACTOR(bal_h, 0) + } + else + { + AVL_SET_BALANCE_FACTOR(old_h, 0) + AVL_SET_BALANCE_FACTOR(deep_h, 0) + } + } + else + { + AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0)) + AVL_SET_GREATER(deep_h, bal_h) + if (AVL_GET_BALANCE_FACTOR(deep_h) == 0) + { + AVL_SET_BALANCE_FACTOR(deep_h, 1) + AVL_SET_BALANCE_FACTOR(bal_h, -1) + } + else + { + AVL_SET_BALANCE_FACTOR(deep_h, 0) + AVL_SET_BALANCE_FACTOR(bal_h, 0) + } + bal_h = deep_h; + } + } + + return(bal_h); + } + +L__SC AVL_HANDLE L__(insert)(L__(avl) *L__tree, AVL_HANDLE h) + { + AVL_SET_LESS(h, AVL_NULL) + AVL_SET_GREATER(h, AVL_NULL) + AVL_SET_BALANCE_FACTOR(h, 0) + + if (L__tree->root == AVL_NULL) { + AVL_SET_ROOT(L__tree, h); + } else + { + /* Last unbalanced node encountered in search for insertion point. */ + AVL_HANDLE unbal = AVL_NULL; + /* Parent of last unbalanced node. */ + AVL_HANDLE parent_unbal = AVL_NULL; + /* Balance factor of last unbalanced node. */ + int unbal_bf; + + /* Zero-based depth in tree. */ + unsigned depth = 0, unbal_depth = 0; + + /* Records a path into the tree. If bit n is true, indicates + ** take greater branch from the nth node in the path, otherwise + ** take the less branch. bit 0 gives branch from root, and + ** so on. */ + L__BIT_ARR_DEFN(branch) + + AVL_HANDLE hh = L__tree->root; + AVL_HANDLE parent = AVL_NULL; + int cmp; + + do + { + if (AVL_GET_BALANCE_FACTOR(hh) != 0) + { + unbal = hh; + parent_unbal = parent; + unbal_depth = depth; + } + cmp = AVL_COMPARE_NODE_NODE(h, hh); + if (cmp == 0) + /* Duplicate key. 
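The node already in the tree is returned unchanged; callers can detect the clash because the returned handle differs from the one they passed in.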
*/ + return(hh); + parent = hh; + if (cmp > 0) + { + hh = AVL_GET_GREATER(hh, 1); + L__BIT_ARR_1(branch, depth) + } + else + { + hh = AVL_GET_LESS(hh, 1); + L__BIT_ARR_0(branch, depth) + } + L__CHECK_READ_ERROR(AVL_NULL) + depth++; + } + while (hh != AVL_NULL); + + /* Add node to insert as leaf of tree. */ + if (cmp < 0) + AVL_SET_LESS(parent, h) + else + AVL_SET_GREATER(parent, h) + + depth = unbal_depth; + + if (unbal == AVL_NULL) + hh = L__tree->root; + else + { + cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1; + depth++; + unbal_bf = AVL_GET_BALANCE_FACTOR(unbal); + if (cmp < 0) + unbal_bf--; + else /* cmp > 0 */ + unbal_bf++; + hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1); + L__CHECK_READ_ERROR(AVL_NULL) + if ((unbal_bf != -2) && (unbal_bf != 2)) + { + /* No rebalancing of tree is necessary. */ + AVL_SET_BALANCE_FACTOR(unbal, unbal_bf) + unbal = AVL_NULL; + } + } + + if (hh != AVL_NULL) + while (h != hh) + { + cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1; + depth++; + if (cmp < 0) + { + AVL_SET_BALANCE_FACTOR(hh, -1) + hh = AVL_GET_LESS(hh, 1); + } + else /* cmp > 0 */ + { + AVL_SET_BALANCE_FACTOR(hh, 1) + hh = AVL_GET_GREATER(hh, 1); + } + L__CHECK_READ_ERROR(AVL_NULL) + } + + if (unbal != AVL_NULL) + { + unbal = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX unbal); + L__CHECK_READ_ERROR(AVL_NULL) + if (parent_unbal == AVL_NULL) + { + AVL_SET_ROOT(L__tree, unbal); + } + else + { + depth = unbal_depth - 1; + cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1; + if (cmp < 0) + AVL_SET_LESS(parent_unbal, unbal) + else /* cmp > 0 */ + AVL_SET_GREATER(parent_unbal, unbal) + } + } + + } + + return(h); + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_SEARCH) + +L__SC AVL_HANDLE L__(search)(L__(avl) *L__tree, AVL_KEY k, avl_search_type st) + { + int cmp, target_cmp; + AVL_HANDLE match_h = AVL_NULL; + AVL_HANDLE h = L__tree->root; + + if (st & AVL_LESS) + target_cmp = 1; + else if (st & AVL_GREATER) + target_cmp = -1; + else + target_cmp = 0; + + while (h != AVL_NULL) + { + cmp = AVL_COMPARE_KEY_NODE(k, h); + if (cmp == 0) + { + if (st & AVL_EQUAL) + { + match_h = h; + break; + } + cmp = -target_cmp; + } + else if (target_cmp != 0) + if (!((cmp ^ target_cmp) & L__MASK_HIGH_BIT)) + /* cmp and target_cmp are both positive or both negative. */ + match_h = h; + h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + } + + return(match_h); + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_SEARCH_LEAST) + +L__SC AVL_HANDLE L__(search_least)(L__(avl) *L__tree) + { + AVL_HANDLE h = L__tree->root; + AVL_HANDLE parent = AVL_NULL; + + while (h != AVL_NULL) + { + parent = h; + h = AVL_GET_LESS(h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + } + + return(parent); + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_SEARCH_GREATEST) + +L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *L__tree) + { + AVL_HANDLE h = L__tree->root; + AVL_HANDLE parent = AVL_NULL; + + while (h != AVL_NULL) + { + parent = h; + h = AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + } + + return(parent); + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_REMOVE) + +/* Prototype of balance function (called by remove) in case not in +** same compilation unit. +*/ +L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h); + +L__SC AVL_HANDLE L__(remove)(L__(avl) *L__tree, AVL_KEY k) + { + /* Zero-based depth in tree. */ + unsigned depth = 0, rm_depth; + + /* Records a path into the tree. 
If bit n is true, indicates + ** take greater branch from the nth node in the path, otherwise + ** take the less branch. bit 0 gives branch from root, and + ** so on. */ + L__BIT_ARR_DEFN(branch) + + AVL_HANDLE h = L__tree->root; + AVL_HANDLE parent = AVL_NULL; + AVL_HANDLE child; + AVL_HANDLE path; + int cmp, cmp_shortened_sub_with_path; + int reduced_depth; + int bf; + AVL_HANDLE rm; + AVL_HANDLE parent_rm; + + for ( ; ; ) + { + if (h == AVL_NULL) + /* No node in tree with given key. */ + return(AVL_NULL); + cmp = AVL_COMPARE_KEY_NODE(k, h); + if (cmp == 0) + /* Found node to remove. */ + break; + parent = h; + if (cmp > 0) + { + h = AVL_GET_GREATER(h, 1); + L__BIT_ARR_1(branch, depth) + } + else + { + h = AVL_GET_LESS(h, 1); + L__BIT_ARR_0(branch, depth) + } + L__CHECK_READ_ERROR(AVL_NULL) + depth++; + cmp_shortened_sub_with_path = cmp; + } + rm = h; + parent_rm = parent; + rm_depth = depth; + + /* If the node to remove is not a leaf node, we need to get a + ** leaf node, or a node with a single leaf as its child, to put + ** in the place of the node to remove. We will get the greatest + ** node in the less subtree (of the node to remove), or the least + ** node in the greater subtree. We take the leaf node from the + ** deeper subtree, if there is one. */ + + if (AVL_GET_BALANCE_FACTOR(h) < 0) + { + child = AVL_GET_LESS(h, 1); + L__BIT_ARR_0(branch, depth) + cmp = -1; + } + else + { + child = AVL_GET_GREATER(h, 1); + L__BIT_ARR_1(branch, depth) + cmp = 1; + } + L__CHECK_READ_ERROR(AVL_NULL) + depth++; + + if (child != AVL_NULL) + { + cmp = -cmp; + do + { + parent = h; + h = child; + if (cmp < 0) + { + child = AVL_GET_LESS(h, 1); + L__BIT_ARR_0(branch, depth) + } + else + { + child = AVL_GET_GREATER(h, 1); + L__BIT_ARR_1(branch, depth) + } + L__CHECK_READ_ERROR(AVL_NULL) + depth++; + } + while (child != AVL_NULL); + + if (parent == rm) + /* Only went through do loop once. Deleted node will be replaced + ** in the tree structure by one of its immediate children. */ + cmp_shortened_sub_with_path = -cmp; + else + cmp_shortened_sub_with_path = cmp; + + /* Get the handle of the opposite child, which may not be null. */ + child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0); + } + + if (parent == AVL_NULL) { + /* There were only 1 or 2 nodes in this tree. */ + AVL_SET_ROOT(L__tree, child); + } + else if (cmp_shortened_sub_with_path < 0) + AVL_SET_LESS(parent, child) + else + AVL_SET_GREATER(parent, child) + + /* "path" is the parent of the subtree being eliminated or reduced + ** from a depth of 2 to 1. If "path" is the node to be removed, we + ** set path to the node we're about to poke into the position of the + ** node to be removed. */ + path = parent == rm ? h : parent; + + if (h != rm) + { + /* Poke in the replacement for the node to be removed. */ + AVL_SET_LESS(h, AVL_GET_LESS(rm, 0)) + AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0)) + AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm)) + if (parent_rm == AVL_NULL) { + AVL_SET_ROOT(L__tree, h); + } + else + { + depth = rm_depth - 1; + if (L__BIT_ARR_VAL(branch, depth)) + AVL_SET_GREATER(parent_rm, h) + else + AVL_SET_LESS(parent_rm, h) + } + } + + if (path != AVL_NULL) + { + /* Create a temporary linked list from the parent of the path node + ** to the root node. 
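Child pointers along the path are temporarily reversed to point at each node's parent, then restored while climbing back up and rebalancing, so the nodes themselves need no parent pointers.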
*/ + h = L__tree->root; + parent = AVL_NULL; + depth = 0; + while (h != path) + { + if (L__BIT_ARR_VAL(branch, depth)) + { + child = AVL_GET_GREATER(h, 1); + AVL_SET_GREATER(h, parent) + } + else + { + child = AVL_GET_LESS(h, 1); + AVL_SET_LESS(h, parent) + } + L__CHECK_READ_ERROR(AVL_NULL) + depth++; + parent = h; + h = child; + } + + /* Climb from the path node to the root node using the linked + ** list, restoring the tree structure and rebalancing as necessary. + */ + reduced_depth = 1; + cmp = cmp_shortened_sub_with_path; + for ( ; ; ) + { + if (reduced_depth) + { + bf = AVL_GET_BALANCE_FACTOR(h); + if (cmp < 0) + bf++; + else /* cmp > 0 */ + bf--; + if ((bf == -2) || (bf == 2)) + { + h = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX h); + L__CHECK_READ_ERROR(AVL_NULL) + bf = AVL_GET_BALANCE_FACTOR(h); + } + else + AVL_SET_BALANCE_FACTOR(h, bf) + reduced_depth = (bf == 0); + } + if (parent == AVL_NULL) + break; + child = h; + h = parent; + depth--; + cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1; + if (cmp < 0) + { + parent = AVL_GET_LESS(h, 1); + AVL_SET_LESS(h, child) + } + else + { + parent = AVL_GET_GREATER(h, 1); + AVL_SET_GREATER(h, child) + } + L__CHECK_READ_ERROR(AVL_NULL) + } + AVL_SET_ROOT(L__tree, h); + } + + return(rm); + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_SUBST) + +L__SC AVL_HANDLE L__(subst)(L__(avl) *L__tree, AVL_HANDLE new_node) + { + AVL_HANDLE h = L__tree->root; + AVL_HANDLE parent = AVL_NULL; + int cmp, last_cmp; + + /* Search for node already in tree with same key. */ + for ( ; ; ) + { + if (h == AVL_NULL) + /* No node in tree with same key as new node. */ + return(AVL_NULL); + cmp = AVL_COMPARE_NODE_NODE(new_node, h); + if (cmp == 0) + /* Found the node to substitute new one for. */ + break; + last_cmp = cmp; + parent = h; + h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR(AVL_NULL) + } + + /* Copy tree housekeeping fields from node in tree to new node. */ + AVL_SET_LESS(new_node, AVL_GET_LESS(h, 0)) + AVL_SET_GREATER(new_node, AVL_GET_GREATER(h, 0)) + AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(h)) + + if (parent == AVL_NULL) + { + /* New node is also new root. */ + AVL_SET_ROOT(L__tree, new_node); + } + else + { + /* Make parent point to new node. */ + if (last_cmp < 0) + AVL_SET_LESS(parent, new_node) + else + AVL_SET_GREATER(parent, new_node) + } + + return(h); + } + +#endif + +#ifdef AVL_BUILD_ITER_TYPE + +#if (L__IMPL_MASK & AVL_IMPL_BUILD) + +L__SC int L__(build)( + L__(avl) *L__tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes) + { + /* Gives path to subtree being built. If bit n is false, branch + ** less from the node at depth n, if true branch greater. */ + L__BIT_ARR_DEFN(branch) + + /* If bit n is true, then for the current subtree at depth n, its + ** greater subtree has one more node than its less subtree. */ + L__BIT_ARR_DEFN(rem) + + /* Depth of root node of current subtree. */ + unsigned depth = 0; + + /* Number of nodes in current subtree. */ + L__SIZE num_sub = num_nodes; + + /* The algorithm relies on a stack of nodes whose less subtree has + ** been built, but whose greater subtree has not yet been built. + ** The stack is implemented as linked list. The nodes are linked + ** together by having the "greater" handle of a node set to the + ** next node in the list. "less_parent" is the handle of the first + ** node in the list. */ + AVL_HANDLE less_parent = AVL_NULL; + + /* h is root of current subtree, child is one of its children. 
*/ + AVL_HANDLE h; + AVL_HANDLE child; + + if (num_nodes == 0) + { + AVL_SET_ROOT(L__tree, AVL_NULL); + return(1); + } + + for ( ; ; ) + { + while (num_sub > 2) + { + /* Subtract one for root of subtree. */ + num_sub--; + if (num_sub & 1) + L__BIT_ARR_1(rem, depth) + else + L__BIT_ARR_0(rem, depth) + L__BIT_ARR_0(branch, depth) + depth++; + num_sub >>= 1; + } + + if (num_sub == 2) + { + /* Build a subtree with two nodes, slanting to greater. + ** I arbitrarily chose to always have the extra node in the + ** greater subtree when there is an odd number of nodes to + ** split between the two subtrees. */ + + h = AVL_BUILD_ITER_VAL(p); + L__CHECK_READ_ERROR(0) + AVL_BUILD_ITER_INCR(p) + child = AVL_BUILD_ITER_VAL(p); + L__CHECK_READ_ERROR(0) + AVL_BUILD_ITER_INCR(p) + AVL_SET_LESS(child, AVL_NULL) + AVL_SET_GREATER(child, AVL_NULL) + AVL_SET_BALANCE_FACTOR(child, 0) + AVL_SET_GREATER(h, child) + AVL_SET_LESS(h, AVL_NULL) + AVL_SET_BALANCE_FACTOR(h, 1) + } + else /* num_sub == 1 */ + { + /* Build a subtree with one node. */ + + h = AVL_BUILD_ITER_VAL(p); + L__CHECK_READ_ERROR(0) + AVL_BUILD_ITER_INCR(p) + AVL_SET_LESS(h, AVL_NULL) + AVL_SET_GREATER(h, AVL_NULL) + AVL_SET_BALANCE_FACTOR(h, 0) + } + + while (depth) + { + depth--; + if (!L__BIT_ARR_VAL(branch, depth)) + /* We've completed a less subtree. */ + break; + + /* We've completed a greater subtree, so attach it to + ** its parent (that is less than it). We pop the parent + ** off the stack of less parents. */ + child = h; + h = less_parent; + less_parent = AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR(0) + AVL_SET_GREATER(h, child) + /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */ + num_sub <<= 1; + num_sub += L__BIT_ARR_VAL(rem, depth) ? 0 : 1; + if (num_sub & (num_sub - 1)) + /* num_sub is not a power of 2. */ + AVL_SET_BALANCE_FACTOR(h, 0) + else + /* num_sub is a power of 2. */ + AVL_SET_BALANCE_FACTOR(h, 1) + } + + if (num_sub == num_nodes) + /* We've completed the full tree. */ + break; + + /* The subtree we've completed is the less subtree of the + ** next node in the sequence. */ + + child = h; + h = AVL_BUILD_ITER_VAL(p); + L__CHECK_READ_ERROR(0) + AVL_BUILD_ITER_INCR(p) + AVL_SET_LESS(h, child) + + /* Put h into stack of less parents. */ + AVL_SET_GREATER(h, less_parent) + less_parent = h; + + /* Proceed to creating greater than subtree of h. */ + L__BIT_ARR_1(branch, depth) + num_sub += L__BIT_ARR_VAL(rem, depth) ? 1 : 0; + depth++; + + } /* end for ( ; ; ) */ + + AVL_SET_ROOT(L__tree, h); + + return(1); + } + +#endif + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_INIT_ITER) + +/* Initialize depth to invalid value, to indicate iterator is +** invalid. (Depth is zero-base.) It's not necessary to initialize +** iterators prior to passing them to the "start" function. +*/ +L__SC void L__(init_iter)(L__(iter) *iter) { iter->depth = ~0; } + +#endif + +#ifdef AVL_READ_ERRORS_HAPPEN + +#define L__CHECK_READ_ERROR_INV_DEPTH \ +{ if (AVL_READ_ERROR) { iter->depth = ~0; return; } } + +#else + +#define L__CHECK_READ_ERROR_INV_DEPTH + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_START_ITER) + +L__SC void L__(start_iter)( + L__(avl) *L__tree, L__(iter) *iter, AVL_KEY k, avl_search_type st) + { + AVL_HANDLE h = L__tree->root; + unsigned d = 0; + int cmp, target_cmp; + + /* Save the tree that we're going to iterate through in a + ** member variable. */ + iter->tree_ = L__tree; + + iter->depth = ~0; + + if (h == AVL_NULL) + /* Tree is empty. */ + return; + + if (st & AVL_LESS) + /* Key can be greater than key of starting node. 
*/ + target_cmp = 1; + else if (st & AVL_GREATER) + /* Key can be less than key of starting node. */ + target_cmp = -1; + else + /* Key must be same as key of starting node. */ + target_cmp = 0; + + for ( ; ; ) + { + cmp = AVL_COMPARE_KEY_NODE(k, h); + if (cmp == 0) + { + if (st & AVL_EQUAL) + { + /* Equal node was sought and found as starting node. */ + iter->depth = d; + break; + } + cmp = -target_cmp; + } + else if (target_cmp != 0) + if (!((cmp ^ target_cmp) & L__MASK_HIGH_BIT)) + /* cmp and target_cmp are both negative or both positive. */ + iter->depth = d; + h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR_INV_DEPTH + if (h == AVL_NULL) + break; + if (cmp > 0) + L__BIT_ARR_1(iter->branch, d) + else + L__BIT_ARR_0(iter->branch, d) + iter->path_h[d++] = h; + } + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_START_ITER_LEAST) + +L__SC void L__(start_iter_least)(L__(avl) *L__tree, L__(iter) *iter) + { + AVL_HANDLE h = L__tree->root; + + iter->tree_ = L__tree; + + iter->depth = ~0; + + L__BIT_ARR_ALL(iter->branch, 0) + + while (h != AVL_NULL) + { + if (iter->depth != ~0) + iter->path_h[iter->depth] = h; + iter->depth++; + h = AVL_GET_LESS(h, 1); + L__CHECK_READ_ERROR_INV_DEPTH + } + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_START_ITER_GREATEST) + +L__SC void L__(start_iter_greatest)(L__(avl) *L__tree, L__(iter) *iter) + { + AVL_HANDLE h = L__tree->root; + + iter->tree_ = L__tree; + + iter->depth = ~0; + + L__BIT_ARR_ALL(iter->branch, 1) + + while (h != AVL_NULL) + { + if (iter->depth != ~0) + iter->path_h[iter->depth] = h; + iter->depth++; + h = AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR_INV_DEPTH + } + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_GET_ITER) + +L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter) + { + if (iter->depth == ~0) + return(AVL_NULL); + + return(iter->depth == 0 ? + iter->tree_->root : iter->path_h[iter->depth - 1]); + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_INCR_ITER) + +L__SC void L__(incr_iter)(L__(iter) *iter) + { + #define L__tree (iter->tree_) + + if (iter->depth != ~0) + { + AVL_HANDLE h = + AVL_GET_GREATER((iter->depth == 0 ? + iter->tree_->root : iter->path_h[iter->depth - 1]), 1); + L__CHECK_READ_ERROR_INV_DEPTH + + if (h == AVL_NULL) + do + { + if (iter->depth == 0) + { + iter->depth = ~0; + break; + } + iter->depth--; + } + while (L__BIT_ARR_VAL(iter->branch, iter->depth)); + else + { + L__BIT_ARR_1(iter->branch, iter->depth) + iter->path_h[iter->depth++] = h; + for ( ; ; ) + { + h = AVL_GET_LESS(h, 1); + L__CHECK_READ_ERROR_INV_DEPTH + if (h == AVL_NULL) + break; + L__BIT_ARR_0(iter->branch, iter->depth) + iter->path_h[iter->depth++] = h; + } + } + } + + #undef L__tree + } + +#endif + +#if (L__IMPL_MASK & AVL_IMPL_DECR_ITER) + +L__SC void L__(decr_iter)(L__(iter) *iter) + { + #define L__tree (iter->tree_) + + if (iter->depth != ~0) + { + AVL_HANDLE h = + AVL_GET_LESS((iter->depth == 0 ? + iter->tree_->root : iter->path_h[iter->depth - 1]), 1); + L__CHECK_READ_ERROR_INV_DEPTH + + if (h == AVL_NULL) + do + { + if (iter->depth == 0) + { + iter->depth = ~0; + break; + } + iter->depth--; + } + while (!L__BIT_ARR_VAL(iter->branch, iter->depth)); + else + { + L__BIT_ARR_0(iter->branch, iter->depth) + iter->path_h[iter->depth++] = h; + for ( ; ; ) + { + h = AVL_GET_GREATER(h, 1); + L__CHECK_READ_ERROR_INV_DEPTH + if (h == AVL_NULL) + break; + L__BIT_ARR_1(iter->branch, iter->depth) + iter->path_h[iter->depth++] = h; + } + } + } + + #undef L__tree + } + +#endif + +/* Tidy up the preprocessor symbol name space. 
*/ +#undef L__ +#undef L__EST_LONG_BIT +#undef L__SIZE +#undef L__MASK_HIGH_BIT +#undef L__LONG_BIT +#undef L__BIT_ARR_DEFN +#undef L__BIT_ARR_VAL +#undef L__BIT_ARR_0 +#undef L__BIT_ARR_1 +#undef L__BIT_ARR_ALL +#undef L__CHECK_READ_ERROR +#undef L__CHECK_READ_ERROR_INV_DEPTH +#undef L__BIT_ARR_LONGS +#undef L__IMPL_MASK +#undef L__CHECK_READ_ERROR +#undef L__CHECK_READ_ERROR_INV_DEPTH +#undef L__SC +#undef L__BALANCE_PARAM_CALL_PREFIX +#undef L__BALANCE_PARAM_DECL_PREFIX diff --git a/servers/vm/exec.c b/servers/vm/exec.c index 47ec93889..65106af72 100644 --- a/servers/vm/exec.c +++ b/servers/vm/exec.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,9 @@ FORWARD _PROTOTYPE( int new_mem, (struct vmproc *vmp, struct vmproc *sh_vmp, vir_bytes text_bytes, vir_bytes data_bytes, vir_bytes bss_bytes, - vir_bytes stk_bytes, phys_bytes tot_bytes) ); + vir_bytes stk_bytes, phys_bytes tot_bytes, vir_bytes *stack_top)); + +static int failcount; /*===========================================================================* * find_share * @@ -78,15 +81,17 @@ PUBLIC int do_exec_newmem(message *msg) proc_e= msg->VMEN_ENDPOINT; if (vm_isokendpt(proc_e, &proc_n) != OK) { - printf("VM:exec_newmem: bad endpoint %d from %d\n", + printf("VM: exec_newmem: bad endpoint %d from %d\n", proc_e, msg->m_source); return ESRCH; } vmp= &vmproc[proc_n]; ptr= msg->VMEN_ARGSPTR; + NOTRUNNABLE(vmp->vm_endpoint); + if(msg->VMEN_ARGSSIZE != sizeof(args)) { - printf("VM:exec_newmem: args size %d != %ld\n", + printf("VM: exec_newmem: args size %d != %ld\n", msg->VMEN_ARGSSIZE, sizeof(args)); return EINVAL; } @@ -97,18 +102,30 @@ SANITYCHECK(SCL_DETAIL); if (r != OK) vm_panic("exec_newmem: sys_datacopy failed", r); + /* Minimum stack region (not preallocated) + * Stopgap for better rlimit-based stack size system + */ + if(args.tot_bytes < MINSTACKREGION) { + args.tot_bytes = MINSTACKREGION; + } + /* Check to see if segment sizes are feasible. */ tc = ((unsigned long) args.text_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; dc = (args.data_bytes+args.bss_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; totc = (args.tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; sc = (args.args_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; - if (dc >= totc) return(ENOEXEC); /* stack must be at least 1 click */ + if (dc >= totc) { + printf("VM: newmem: no stack?\n"); + return(ENOEXEC); /* stack must be at least 1 click */ + } dvir = (args.sep_id ? 0 : tc); s_vir = dvir + (totc - sc); r = (dvir + dc > s_vir) ? ENOMEM : OK; - if (r != OK) + if (r != OK) { + printf("VM: newmem: no virtual space?\n"); return r; + } /* Can the process' text be shared with that of one already running? */ if(!vm_paged) { @@ -121,29 +138,30 @@ SANITYCHECK(SCL_DETAIL); * kernel. */ r = new_mem(vmp, sh_mp, args.text_bytes, args.data_bytes, - args.bss_bytes, args.args_bytes, args.tot_bytes); - if (r != OK) return(r); + args.bss_bytes, args.args_bytes, args.tot_bytes, &stack_top); + if (r != OK) { + printf("VM: newmem: new_mem failed\n"); + return(r); + } /* Save file identification to allow it to be shared. 
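find_share() compares this inode/device/ctime triple against the other processes to decide whether an already loaded text segment can be reused.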
*/ vmp->vm_ino = args.st_ino; vmp->vm_dev = args.st_dev; vmp->vm_ctime = args.st_ctime; - stack_top= ((vir_bytes)vmp->vm_arch.vm_seg[S].mem_vir << CLICK_SHIFT) + - ((vir_bytes)vmp->vm_arch.vm_seg[S].mem_len << CLICK_SHIFT); - /* set/clear separate I&D flag */ if (args.sep_id) vmp->vm_flags |= VMF_SEPARATE; else vmp->vm_flags &= ~VMF_SEPARATE; - msg->VMEN_STACK_TOP = (void *) stack_top; msg->VMEN_FLAGS = 0; if (!sh_mp) /* Load text if sh_mp = NULL */ msg->VMEN_FLAGS |= EXC_NM_RF_LOAD_TEXT; + NOTRUNNABLE(vmp->vm_endpoint); + return OK; } @@ -151,7 +169,7 @@ SANITYCHECK(SCL_DETAIL); * new_mem * *===========================================================================*/ PRIVATE int new_mem(rmp, sh_mp, text_bytes, data_bytes, - bss_bytes,stk_bytes,tot_bytes) + bss_bytes,stk_bytes,tot_bytes,stack_top) struct vmproc *rmp; /* process to get a new memory map */ struct vmproc *sh_mp; /* text can be shared with this process */ vir_bytes text_bytes; /* text segment size in bytes */ @@ -159,6 +177,7 @@ vir_bytes data_bytes; /* size of initialized data in bytes */ vir_bytes bss_bytes; /* size of bss in bytes */ vir_bytes stk_bytes; /* size of initial stack segment in bytes */ phys_bytes tot_bytes; /* total memory to allocate, including gap */ +vir_bytes *stack_top; /* top of process stack */ { /* Allocate new memory and release the old memory. Change the map and report * the new map to the kernel. Zero the new core image's bss, gap and stack. @@ -166,10 +185,15 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ vir_clicks text_clicks, data_clicks, gap_clicks, stack_clicks, tot_clicks; phys_bytes bytes, base, bss_offset; - int s, r2; + int s, r2, r, hadpt = 0; + struct vmproc *vmpold = &vmproc[VMP_EXECTMP]; SANITYCHECK(SCL_FUNCTIONS); + if(rmp->vm_flags & VMF_HASPT) { + hadpt = 1; + } + /* No need to allocate text if it can be shared. */ if (sh_mp != NULL) { text_bytes = 0; @@ -185,19 +209,31 @@ phys_bytes tot_bytes; /* total memory to allocate, including gap */ stack_clicks = (stk_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; tot_clicks = (tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT; gap_clicks = tot_clicks - data_clicks - stack_clicks; - if ( (int) gap_clicks < 0) return(ENOMEM); - -SANITYCHECK(SCL_DETAIL); - + if ( (int) gap_clicks < 0) { + printf("VM: new_mem: no gap?\n"); + return(ENOMEM); + } - /* We've got memory for the new core image. Release the old one. */ - if(rmp->vm_flags & VMF_HASPT) { - /* Free page table and memory allocated by pagetable functions. */ - rmp->vm_flags &= ~VMF_HASPT; - free_proc(rmp); - } else { + /* Keep previous process state for recovery; the sanity check functions + * know about the 'vmpold' slot, so the memory that the exec()ing + * process is still holding is referenced there. + * + * Throw away the old page table to avoid having two process slots + * using the same vm_pt. + * Just recreate it in the case that we have to revert. + */ +SANITYCHECK(SCL_DETAIL); + if(hadpt) { + pt_free(&rmp->vm_pt); + rmp->vm_flags &= ~VMF_HASPT; + } + vm_assert(!(vmpold->vm_flags & VMF_INUSE)); + *vmpold = *rmp; /* copy current state. */ + rmp->vm_regions = NULL; /* exec()ing process regions thrown out. */ +SANITYCHECK(SCL_DETAIL); + if(!hadpt) { if (find_share(rmp, rmp->vm_ino, rmp->vm_dev, rmp->vm_ctime) == NULL) { /* No other process shares the text segment, so free it. 
*/ FREE_MEM(rmp->vm_arch.vm_seg[T].mem_phys, rmp->vm_arch.vm_seg[T].mem_len); @@ -210,17 +246,20 @@ SANITYCHECK(SCL_DETAIL); - rmp->vm_arch.vm_seg[D].mem_vir); } - /* We have now passed the point of no return. The old core image has been - * forever lost, memory for a new core image has been allocated. Set up - * and report new map. + /* Build new process in current slot, without freeing old + * one. If it fails, revert. */ if(vm_paged) { - if(pt_new(&rmp->vm_pt) != OK) - vm_panic("exec_newmem: no new pagetable", NO_NUM); + int ptok = 1; + SANITYCHECK(SCL_DETAIL); + if((r=pt_new(&rmp->vm_pt)) != OK) { + ptok = 0; + printf("exec_newmem: no new pagetable\n"); + } SANITYCHECK(SCL_DETAIL); - proc_new(rmp, + if(r != OK || (r=proc_new(rmp, VM_PROCSTART, /* where to start the process in the page table */ CLICK2ABS(text_clicks),/* how big is the text in bytes, page-aligned */ CLICK2ABS(data_clicks),/* how big is data+bss, page-aligned */ @@ -228,13 +267,48 @@ SANITYCHECK(SCL_DETAIL); CLICK2ABS(gap_clicks), /* how big is gap, page-aligned */ 0,0, /* not preallocated */ VM_STACKTOP /* regular stack top */ - ); + )) != OK) { + SANITYCHECK(SCL_DETAIL); + printf("VM: new_mem: failed\n"); + if(ptok) { + pt_free(&rmp->vm_pt); + } + *rmp = *vmpold; /* undo. */ + clear_proc(vmpold); /* disappear. */ + SANITYCHECK(SCL_DETAIL); + if(hadpt) { + if(pt_new(&rmp->vm_pt) != OK) { + /* We secretly know that making a new pagetable + * in the same slot if one was there will never fail. + */ + vm_panic("new_mem: pt_new failed", s); + } + rmp->vm_flags |= VMF_HASPT; + SANITYCHECK(SCL_DETAIL); + if(map_writept(rmp) != OK) { + printf("VM: warning: exec undo failed\n"); + } + SANITYCHECK(SCL_DETAIL); + } + return r; + } SANITYCHECK(SCL_DETAIL); + /* new process is made; free and unreference + * page table and memory still held by exec()ing process. + */ + SANITYCHECK(SCL_DETAIL); + free_proc(vmpold); + clear_proc(vmpold); /* disappear. */ + SANITYCHECK(SCL_DETAIL); + *stack_top = VM_STACKTOP; } else { phys_clicks new_base; new_base = ALLOC_MEM(text_clicks + tot_clicks, 0); - if (new_base == NO_MEM) return(ENOMEM); + if (new_base == NO_MEM) { + printf("VM: new_mem: ALLOC_MEM failed\n"); + return(ENOMEM); + } if (sh_mp != NULL) { /* Share the text segment. */ @@ -294,6 +368,8 @@ SANITYCHECK(SCL_DETAIL); /* Tell kernel this thing has no page table. */ if((s=pt_bind(NULL, rmp)) != OK) vm_panic("exec_newmem: pt_bind failed", s); + *stack_top= ((vir_bytes)rmp->vm_arch.vm_seg[S].mem_vir << CLICK_SHIFT) + + ((vir_bytes)rmp->vm_arch.vm_seg[S].mem_len << CLICK_SHIFT); } SANITYCHECK(SCL_FUNCTIONS); @@ -348,13 +424,6 @@ PUBLIC int proc_new(struct vmproc *vmp, vm_assert(!(data_start % VM_PAGE_SIZE)); vm_assert((!text_start && !data_start) || (text_start && data_start)); -#if 0 - if(!map_proc_kernel(vmp)) { - printf("VM: exec: map_proc_kernel failed\n"); - return ENOMEM; - } -#endif - /* Place text at start of process. */ vmp->vm_arch.vm_seg[T].mem_phys = ABS2CLICK(vstart); vmp->vm_arch.vm_seg[T].mem_vir = 0; @@ -371,6 +440,8 @@ PUBLIC int proc_new(struct vmproc *vmp, VR_ANON | VR_WRITABLE, text_start ? 0 : MF_PREALLOC)) { SANITYCHECK(SCL_DETAIL); printf("VM: proc_new: map_page_region failed (text)\n"); + map_free_proc(vmp); + SANITYCHECK(SCL_DETAIL); return(ENOMEM); } SANITYCHECK(SCL_DETAIL); @@ -385,6 +456,8 @@ PUBLIC int proc_new(struct vmproc *vmp, data_bytes, data_start ? data_start : MAP_NONE, VR_ANON | VR_WRITABLE, data_start ? 
0 : MF_PREALLOC))) { printf("VM: exec: map_page_region for data failed\n"); + map_free_proc(vmp); + SANITYCHECK(SCL_DETAIL); return ENOMEM; } @@ -432,13 +505,8 @@ PUBLIC int proc_new(struct vmproc *vmp, vmp->vm_flags |= VMF_HASPT; - if((s=sys_newmap(vmp->vm_endpoint, vmp->vm_arch.vm_seg)) != OK) { + if((s=sys_newmap(vmp->vm_endpoint, vmp->vm_arch.vm_seg)) != OK) vm_panic("sys_newmap (vm) failed", s); - } - - - /* This is the real stack clicks. */ - vmp->vm_arch.vm_seg[S].mem_len = ABS2CLICK(stack_bytes); if((s=pt_bind(&vmp->vm_pt, vmp)) != OK) vm_panic("exec_newmem: pt_bind failed", s); diff --git a/servers/vm/exit.c b/servers/vm/exit.c index c311e17e0..7990684a0 100644 --- a/servers/vm/exit.c +++ b/servers/vm/exit.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -24,8 +25,10 @@ PUBLIC void free_proc(struct vmproc *vmp) { - vmp->vm_flags &= ~VMF_HASPT; - pt_free(&vmp->vm_pt); + if(vmp->vm_flags & VMF_HASPT) { + vmp->vm_flags &= ~VMF_HASPT; + pt_free(&vmp->vm_pt); + } map_free_proc(vmp); vmp->vm_regions = NULL; #if VMSTATS diff --git a/servers/vm/fork.c b/servers/vm/fork.c index 8a5aa6ae7..041c1243b 100644 --- a/servers/vm/fork.c +++ b/servers/vm/fork.c @@ -13,7 +13,10 @@ #include #include #include +#include +#include +#include #include #include @@ -31,6 +34,8 @@ PUBLIC int do_fork(message *msg) { int r, proc, s, childproc, fullvm; struct vmproc *vmp, *vmc; + pt_t origpt; + vir_bytes msgaddr; SANITYCHECK(SCL_FUNCTIONS); @@ -49,6 +54,9 @@ PUBLIC int do_fork(message *msg) vmp = &vmproc[proc]; /* parent */ vmc = &vmproc[childproc]; /* child */ + vm_assert(vmc->vm_slot == childproc); + + NOTRUNNABLE(vmp->vm_endpoint); if(vmp->vm_flags & VMF_HAS_DMA) { printf("VM: %d has DMA memory and may not fork\n", msg->VMF_ENDPOINT); @@ -58,14 +66,20 @@ PUBLIC int do_fork(message *msg) fullvm = vmp->vm_flags & VMF_HASPT; /* The child is basically a copy of the parent. */ + origpt = vmc->vm_pt; *vmc = *vmp; + vmc->vm_slot = childproc; vmc->vm_regions = NULL; vmc->vm_endpoint = NONE; /* In case someone tries to use it. */ + vmc->vm_pt = origpt; + vmc->vm_flags &= ~VMF_HASPT; #if VMSTATS vmc->vm_bytecopies = 0; #endif + SANITYCHECK(SCL_DETAIL); + if(fullvm) { SANITYCHECK(SCL_DETAIL); @@ -74,6 +88,8 @@ PUBLIC int do_fork(message *msg) return ENOMEM; } + vmc->vm_flags |= VMF_HASPT; + SANITYCHECK(SCL_DETAIL); if(map_proc_copy(vmc, vmp) != OK) { @@ -108,6 +124,7 @@ PUBLIC int do_fork(message *msg) /* Create a copy of the parent's core image for the child. */ child_abs = (phys_bytes) child_base << CLICK_SHIFT; parent_abs = (phys_bytes) vmp->vm_arch.vm_seg[D].mem_phys << CLICK_SHIFT; + FIXME("VM uses kernel for abscopy"); s = sys_abscopy(parent_abs, child_abs, prog_bytes); if (s < 0) vm_panic("do_fork can't copy", s); @@ -124,14 +141,29 @@ PUBLIC int do_fork(message *msg) /* Only inherit these flags. */ vmc->vm_flags &= (VMF_INUSE|VMF_SEPARATE|VMF_HASPT); + /* inherit the priv call bitmaps */ + memcpy(&vmc->vm_call_priv_mask, &vmp->vm_call_priv_mask, + sizeof(vmc->vm_call_priv_mask)); + /* Tell kernel about the (now successful) FORK. */ if((r=sys_fork(vmp->vm_endpoint, childproc, &vmc->vm_endpoint, vmc->vm_arch.vm_seg, - fullvm ? PFF_VMINHIBIT : 0)) != OK) { + fullvm ? PFF_VMINHIBIT : 0, &msgaddr)) != OK) { vm_panic("do_fork can't sys_fork", r); } + NOTRUNNABLE(vmp->vm_endpoint); + NOTRUNNABLE(vmc->vm_endpoint); + if(fullvm) { + vir_bytes vir; + /* making these messages writable is an optimisation + * and its return value needn't be checked. 
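+	 * Pre-faulting the page that holds the IPC message means the
+	 * kernel can deliver the fork reply to parent and child without
+	 * first suspending either of them on a VM memory request.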
+ */ + vir = arch_vir2map(vmc, msgaddr); + handle_memory(vmc, vir, sizeof(message), 1); + vir = arch_vir2map(vmp, msgaddr); + handle_memory(vmp, vir, sizeof(message), 1); if((r=pt_bind(&vmc->vm_pt, vmc)) != OK) vm_panic("fork can't pt_bind", r); } diff --git a/servers/vm/glo.h b/servers/vm/glo.h index 593fb1820..0889a8f06 100644 --- a/servers/vm/glo.h +++ b/servers/vm/glo.h @@ -12,17 +12,19 @@ #define EXTERN #endif -EXTERN struct vmproc vmproc[_NR_PROCS+1]; +#define VMP_SYSTEM _NR_PROCS +#define VMP_EXECTMP _NR_PROCS+1 +#define VMP_NR _NR_PROCS+2 + +EXTERN struct vmproc vmproc[VMP_NR]; #if SANITYCHECKS EXTERN int nocheck; -u32_t data1[200]; -#define CHECKADDR 0 +EXTERN int incheck; EXTERN long vm_sanitychecklevel; #endif -#define VMP_SYSTEM _NR_PROCS - /* vm operation mode state and values */ EXTERN long vm_paged; -EXTERN phys_bytes kernel_top_bytes; + +EXTERN int meminit_done; diff --git a/servers/vm/i386/arch_pagefaults.c b/servers/vm/i386/arch_pagefaults.c index 9ec324639..83c38723b 100644 --- a/servers/vm/i386/arch_pagefaults.c +++ b/servers/vm/i386/arch_pagefaults.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/servers/vm/i386/arch_vmproc.h b/servers/vm/i386/arch_vmproc.h index cab280383..e6ea34bae 100644 --- a/servers/vm/i386/arch_vmproc.h +++ b/servers/vm/i386/arch_vmproc.h @@ -1,5 +1,12 @@ #include +#include +#include +#include +#include +#include +#include +#include struct vm_arch { struct mem_map vm_seg[NR_LOCAL_SEGS]; /* text, data, stack */ diff --git a/servers/vm/i386/memory.h b/servers/vm/i386/memory.h index 3e44243b7..05af779f7 100644 --- a/servers/vm/i386/memory.h +++ b/servers/vm/i386/memory.h @@ -15,7 +15,7 @@ #define VM_PAGE_SIZE I386_PAGE_SIZE /* Where do processes start in linear (i.e. page table) memory? */ -#define VM_PROCSTART (I386_BIG_PAGE_SIZE*10) +#define VM_PROCSTART (I386_BIG_PAGE_SIZE*100) #define CLICKSPERPAGE (I386_PAGE_SIZE/CLICK_SIZE) diff --git a/servers/vm/i386/pagetable.c b/servers/vm/i386/pagetable.c index 8e3c827ce..444729d90 100644 --- a/servers/vm/i386/pagetable.c +++ b/servers/vm/i386/pagetable.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -34,14 +35,14 @@ #include "memory.h" -int global_bit_ok = 0; -int bigpage_ok = 0; +/* PDE used to map in kernel, kernel physical address. */ +PRIVATE int kernel_pde = -1, pagedir_pde = -1; +PRIVATE u32_t kern_pde_val = 0, global_bit = 0, pagedir_pde_val; -/* Location in our virtual address space where we can map in - * any physical page we want. -*/ -static unsigned char *varmap = NULL; /* Our address space. */ -static u32_t varmap_loc; /* Our page table. */ +PRIVATE int proc_pde = 0; + +/* 4MB page size available in hardware? */ +PRIVATE int bigpage_ok = 0; /* Our process table entry. */ struct vmproc *vmp = &vmproc[VM_PROC_NR]; @@ -52,7 +53,7 @@ struct vmproc *vmp = &vmproc[VM_PROC_NR]; */ #define SPAREPAGES 5 int missing_spares = SPAREPAGES; -static struct { +PRIVATE struct { void *page; u32_t phys; } sparepages[SPAREPAGES]; @@ -78,7 +79,6 @@ static struct { u32_t page_directories_phys, *page_directories = NULL; #if SANITYCHECKS -#define PT_SANE(p) { pt_sanitycheck((p), __FILE__, __LINE__); SANITYCHECK(SCL_DETAIL); } /*===========================================================================* * pt_sanitycheck * *===========================================================================*/ @@ -86,21 +86,37 @@ PUBLIC void pt_sanitycheck(pt_t *pt, char *file, int line) { /* Basic pt sanity check. 
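pt_sanitycheck() below feeds every page it knows about, the page directory itself and each allocated page table, into usedpages_add(), which fails if the same physical page is accounted for twice; each sweep starts from a clean slate via usedpages_reset(). A sketch of the detector, assuming a static page bitmap (pagemap, a hypothetical name here) and MINIX's GET_BIT/SET_BIT bitmap macros; illustrative, not the literal alloc.c code, though the signature matches the proto.h declaration later in this patch:

	int usedpages_add_f(phys_bytes phys, phys_bytes len, char *file, int line)
	{
		u32_t page = phys / VM_PAGE_SIZE;

		vm_assert(!(phys % VM_PAGE_SIZE));
		vm_assert(!(len % VM_PAGE_SIZE));

		while(len > 0) {
			if(GET_BIT(pagemap, page)) {
				printf("%s:%d: page 0x%lx used twice\n",
					file, line, (long) page * VM_PAGE_SIZE);
				return EFAULT;
			}
			SET_BIT(pagemap, page);
			page++;
			len -= VM_PAGE_SIZE;
		}
		return OK;
	}
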
*/ int i; + int slot; MYASSERT(pt); MYASSERT(pt->pt_dir); MYASSERT(pt->pt_dir_phys); - for(i = 0; i < I386_VM_DIR_ENTRIES; i++) { + for(slot = 0; slot < ELEMENTS(vmproc); slot++) { + if(pt == &vmproc[slot].vm_pt) + break; + } + + if(slot >= ELEMENTS(vmproc)) { + vm_panic("pt_sanitycheck: passed pt not in any proc", NO_NUM); + } + + MYASSERT(usedpages_add(pt->pt_dir_phys, I386_PAGE_SIZE) == OK); + + for(i = proc_pde; i < I386_VM_DIR_ENTRIES; i++) { if(pt->pt_pt[i]) { + if(!(pt->pt_dir[i] & I386_VM_PRESENT)) { + printf("slot %d: pt->pt_pt[%d] = 0x%lx, but pt_dir entry 0x%lx\n", + slot, i, pt->pt_pt[i], pt->pt_dir[i]); + } MYASSERT(pt->pt_dir[i] & I386_VM_PRESENT); + MYASSERT(usedpages_add(I386_VM_PFA(pt->pt_dir[i]), + I386_PAGE_SIZE) == OK); } else { MYASSERT(!(pt->pt_dir[i] & I386_VM_PRESENT)); } } } -#else -#define PT_SANE(p) #endif /*===========================================================================* @@ -240,7 +256,6 @@ PRIVATE void *vm_getsparepage(u32_t *phys) return sp; } } - vm_panic("VM: out of spare pages", NO_NUM); return NULL; } @@ -255,17 +270,16 @@ PRIVATE void *vm_checkspares(void) for(s = 0; s < SPAREPAGES && missing_spares > 0; s++) if(!sparepages[s].page) { n++; - sparepages[s].page = vm_allocpages(&sparepages[s].phys, 1, - VMP_SPARE); - missing_spares--; - vm_assert(missing_spares >= 0 && missing_spares <= SPAREPAGES); + if((sparepages[s].page = vm_allocpages(&sparepages[s].phys, 1, + VMP_SPARE))) { + missing_spares--; + vm_assert(missing_spares >= 0); + vm_assert(missing_spares <= SPAREPAGES); + } } if(worst < n) worst = n; total += n; -#if 0 - if(n > 0) - printf("VM: made %d spares, total %d, worst %d\n", n, total, worst); -#endif + return NULL; } @@ -293,7 +307,7 @@ PUBLIC void *vm_allocpages(phys_bytes *phys, int pages, int reason) vm_assert(level >= 1); vm_assert(level <= 2); - if(level > 1 || !(vmp->vm_flags & VMF_HASPT)) { + if(level > 1 || !(vmp->vm_flags & VMF_HASPT) || !meminit_done) { int r; void *s; vm_assert(pages == 1); @@ -336,6 +350,38 @@ PUBLIC void *vm_allocpages(phys_bytes *phys, int pages, int reason) return (void *) arch_map2vir(vmp, loc); } +/*===========================================================================* + * vm_pagelock * + *===========================================================================*/ +PUBLIC void vm_pagelock(void *vir, int lockflag) +{ +/* Mark a page allocated by vm_allocpages() unwritable, i.e. only for VM. */ + vir_bytes m; + int r; + u32_t flags = I386_VM_PRESENT | I386_VM_USER; + pt_t *pt; + + pt = &vmp->vm_pt; + m = arch_vir2map(vmp, (vir_bytes) vir); + + vm_assert(!(m % I386_PAGE_SIZE)); + + if(!lockflag) + flags |= I386_VM_WRITE; + + /* Update flags. */ + if((r=pt_writemap(pt, m, 0, I386_PAGE_SIZE, + flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) { + vm_panic("vm_lockpage: pt_writemap failed\n", NO_NUM); + } + + if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) { + vm_panic("VMCTL_FLUSHTLB failed", r); + } + + return; +} + /*===========================================================================* * pt_ptalloc * *===========================================================================*/ @@ -347,14 +393,13 @@ PRIVATE int pt_ptalloc(pt_t *pt, int pde, u32_t flags) /* Argument must make sense. */ vm_assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES); - vm_assert(!(flags & ~(PTF_ALLFLAGS | PTF_MAPALLOC))); + vm_assert(!(flags & ~(PTF_ALLFLAGS))); /* We don't expect to overwrite page directory entry, nor * storage for the page table. 
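vm_pagelock() above is the hardware half of the slab read-only sanity facility: slab-allocated objects sit on write-protected pages except inside an explicit USE() window. The real macro lives in sanitycheck.h; one plausible shape, assuming slablock()/slabunlock() (declared in proto.h later in this patch) flip the protection of the containing page via vm_pagelock():

	/* Hypothetical shape of the unlock-modify-relock window. */
	#define USE(obj, code) do {					\
		slabunlock((obj), sizeof(*(obj)));	/* writable */	\
		code					/* the only writes */	\
		slablock((obj), sizeof(*(obj)));	/* read-only */	\
	} while(0)

The physravl.h header later in this patch uses exactly this pattern, e.g. AVL_SET_LESS(h1, h2) expands to USE((h1), (h1)->less = h2;).
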
*/ vm_assert(!(pt->pt_dir[pde] & I386_VM_PRESENT)); vm_assert(!pt->pt_pt[pde]); - PT_SANE(pt); /* Get storage for the page table. */ if(!(pt->pt_pt[pde] = vm_allocpages(&pt_phys, 1, VMP_PAGETABLE))) @@ -370,7 +415,6 @@ PRIVATE int pt_ptalloc(pt_t *pt, int pde, u32_t flags) pt->pt_dir[pde] = (pt_phys & I386_VM_ADDR_MASK) | flags | I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE; vm_assert(flags & I386_VM_PRESENT); - PT_SANE(pt); return OK; } @@ -385,10 +429,9 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, /* Page directory and table entries for this virtual address. */ int p, pages, pde; int finalpde; - SANITYCHECK(SCL_FUNCTIONS); vm_assert(!(bytes % I386_PAGE_SIZE)); - vm_assert(!(flags & ~(PTF_ALLFLAGS | PTF_MAPALLOC))); + vm_assert(!(flags & ~(PTF_ALLFLAGS))); pages = bytes / I386_PAGE_SIZE; @@ -405,8 +448,6 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, } #endif - PT_SANE(pt); - finalpde = I386_VM_PDE(v + I386_PAGE_SIZE * pages); /* First make sure all the necessary page tables are allocated, @@ -417,6 +458,8 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, for(pde = I386_VM_PDE(v); pde <= finalpde; pde++) { vm_assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES); if(pt->pt_dir[pde] & I386_VM_BIGPAGE) { + printf("pt_writemap: trying to write 0x%lx into 0x%lx\n", + physaddr, v); vm_panic("pt_writemap: BIGPAGE found", NO_NUM); } if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) { @@ -436,13 +479,10 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, vm_assert(pt->pt_dir[pde] & I386_VM_PRESENT); } - PT_SANE(pt); - /* Now write in them. */ for(p = 0; p < pages; p++) { int pde = I386_VM_PDE(v); int pte = I386_VM_PTE(v); - PT_SANE(pt); vm_assert(!(v % I386_PAGE_SIZE)); vm_assert(pte >= 0 && pte < I386_VM_PT_ENTRIES); @@ -456,22 +496,25 @@ PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr, */ vm_assert((pt->pt_dir[pde] & I386_VM_PRESENT) && pt->pt_pt[pde]); - PT_SANE(pt); #if SANITYCHECKS /* We don't expect to overwrite a page. */ if(!(writemapflags & WMF_OVERWRITE)) vm_assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)); #endif + if(writemapflags & WMF_WRITEFLAGSONLY) { + physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK; + } + + if(writemapflags & WMF_FREE) { + printf("pt_writemap: should free 0x%lx\n", physaddr); + } /* Write pagetable entry. */ pt->pt_pt[pde][pte] = (physaddr & I386_VM_ADDR_MASK) | flags; physaddr += I386_PAGE_SIZE; v += I386_PAGE_SIZE; - PT_SANE(pt); } - SANITYCHECK(SCL_FUNCTIONS); - PT_SANE(pt); return OK; } @@ -488,7 +531,14 @@ PUBLIC int pt_new(pt_t *pt) */ int i; - if(!(pt->pt_dir = vm_allocpages(&pt->pt_dir_phys, 1, VMP_PAGEDIR))) { + /* Don't ever re-allocate/re-move a certain process slot's + * page directory once it's been created. This is a fraction + * faster, but also avoids having to invalidate the page + * mappings from in-kernel page tables pointing to + * the page directories (the page_directories data). + */ + if(!pt->pt_dir && + !(pt->pt_dir = vm_allocpages(&pt->pt_dir_phys, 1, VMP_PAGEDIR))) { return ENOMEM; } @@ -520,13 +570,14 @@ PUBLIC void pt_init(void) */ pt_t *newpt; int s, r; - vir_bytes v; + vir_bytes v, kpagedir; phys_bytes lo, hi; vir_bytes extra_clicks; u32_t moveup = 0; - - global_bit_ok = _cpufeature(_CPUF_I386_PGE); - bigpage_ok = _cpufeature(_CPUF_I386_PSE); + int global_bit_ok = 0; + int free_pde; + int p; + vir_bytes kernlimit; /* Shorthand. 
*/ newpt = &vmp->vm_pt; @@ -541,12 +592,37 @@ PUBLIC void pt_init(void) } missing_spares = 0; - - /* Make new page table for ourselves, partly copied - * from the current one. - */ - if(pt_new(newpt) != OK) - vm_panic("pt_init: pt_new failed", NO_NUM); + + /* global bit and 4MB pages available? */ + global_bit_ok = _cpufeature(_CPUF_I386_PGE); + bigpage_ok = _cpufeature(_CPUF_I386_PSE); + + /* Set bit for PTE's and PDE's if available. */ + if(global_bit_ok) + global_bit = I386_VM_GLOBAL; + + /* Figure out kernel pde slot. */ + { + int pde1, pde2; + pde1 = I386_VM_PDE(KERNEL_TEXT); + pde2 = I386_VM_PDE(KERNEL_DATA+KERNEL_DATA_LEN); + if(pde1 != pde2) + vm_panic("pt_init: kernel too big", NO_NUM); + + /* Map in kernel with this single pde value if 4MB pages + * supported. + */ + kern_pde_val = (KERNEL_TEXT & I386_VM_ADDR_MASK_4MB) | + I386_VM_BIGPAGE| + I386_VM_USER| + I386_VM_PRESENT|I386_VM_WRITE|global_bit; + kernel_pde = pde1; + vm_assert(kernel_pde >= 0); + free_pde = kernel_pde+1; + } + + /* First unused pde. */ + proc_pde = free_pde; /* Initial (current) range of our virtual address space. */ lo = CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys); @@ -562,21 +638,27 @@ PUBLIC void pt_init(void) vm_assert(!(lo % I386_PAGE_SIZE)); vm_assert(!(moveup % I386_PAGE_SIZE)); } + + /* Make new page table for ourselves, partly copied + * from the current one. + */ + if(pt_new(newpt) != OK) + vm_panic("pt_init: pt_new failed", NO_NUM); + + /* Old position mapped in? */ + pt_check(vmp); /* Set up mappings for VM process. */ for(v = lo; v < hi; v += I386_PAGE_SIZE) { phys_bytes addr; u32_t flags; - /* We have to write the old and new position in the PT, + /* We have to write the new position in the PT, * so we can move our segments. */ if(pt_writemap(newpt, v+moveup, v, I386_PAGE_SIZE, I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK) vm_panic("pt_init: pt_writemap failed", NO_NUM); - if(pt_writemap(newpt, v, v, I386_PAGE_SIZE, - I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK) - vm_panic("pt_init: pt_writemap failed", NO_NUM); } /* Move segments up too. */ @@ -584,21 +666,14 @@ PUBLIC void pt_init(void) vmp->vm_arch.vm_seg[D].mem_phys += ABS2CLICK(moveup); vmp->vm_arch.vm_seg[S].mem_phys += ABS2CLICK(moveup); -#if 0 - /* Map in kernel. */ - if(pt_mapkernel(newpt) != OK) - vm_panic("pt_init: pt_mapkernel failed", NO_NUM); - /* Allocate us a page table in which to remember page directory * pointers. */ if(!(page_directories = vm_allocpages(&page_directories_phys, 1, VMP_PAGETABLE))) vm_panic("no virt addr for vm mappings", NO_NUM); -#endif - - /* Give our process the new, copied, private page table. */ - pt_bind(newpt, vmp); + + memset(page_directories, 0, I386_PAGE_SIZE); /* Increase our hardware data segment to create virtual address * space above our stack. We want to increase it to VM_DATATOP, @@ -614,19 +689,6 @@ PUBLIC void pt_init(void) (vmp->vm_arch.vm_seg[S].mem_vir + vmp->vm_arch.vm_seg[S].mem_len) << CLICK_SHIFT; - if((s=sys_newmap(VM_PROC_NR, vmp->vm_arch.vm_seg)) != OK) - vm_panic("VM: pt_init: sys_newmap failed", s); - - /* Back to reality - this is where the stack actually is. */ - vmp->vm_arch.vm_seg[S].mem_len -= extra_clicks; - - /* Wipe old mappings from VM. */ - for(v = lo; v < hi; v += I386_PAGE_SIZE) { - if(pt_writemap(newpt, v, MAP_NONE, I386_PAGE_SIZE, - 0, WMF_OVERWRITE) != OK) - vm_panic("pt_init: pt_writemap failed", NO_NUM); - } - /* Where our free virtual address space starts. * This is only a hint to the VM system. 
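kern_pde_val computed above is the entire kernel mapping: with I386_VM_BIGPAGE set, a single page directory entry maps a 4 MB-aligned physical range and no page table is needed behind it. A worked example of the arithmetic, for a hypothetical kernel at physical 0x00400000:

	/* PDE index is the top 10 bits of the address. */
	int pde = 0x00400000 >> 22;			/* = 1 */
	/* PDE value: 4 MB-aligned base plus flags. */
	u32_t val = (0x00400000 & 0xFFC00000)		/* I386_VM_ADDR_MASK_4MB */
		| I386_VM_BIGPAGE | I386_VM_PRESENT
		| I386_VM_WRITE | I386_VM_USER;		/* global_bit too, if PGE */
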
*/ @@ -635,17 +697,49 @@ PUBLIC void pt_init(void) /* Let other functions know VM now has a private page table. */ vmp->vm_flags |= VMF_HASPT; - /* Reserve a page in our virtual address space that we - * can use to map in arbitrary physical pages. - */ - varmap_loc = findhole(newpt, I386_PAGE_SIZE, - arch_vir2map(vmp, vmp->vm_stacktop), - vmp->vm_arch.vm_data_top); - if(varmap_loc == NO_MEM) { - vm_panic("no virt addr for vm mappings", NO_NUM); - } - varmap = (unsigned char *) arch_map2vir(vmp, varmap_loc); + /* Find a PDE below processes available for mapping in the + * page directories (readonly). + */ + pagedir_pde = free_pde++; + pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) | + I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE; + + /* Tell kernel about free pde's. */ + while(free_pde*I386_BIG_PAGE_SIZE < VM_PROCSTART) { + if((r=sys_vmctl(SELF, VMCTL_I386_FREEPDE, free_pde++)) != OK) { + vm_panic("VMCTL_I386_FREEPDE failed", r); + } + } + + /* first pde in use by process. */ + proc_pde = free_pde; + + kernlimit = free_pde*I386_BIG_PAGE_SIZE; + + /* Increase kernel segment to address this memory. */ + if((r=sys_vmctl(SELF, VMCTL_I386_KERNELLIMIT, kernlimit)) != OK) { + vm_panic("VMCTL_I386_KERNELLIMIT failed", r); + } + kpagedir = arch_map2vir(&vmproc[VMP_SYSTEM], + pagedir_pde*I386_BIG_PAGE_SIZE); + + /* Tell kernel how to get at the page directories. */ + if((r=sys_vmctl(SELF, VMCTL_I386_PAGEDIRS, kpagedir)) != OK) { + vm_panic("VMCTL_I386_KERNELLIMIT failed", r); + } + + /* Give our process the new, copied, private page table. */ + pt_mapkernel(newpt); /* didn't know about vm_dir pages earlier */ + pt_bind(newpt, vmp); + + /* Now actually enable paging. */ + if(sys_vmctl_enable_paging(vmp->vm_arch.vm_seg) != OK) + vm_panic("pt_init: enable paging failed", NO_NUM); + + /* Back to reality - this is where the stack actually is. */ + vmp->vm_arch.vm_seg[S].mem_len -= extra_clicks; + /* All OK. */ return; } @@ -656,24 +750,28 @@ PUBLIC void pt_init(void) *===========================================================================*/ PUBLIC int pt_bind(pt_t *pt, struct vmproc *who) { - int slot; + int slot, ispt; + u32_t phys; /* Basic sanity checks. */ vm_assert(who); vm_assert(who->vm_flags & VMF_INUSE); - if(pt) PT_SANE(pt); vm_assert(pt); -#if 0 slot = who->vm_slot; vm_assert(slot >= 0); vm_assert(slot < ELEMENTS(vmproc)); - vm_assert(!(pt->pt_dir_phys & ~I386_VM_ADDR_MASK)); + vm_assert(slot < I386_VM_PT_ENTRIES); - page_directories[slot] = (pt->pt_dir_phys & I386_VM_ADDR_MASK) | - (I386_VM_PRESENT|I386_VM_WRITE); -#endif + phys = pt->pt_dir_phys & I386_VM_ADDR_MASK; + vm_assert(pt->pt_dir_phys == phys); + + /* Update "page directory pagetable." */ + page_directories[slot] = phys | I386_VM_PRESENT|I386_VM_WRITE; +#if 0 + printf("VM: slot %d has pde val 0x%lx\n", slot, page_directories[slot]); +#endif /* Tell kernel about new page table root. */ return sys_vmctl(who->vm_endpoint, VMCTL_I386_SETCR3, pt ? pt->pt_dir_phys : 0); @@ -687,24 +785,10 @@ PUBLIC void pt_free(pt_t *pt) /* Free memory associated with this pagetable. 
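The pagedir_pde set up in pt_init() above backs a single page, page_directories, holding one entry per process slot; pt_bind() below keeps each process's page directory base current in it, and the kernel learns its address through VMCTL_I386_PAGEDIRS. A hypothetical kernel-side view (both names below are invented for illustration):

	u32_t *vm_pagedirs;	/* kernel address passed via VMCTL_I386_PAGEDIRS */

	phys_bytes pagedir_phys(int slot)
	{
		/* pt_bind() keeps this entry current for 'slot'. */
		return vm_pagedirs[slot] & I386_VM_ADDR_MASK;
	}
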
*/ int i; - PT_SANE(pt); - - for(i = 0; i < I386_VM_DIR_ENTRIES; i++) { - int p; - if(pt->pt_pt[i]) { - for(p = 0; p < I386_VM_PT_ENTRIES; p++) { - if((pt->pt_pt[i][p] & (PTF_MAPALLOC | I386_VM_PRESENT)) - == (PTF_MAPALLOC | I386_VM_PRESENT)) { - u32_t pa = I386_VM_PFA(pt->pt_pt[i][p]); - FREE_MEM(ABS2CLICK(pa), CLICKSPERPAGE); - } - } - vm_freepages((vir_bytes) pt->pt_pt[i], - I386_VM_PFA(pt->pt_dir[i]), 1, VMP_PAGETABLE); - } - } - - vm_freepages((vir_bytes) pt->pt_dir, pt->pt_dir_phys, 1, VMP_PAGEDIR); + for(i = 0; i < I386_VM_DIR_ENTRIES; i++) + if(pt->pt_pt[i]) + vm_freepages((vir_bytes) pt->pt_pt[i], + I386_VM_PFA(pt->pt_dir[i]), 1, VMP_PAGETABLE); return; } @@ -715,77 +799,51 @@ PUBLIC void pt_free(pt_t *pt) PUBLIC int pt_mapkernel(pt_t *pt) { int r; - static int pde = -1, do_bigpage = 0; - u32_t global = 0; - static u32_t kern_phys; static int printed = 0; - if(global_bit_ok) global = I386_VM_GLOBAL; - /* Any i386 page table needs to map in the kernel address space. */ vm_assert(vmproc[VMP_SYSTEM].vm_flags & VMF_INUSE); - if(pde == -1 && bigpage_ok) { - int pde1, pde2; - pde1 = I386_VM_PDE(KERNEL_TEXT); - pde2 = I386_VM_PDE(KERNEL_DATA+KERNEL_DATA_LEN); - if(pde1 != pde2) { - printf("VM: pt_mapkernel: kernel too big?"); - bigpage_ok = 0; - } else { - kern_phys = KERNEL_TEXT & I386_VM_ADDR_MASK_4MB; - pde = pde1; - do_bigpage = 1; - vm_assert(pde >= 0); - } - } - - if(do_bigpage) { - pt->pt_dir[pde] = kern_phys | - I386_VM_BIGPAGE|I386_VM_PRESENT|I386_VM_WRITE|global; + if(bigpage_ok) { + if(kernel_pde >= 0) { + pt->pt_dir[kernel_pde] = kern_pde_val; + } else + vm_panic("VM: pt_mapkernel: no kernel pde", NO_NUM); } else { + vm_panic("VM: pt_mapkernel: no bigpage", NO_NUM); + /* Map in text. flags: don't write, supervisor only */ if((r=pt_writemap(pt, KERNEL_TEXT, KERNEL_TEXT, KERNEL_TEXT_LEN, - I386_VM_PRESENT|global, 0)) != OK) + I386_VM_PRESENT|global_bit, 0)) != OK) return r; /* Map in data. flags: read-write, supervisor only */ if((r=pt_writemap(pt, KERNEL_DATA, KERNEL_DATA, KERNEL_DATA_LEN, - I386_VM_PRESENT|I386_VM_WRITE|global, 0)) != OK) + I386_VM_PRESENT|I386_VM_WRITE, 0)) != OK) return r; } + if(pagedir_pde >= 0) { + /* Kernel also wants to know about all page directories. */ + pt->pt_dir[pagedir_pde] = pagedir_pde_val; + } + return OK; } /*===========================================================================* - * pt_freerange * + * pt_check * *===========================================================================*/ -PUBLIC void pt_freerange(pt_t *pt, vir_bytes low, vir_bytes high) +PUBLIC void pt_check(struct vmproc *vmp) { -/* Free memory allocated by pagetable functions in this range. 
*/ - int pde; - u32_t v; - - PT_SANE(pt); - - for(v = low; v < high; v += I386_PAGE_SIZE) { - int pte; - pde = I386_VM_PDE(v); - pte = I386_VM_PTE(v); - if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) - continue; - if((pt->pt_pt[pde][pte] & (PTF_MAPALLOC | I386_VM_PRESENT)) - == (PTF_MAPALLOC | I386_VM_PRESENT)) { - u32_t pa = I386_VM_PFA(pt->pt_pt[pde][pte]); - FREE_MEM(ABS2CLICK(pa), CLICKSPERPAGE); - pt->pt_pt[pde][pte] = 0; - } + phys_bytes hi; + hi = CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_phys + + vmp->vm_arch.vm_seg[S].mem_len); + if(hi > (kernel_pde+1) * I386_BIG_PAGE_SIZE) { + printf("VM: %d doesn't fit in kernel range (0x%lx)\n", + vmp->vm_endpoint, hi); + vm_panic("boot time processes too big", NO_NUM); } - - PT_SANE(pt); - - return; } /*===========================================================================* @@ -796,82 +854,3 @@ PUBLIC void pt_cycle(void) vm_checkspares(); } -/* In sanity check mode, pages are mapped and unmapped explicitly, so - * unexpected double mappings (overwriting a page table entry) are caught. - * If not sanity checking, simply keep the page mapped in and overwrite - * the mapping entry; we need WMF_OVERWRITE for that in PHYS_MAP though. - */ -#if SANITYCHECKS -#define MAPFLAGS 0 -#else -#define MAPFLAGS WMF_OVERWRITE -#endif - -static u32_t ismapped = MAP_NONE; - -#define PHYS_MAP(a, o) \ -{ int r; \ - u32_t wantmapped; \ - vm_assert(varmap); \ - (o) = (a) % I386_PAGE_SIZE; \ - wantmapped = (a) - (o); \ - if(wantmapped != ismapped || ismapped == MAP_NONE) { \ - r = pt_writemap(&vmp->vm_pt, (vir_bytes) varmap_loc, \ - wantmapped, I386_PAGE_SIZE, \ - I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, \ - MAPFLAGS); \ - if(r != OK) \ - vm_panic("PHYS_MAP: pt_writemap", NO_NUM); \ - ismapped = wantmapped; \ - /* pt_bind() flushes TLB. */ \ - pt_bind(&vmp->vm_pt, vmp); \ - } \ -} - -#define PHYSMAGIC 0x7b9a0590 - -#if SANITYCHECKS -#define PHYS_UNMAP if(OK != pt_writemap(&vmp->vm_pt, varmap_loc, MAP_NONE,\ - I386_PAGE_SIZE, 0, WMF_OVERWRITE)) { \ - vm_panic("PHYS_UNMAP: pt_writemap failed", NO_NUM); } - ismapped = MAP_NONE; -#endif - -#define PHYS_VAL(o) (* (phys_bytes *) (varmap + (o))) - - -/*===========================================================================* - * phys_writeaddr * - *===========================================================================*/ -PUBLIC void phys_writeaddr(phys_bytes addr, phys_bytes v1, phys_bytes v2) -{ - phys_bytes offset; - - SANITYCHECK(SCL_DETAIL); - PHYS_MAP(addr, offset); - PHYS_VAL(offset) = v1; - PHYS_VAL(offset + sizeof(phys_bytes)) = v2; -#if SANITYCHECKS - PHYS_VAL(offset + 2*sizeof(phys_bytes)) = PHYSMAGIC; - PHYS_UNMAP; -#endif - SANITYCHECK(SCL_DETAIL); -} - -/*===========================================================================* - * phys_readaddr * - *===========================================================================*/ -PUBLIC void phys_readaddr(phys_bytes addr, phys_bytes *v1, phys_bytes *v2) -{ - phys_bytes offset; - - SANITYCHECK(SCL_DETAIL); - PHYS_MAP(addr, offset); - *v1 = PHYS_VAL(offset); - *v2 = PHYS_VAL(offset + sizeof(phys_bytes)); -#if SANITYCHECKS - vm_assert(PHYS_VAL(offset + 2*sizeof(phys_bytes)) == PHYSMAGIC); - PHYS_UNMAP; -#endif - SANITYCHECK(SCL_DETAIL); -} diff --git a/servers/vm/i386/pagetable.h b/servers/vm/i386/pagetable.h index b26ac9cd3..42fb7d7e6 100644 --- a/servers/vm/i386/pagetable.h +++ b/servers/vm/i386/pagetable.h @@ -5,6 +5,8 @@ #include #include +#include "../vm.h" + /* An i386 pagetable. 
*/ typedef struct { /* Directory entries in VM addr space - root of page table. */ @@ -34,5 +36,12 @@ typedef struct { */ #define PTF_ALLFLAGS (PTF_WRITE|PTF_PRESENT|PTF_USER|PTF_GLOBAL) +#if SANITYCHECKS +#define PT_SANE(p) { pt_sanitycheck((p), __FILE__, __LINE__); } +#else +#define PT_SANE(p) +#endif + #endif + diff --git a/servers/vm/i386/vm.c b/servers/vm/i386/vm.c index 1f55874cf..aa8286c28 100644 --- a/servers/vm/i386/vm.c +++ b/servers/vm/i386/vm.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -25,83 +26,18 @@ #include "memory.h" -#define PAGE_SIZE 4096 -#define PAGE_DIR_SIZE (1024*PAGE_SIZE) -#define PAGE_TABLE_COVER (1024*PAGE_SIZE) -/*=========================================================================* - * arch_init_vm * - *=========================================================================*/ -PUBLIC void arch_init_vm(mem_chunks) -struct memory mem_chunks[NR_MEMS]; -{ - phys_bytes high, bytes; - phys_clicks clicks, base_click; - unsigned pages; - int i, r; - - /* Compute the highest memory location */ - high= 0; - for (i= 0; i high) - high= mem_chunks[i].base + mem_chunks[i].size; - } - - high <<= CLICK_SHIFT; -#if VERBOSE_VM - printf("do_x86_vm: found high 0x%x\n", high); -#endif - - /* Rounding up */ - high= (high-1+PAGE_DIR_SIZE) & ~(PAGE_DIR_SIZE-1); - - /* The number of pages we need is one for the page directory, enough - * page tables to cover the memory, and one page for alignement. - */ - pages= 1 + (high + PAGE_TABLE_COVER-1)/PAGE_TABLE_COVER + 1; - bytes= pages*PAGE_SIZE; - clicks= (bytes + CLICK_SIZE-1) >> CLICK_SHIFT; - -#if VERBOSE_VM - printf("do_x86_vm: need %d pages\n", pages); - printf("do_x86_vm: need %d bytes\n", bytes); - printf("do_x86_vm: need %d clicks\n", clicks); -#endif - - for (i= 0; i= NR_MEMS) - panic("VM", "not enough memory for VM page tables?", NO_NUM); - base_click= mem_chunks[i].base; - mem_chunks[i].base += clicks; - mem_chunks[i].size -= clicks; - -#if VERBOSE_VM - printf("do_x86_vm: using 0x%x clicks @ 0x%x\n", clicks, base_click); -#endif - r= sys_vm_setbuf(base_click << CLICK_SHIFT, clicks << CLICK_SHIFT, - high); - if (r != 0) - printf("do_x86_vm: sys_vm_setbuf failed: %d\n", r); - -} - /*===========================================================================* * arch_map2vir * *===========================================================================*/ PUBLIC vir_bytes arch_map2vir(struct vmproc *vmp, vir_bytes addr) { - vir_bytes bottom = CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys); + vir_bytes textstart = CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys); + vir_bytes datastart = CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys); - vm_assert(bottom <= addr); + /* Could be a text address. 
*/ + vm_assert(datastart <= addr || textstart <= addr); - return addr - bottom; + return addr - datastart; } /*===========================================================================* @@ -113,3 +49,13 @@ PUBLIC vir_bytes arch_vir2map(struct vmproc *vmp, vir_bytes addr) return addr + bottom; } + +/*===========================================================================* + * arch_vir2map_text * + *===========================================================================*/ +PUBLIC vir_bytes arch_vir2map_text(struct vmproc *vmp, vir_bytes addr) +{ + vir_bytes bottom = CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys); + + return addr + bottom; +} diff --git a/servers/vm/main.c b/servers/vm/main.c index 8b3b18d3a..910bc6cde 100644 --- a/servers/vm/main.c +++ b/servers/vm/main.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -45,6 +47,7 @@ typedef u32_t mask_t; #define MAXEPM (ANYEPM-1) #define EPM(e) ((1L) << ((e)-MINEPM)) #define EPMOK(mask, ep) (((mask) & EPM(ANYEPM)) || ((ep) >= MINEPM && (ep) <= MAXEPM && (EPM(ep) & (mask)))) +#define EPMANYOK(mask, ep) ((mask) & EPM(ANYEPM)) /* Table of calls and a macro to test for being in range. */ struct { @@ -76,10 +79,9 @@ PUBLIC int main(void) int result, who_e; #if SANITYCHECKS - nocheck = 0; - memcpy(data1, CHECKADDR, sizeof(data1)); + incheck = nocheck = 0; + FIXME("VM SANITYCHECKS are on"); #endif - SANITYCHECK(SCL_TOP); vm_paged = 1; env_parse("vm_paged", "d", 0, &vm_paged, 0, 1); @@ -87,10 +89,7 @@ PUBLIC int main(void) env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX); #endif - SANITYCHECK(SCL_TOP); - vm_init(); - SANITYCHECK(SCL_TOP); /* This is VM's main loop. */ while (TRUE) { @@ -100,9 +99,6 @@ PUBLIC int main(void) if(missing_spares > 0) { pt_cycle(); /* pagetable code wants to be called */ } -#if SANITYCHECKS - slabstats(); -#endif SANITYCHECK(SCL_DETAIL); if ((r=receive(ANY, &msg)) != OK) @@ -114,21 +110,18 @@ PUBLIC int main(void) switch(msg.m_source) { case SYSTEM: /* Kernel wants to have memory ranges - * verified. + * verified, and/or pagefaults handled. */ do_memory(); break; + case HARDWARE: + do_pagefaults(); + break; case PM_PROC_NR: /* PM sends a notify() on shutdown, which * is OK and we ignore. */ break; - case HARDWARE: - /* This indicates a page fault has happened, - * which we have to handle. - */ - do_pagefaults(); - break; default: /* No-one else should send us notifies. */ printf("VM: ignoring notify() from %d\n", @@ -147,6 +140,26 @@ PUBLIC int main(void) printf("VM: restricted call %s from %d instead of 0x%lx\n", vm_calls[c].vmc_name, msg.m_source, vm_calls[c].vmc_callers); + } else if (EPMANYOK(vm_calls[c].vmc_callers, who_e) && + c != VM_MMAP-VM_RQ_BASE && + c != VM_MUNMAP_TEXT-VM_RQ_BASE && + c != VM_MUNMAP-VM_RQ_BASE) { + /* check VM acl, we care ANYEPM only, + * and omit other hard-coded permission checks. 
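+	 * The vm_call_priv_mask consulted below is seeded by RS through
+	 * VM_RS_SET_PRIV (see do_rs_set_priv) and inherited across fork(),
+	 * so per-service permissions are configured at start-up; VM_MMAP
+	 * and both VM_MUNMAP variants remain open to every caller.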
+ */ + int n; + + if ((r = vm_isokendpt(who_e, &n)) != OK) + vm_panic("VM: from strange source.", who_e); + + if (!GET_BIT(vmproc[n].vm_call_priv_mask, c)) + printf("VM: restricted call %s from %d\n", + vm_calls[c].vmc_name, who_e); + else { + SANITYCHECK(SCL_FUNCTIONS); + result = vm_calls[c].vmc_func(&msg); + SANITYCHECK(SCL_FUNCTIONS); + } } else { SANITYCHECK(SCL_FUNCTIONS); result = vm_calls[c].vmc_func(&msg); @@ -171,12 +184,15 @@ PUBLIC int main(void) return(OK); } +extern int unmap_ok; + /*===========================================================================* * vm_init * *===========================================================================*/ PRIVATE void vm_init(void) { int s, i; + int click, clicksforgotten = 0; struct memory mem_chunks[NR_MEMS]; struct boot_image image[NR_BOOT_PROCS]; struct boot_image *ip; @@ -241,37 +257,12 @@ PRIVATE void vm_init(void) vmp->vm_flags |= VMF_SEPARATE; } - - /* Let architecture-dependent VM initialization use some memory. */ - arch_init_vm(mem_chunks); - /* Architecture-dependent initialization. */ pt_init(); /* Initialize tables to all physical memory. */ mem_init(mem_chunks); - - /* Bits of code need to know where a process can - * start in a pagetable. - */ - kernel_top_bytes = find_kernel_top(); - - /* Can first kernel pages of code and data be (left) mapped out? - * If so, change the SYSTEM process' memory map to reflect this - * (future mappings of SYSTEM into other processes will not include - * first pages), and free the first pages. - */ - if(vm_paged && sys_vmctl(SELF, VMCTL_NOPAGEZERO, 0) == OK) { - struct vmproc *vmp; - vmp = &vmproc[VMP_SYSTEM]; - if(vmp->vm_arch.vm_seg[T].mem_len > 0) { -#define DIFF CLICKSPERPAGE - vmp->vm_arch.vm_seg[T].mem_phys += DIFF; - vmp->vm_arch.vm_seg[T].mem_len -= DIFF; - } - vmp->vm_arch.vm_seg[D].mem_phys += DIFF; - vmp->vm_arch.vm_seg[D].mem_len -= DIFF; - } + meminit_done = 1; /* Give these processes their own page table. */ for (ip = &image[0]; ip < &image[NR_BOOT_PROCS]; ip++) { @@ -283,14 +274,22 @@ PRIVATE void vm_init(void) GETVMP(vmp, ip->proc_nr); + if(!(ip->flags & PROC_FULLVM)) { + /* See if this process fits in kernel + * mapping. VM has its own pagetable, + * don't check it. + */ + if(!(vmp->vm_flags & VMF_HASPT)) { + pt_check(vmp); + } + continue; + } + old_stack = vmp->vm_arch.vm_seg[S].mem_vir + vmp->vm_arch.vm_seg[S].mem_len - vmp->vm_arch.vm_seg[D].mem_len; - if(!(ip->flags & PROC_FULLVM)) - continue; - if(pt_new(&vmp->vm_pt) != OK) vm_panic("vm_init: no new pagetable", NO_NUM); #define BASICSTACK VM_PAGE_SIZE @@ -305,7 +304,7 @@ PRIVATE void vm_init(void) vmp->vm_arch.vm_seg[D].mem_len, old_stack); - proc_new(vmp, + if(proc_new(vmp, VM_PROCSTART, CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_len), CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_len), @@ -315,7 +314,9 @@ PRIVATE void vm_init(void) vmp->vm_arch.vm_seg[D].mem_len) - BASICSTACK, CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys), CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys), - VM_STACKTOP); + VM_STACKTOP) != OK) { + vm_panic("failed proc_new for boot process", NO_NUM); + } } /* Set up table of calls. */ @@ -347,6 +348,7 @@ PRIVATE void vm_init(void) CALLMAP(VM_DELDMA, do_deldma, PM_PROC_NR); CALLMAP(VM_GETDMA, do_getdma, PM_PROC_NR); CALLMAP(VM_ALLOCMEM, do_allocmem, PM_PROC_NR); + CALLMAP(VM_NOTIFY_SIG, do_notify_sig, PM_PROC_NR); /* Physical mapping requests. * tty (for /dev/video) does this. @@ -359,22 +361,34 @@ PRIVATE void vm_init(void) /* Requests from userland (source unrestricted). 
*/ CALLMAP(VM_MMAP, do_mmap, ANYEPM); + CALLMAP(VM_MUNMAP, do_munmap, ANYEPM); + CALLMAP(VM_MUNMAP_TEXT, do_munmap, ANYEPM); + CALLMAP(VM_REMAP, do_remap, ANYEPM); + CALLMAP(VM_GETPHYS, do_get_phys, ANYEPM); + CALLMAP(VM_SHM_UNMAP, do_shared_unmap, ANYEPM); + CALLMAP(VM_GETREF, do_get_refcount, ANYEPM); + CALLMAP(VM_CTL, do_ctl, ANYEPM); + + /* Request only from IPC server */ + CALLMAP(VM_QUERY_EXIT, do_query_exit, ANYEPM); /* Requests (actually replies) from VFS (restricted to VFS only). */ CALLMAP(VM_VFS_REPLY_OPEN, do_vfs_reply, VFS_PROC_NR); CALLMAP(VM_VFS_REPLY_MMAP, do_vfs_reply, VFS_PROC_NR); CALLMAP(VM_VFS_REPLY_CLOSE, do_vfs_reply, VFS_PROC_NR); + /* Requests from RS */ + CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv, RS_PROC_NR); + /* Sanity checks */ if(find_kernel_top() >= VM_PROCSTART) vm_panic("kernel loaded too high", NO_NUM); -} -#if 0 -void kputc(int c) -{ - if(c == '\n') - ser_putc('\r'); - ser_putc(c); + /* Initialize the structures for queryexit */ + init_query_exit(); + + /* Unmap our own low pages. */ + unmap_ok = 1; + _minix_unmapzero(); } -#endif + diff --git a/servers/vm/mmap.c b/servers/vm/mmap.c index 1bbbf699e..9def7b482 100644 --- a/servers/vm/mmap.c +++ b/servers/vm/mmap.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -47,10 +48,6 @@ PUBLIC int do_mmap(message *m) vmp = &vmproc[n]; - if(m->VMM_FLAGS & MAP_LOWER16M) - printf("VM: warning for %d: MAP_LOWER16M not implemented\n", - m->m_source); - if(!(vmp->vm_flags & VMF_HASPT)) return ENXIO; @@ -66,14 +63,17 @@ PUBLIC int do_mmap(message *m) if(m->VMM_FLAGS & MAP_CONTIG) mfflags |= MF_CONTIG; if(m->VMM_FLAGS & MAP_PREALLOC) mfflags |= MF_PREALLOC; + if(m->VMM_FLAGS & MAP_LOWER16M) vrflags |= VR_LOWER16MB; + if(m->VMM_FLAGS & MAP_LOWER1M) vrflags |= VR_LOWER1MB; if(m->VMM_FLAGS & MAP_ALIGN64K) vrflags |= VR_PHYS64K; + if(m->VMM_FLAGS & MAP_SHARED) vrflags |= VR_SHARED; if(len % VM_PAGE_SIZE) len += VM_PAGE_SIZE - (len % VM_PAGE_SIZE); if(!(vr = map_page_region(vmp, - arch_vir2map(vmp, vmp->vm_stacktop), VM_DATATOP, len, MAP_NONE, - vrflags, mfflags))) { + arch_vir2map(vmp, vmp->vm_stacktop), + VM_DATATOP, len, MAP_NONE, vrflags, mfflags))) { return ENOMEM; } } else { @@ -84,6 +84,7 @@ PUBLIC int do_mmap(message *m) vm_assert(vr); m->VMM_RETADDR = arch_map2vir(vmp, vr->vaddr); + return OK; } @@ -153,9 +154,244 @@ PUBLIC int do_unmap_phys(message *m) return EINVAL; } - if(map_unmap_region(vmp, region) != OK) { + if(map_unmap_region(vmp, region, region->length) != OK) { + return EINVAL; + } + + return OK; +} + +/*===========================================================================* + * do_remap * + *===========================================================================*/ +PUBLIC int do_remap(message *m) +{ + int d, dn, s, sn; + vir_bytes da, sa, startv; + size_t size; + struct vir_region *vr, *region; + struct vmproc *dvmp, *svmp; + int r; + + d = m->VMRE_D; + s = m->VMRE_S; + da = (vir_bytes) m->VMRE_DA; + sa = (vir_bytes) m->VMRE_SA; + size = m->VMRE_SIZE; + + if ((r = vm_isokendpt(d, &dn)) != OK) + return EINVAL; + if ((r = vm_isokendpt(s, &sn)) != OK) + return EINVAL; + + dvmp = &vmproc[dn]; + svmp = &vmproc[sn]; + + /* da is not translated by arch_vir2map(), + * it's handled a little differently, + * since in map_remap(), we have to know + * about whether the user needs to bind to + * THAT address or be chosen by the system. 
+ */ + sa = arch_vir2map(svmp, sa); + + if (!(region = map_lookup(svmp, sa))) + return EINVAL; + + if ((r = map_remap(dvmp, da, size, region, &startv)) != OK) + return r; + + m->VMRE_RETA = (char *) arch_map2vir(dvmp, startv); + return OK; +} + +/*===========================================================================* + * do_shared_unmap * + *===========================================================================*/ +PUBLIC int do_shared_unmap(message *m) +{ + int r, n; + struct vmproc *vmp; + endpoint_t target; + struct vir_region *vr; + vir_bytes addr; + + target = m->VMUN_ENDPT; + + if ((r = vm_isokendpt(target, &n)) != OK) + return EINVAL; + + vmp = &vmproc[n]; + + addr = arch_vir2map(vmp, m->VMUN_ADDR); + + if(!(vr = map_lookup(vmp, addr))) { + printf("VM: addr 0x%lx not found.\n", m->VMUN_ADDR); + return EFAULT; + } + + if(vr->vaddr != addr) { + printf("VM: wrong address for shared_unmap.\n"); + return EFAULT; + } + + if(!(vr->flags & VR_SHARED)) { + printf("VM: address does not point to shared region.\n"); + return EFAULT; + } + + if(map_unmap_region(vmp, vr, vr->length) != OK) + vm_panic("do_shared_unmap: map_unmap_region failed", NO_NUM); + + return OK; +} + +/*===========================================================================* + * do_get_phys * + *===========================================================================*/ +PUBLIC int do_get_phys(message *m) +{ + int r, n; + struct vmproc *vmp; + endpoint_t target; + phys_bytes ret; + vir_bytes addr; + + target = m->VMPHYS_ENDPT; + addr = m->VMPHYS_ADDR; + + if ((r = vm_isokendpt(target, &n)) != OK) + return EINVAL; + + vmp = &vmproc[n]; + addr = arch_vir2map(vmp, addr); + + r = map_get_phys(vmp, addr, &ret); + + m->VMPHYS_RETA = ret; + return r; +} + +/*===========================================================================* + * do_get_refcount * + *===========================================================================*/ +PUBLIC int do_get_refcount(message *m) +{ + int r, n; + struct vmproc *vmp; + endpoint_t target; + u8_t cnt; + vir_bytes addr; + + target = m->VMREFCNT_ENDPT; + addr = m->VMREFCNT_ADDR; + + if ((r = vm_isokendpt(target, &n)) != OK) return EINVAL; + + vmp = &vmproc[n]; + addr = arch_vir2map(vmp, addr); + + r = map_get_ref(vmp, addr, &cnt); + + m->VMREFCNT_RETC = cnt; + return r; +} + +/*===========================================================================* + * do_munmap * + *===========================================================================*/ +PUBLIC int do_munmap(message *m) +{ + int r, n; + struct vmproc *vmp; + vir_bytes addr, len; + struct vir_region *vr; + + if((r=vm_isokendpt(m->m_source, &n)) != OK) { + vm_panic("do_mmap: message from strange source", m->m_source); + } + + vmp = &vmproc[n]; + + if(!(vmp->vm_flags & VMF_HASPT)) + return ENXIO; + + if(m->m_type == VM_MUNMAP) { + addr = (vir_bytes) arch_vir2map(vmp, (vir_bytes) m->VMUM_ADDR); + } else if(m->m_type == VM_MUNMAP_TEXT) { + addr = (vir_bytes) arch_vir2map_text(vmp, (vir_bytes) m->VMUM_ADDR); + } else { + vm_panic("do_munmap: strange type", NO_NUM); + } + + if(!(vr = map_lookup(vmp, addr))) { + printf("VM: unmap: virtual address 0x%lx not found in %d\n", + m->VMUM_ADDR, vmp->vm_endpoint); + return EFAULT; + } + + len = m->VMUM_LEN; + len -= len % VM_PAGE_SIZE; + + if(addr != vr->vaddr || len > vr->length || len < VM_PAGE_SIZE) { + return EFAULT; + } + + if(map_unmap_region(vmp, vr, len) != OK) + vm_panic("do_munmap: map_unmap_region failed", NO_NUM); + + return OK; +} + +int unmap_ok = 0; + 
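unmap_ok gates the overrides defined next: a plain libc munmap() inside VM would mean sending a VM_MUNMAP request to VM itself, so these overrides edit VM's own page table directly, and refuse to run before paging is set up. vm_init() flips the flag and then calls _minix_unmapzero() to drop VM's own zero pages; one plausible shape of that library routine (the body shown is hypothetical):

	void _minix_unmapzero(void)
	{
		/* With separate I&D, page zero exists in both views. */
		munmap((void *) 0, VM_PAGE_SIZE);	/* data view */
		munmap_text((void *) 0, VM_PAGE_SIZE);	/* text view */
	}
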
+/*===========================================================================* + * munmap_lin (used for overrides for VM) * + *===========================================================================*/ +PRIVATE int munmap_lin(vir_bytes addr, size_t len) +{ + if(addr % VM_PAGE_SIZE) { + printf("munmap_lin: offset not page aligned\n"); + return EFAULT; + } + + if(len % VM_PAGE_SIZE) { + printf("munmap_lin: len not page aligned\n"); + return EFAULT; + } + + if(pt_writemap(&vmproc[VM_PROC_NR].vm_pt, addr, MAP_NONE, len, 0, + WMF_OVERWRITE | WMF_FREE) != OK) { + printf("munmap_lin: pt_writemap failed\n"); + return EFAULT; } return OK; } + +/*===========================================================================* + * munmap (override for VM) * + *===========================================================================*/ +PUBLIC int munmap(void *addr, size_t len) +{ + vir_bytes laddr; + if(!unmap_ok) + return ENOSYS; + laddr = (vir_bytes) arch_vir2map(&vmproc[VM_PROC_NR], (vir_bytes) addr); + return munmap_lin(laddr, len); +} + +/*===========================================================================* + * munmap_text (override for VM) * + *===========================================================================*/ +PUBLIC int munmap_text(void *addr, size_t len) +{ + vir_bytes laddr; + if(!unmap_ok) + return ENOSYS; + laddr = (vir_bytes) arch_vir2map_text(&vmproc[VM_PROC_NR], + (vir_bytes) addr); + return munmap_lin(laddr, len); +} + diff --git a/servers/vm/pagefaults.c b/servers/vm/pagefaults.c index 3790df238..8ba94db4a 100644 --- a/servers/vm/pagefaults.c +++ b/servers/vm/pagefaults.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -61,12 +62,21 @@ PUBLIC void do_pagefaults(void) vir_bytes offset; int p, wr = PFERR_WRITE(err); +#if 0 + printf("VM: pagefault: ep %d 0x%lx %s\n", + ep, arch_map2vir(vmp, addr), pf_errstr(err)); +#endif + if(vm_isokendpt(ep, &p) != OK) vm_panic("do_pagefaults: endpoint wrong", ep); vmp = &vmproc[p]; vm_assert(vmp->vm_flags & VMF_INUSE); +#if 0 + map_printmap(vmp); +#endif + /* See if address is valid at all. */ if(!(region = map_lookup(vmp, addr))) { vm_assert(PFERR_NOPAGE(err)); @@ -75,6 +85,8 @@ PUBLIC void do_pagefaults(void) sys_sysctl_stacktrace(vmp->vm_endpoint); if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK) vm_panic("sys_kill failed", s); + if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK) + vm_panic("do_pagefaults: sys_vmctl failed", ep); continue; } @@ -83,6 +95,11 @@ PUBLIC void do_pagefaults(void) */ vm_assert(!(region->flags & VR_NOPF)); + /* We do not allow shared memory to cause pagefaults. + * These pages have to be pre-allocated. + */ + vm_assert(!(region->flags & VR_SHARED)); + /* If process was writing, see if it's writable. 
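	 * A write fault on a region without VR_WRITABLE is fatal here;
	 * copy-on-write pages live in writable regions (they are mapped
	 * read-only only while the block's refcount exceeds one, per
	 * WRITABLE() in region.c) and are resolved below through map_pf().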
*/ if(!(region->flags & VR_WRITABLE) && wr) { printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n", @@ -90,6 +107,8 @@ PUBLIC void do_pagefaults(void) sys_sysctl_stacktrace(vmp->vm_endpoint); if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK) vm_panic("sys_kill failed", s); + if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK) + vm_panic("do_pagefaults: sys_vmctl failed", ep); continue; } @@ -102,13 +121,23 @@ PUBLIC void do_pagefaults(void) sys_sysctl_stacktrace(vmp->vm_endpoint); if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK) vm_panic("sys_kill failed", s); + if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK) + vm_panic("do_pagefaults: sys_vmctl failed", ep); continue; } +#if 0 + printf("VM: map_pf done; ep %d 0x%lx %s\n", + ep, arch_map2vir(vmp, addr), pf_errstr(err)); + + printf("VM: handling pagefault OK: %d addr 0x%lx %s\n", + ep, arch_map2vir(vmp, addr), pf_errstr(err)); +#endif /* Pagefault is handled, so now reactivate the process. */ if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK) vm_panic("do_pagefaults: sys_vmctl failed", ep); + } return; @@ -120,55 +149,73 @@ PUBLIC void do_pagefaults(void) PUBLIC void do_memory(void) { int r, s; - endpoint_t who; + endpoint_t who, requestor; vir_bytes mem; vir_bytes len; int wrflag; - while((r=sys_vmctl_get_memreq(&who, &mem, &len, &wrflag)) == OK) { + while((r=sys_vmctl_get_memreq(&who, &mem, &len, &wrflag, &requestor)) + == OK) { int p, r = OK; - struct vir_region *region; struct vmproc *vmp; - vir_bytes o; if(vm_isokendpt(who, &p) != OK) vm_panic("do_memory: endpoint wrong", who); vmp = &vmproc[p]; - /* Page-align memory and length. */ - o = mem % VM_PAGE_SIZE; - mem -= o; - len += o; - o = len % VM_PAGE_SIZE; - if(o > 0) len += VM_PAGE_SIZE - o; - - if(!(region = map_lookup(vmp, mem))) { - printf("VM: do_memory: memory doesn't exist\n"); - r = EFAULT; - } else if(mem + len > region->vaddr + region->length) { - vm_assert(region->vaddr <= mem); - vm_panic("do_memory: not contained", NO_NUM); - } else if(!(region->flags & VR_WRITABLE) && wrflag) { - printf("VM: do_memory: write to unwritable map\n"); - r = EFAULT; - } else { - vir_bytes offset; - vm_assert(region->vaddr <= mem); - vm_assert(!(region->flags & VR_NOPF)); - vm_assert(!(region->vaddr % VM_PAGE_SIZE)); - offset = mem - region->vaddr; - - r = map_handle_memory(vmp, region, offset, len, wrflag); - } - - if(r != OK) { - printf("VM: memory range 0x%lx-0x%lx not available in %d\n", - arch_map2vir(vmp, mem), arch_map2vir(vmp, mem+len), - vmp->vm_endpoint); - } + r = handle_memory(vmp, mem, len, wrflag); - if(sys_vmctl(who, VMCTL_MEMREQ_REPLY, r) != OK) + if(sys_vmctl(requestor, VMCTL_MEMREQ_REPLY, r) != OK) vm_panic("do_memory: sys_vmctl failed", r); + +#if 0 + printf("VM: handling memory request %d done OK\n", + who); +#endif } } +int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag) +{ + struct vir_region *region; + vir_bytes o; + int r; + +#if 0 + printf("VM: handling memory request: %d, 0x%lx-0x%lx, wr %d\n", + vmp->vm_endpoint, mem, mem+len, wrflag); +#endif + + /* Page-align memory and length. 
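+	 * Worked example with 4 KB pages: mem=0x1234, len=0x10 gives
+	 * o=0x234, so mem becomes 0x1000 and len 0x244; rounding len up
+	 * to a page then yields 0x1000, covering every byte the request
+	 * touched.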
*/ + o = mem % VM_PAGE_SIZE; + mem -= o; + len += o; + o = len % VM_PAGE_SIZE; + if(o > 0) len += VM_PAGE_SIZE - o; + + if(!(region = map_lookup(vmp, mem))) { + map_printmap(vmp); + printf("VM: do_memory: memory doesn't exist\n"); + r = EFAULT; + } else if(mem + len > region->vaddr + region->length) { + vm_assert(region->vaddr <= mem); + vm_panic("do_memory: not contained", NO_NUM); + } else if(!(region->flags & VR_WRITABLE) && wrflag) { + printf("VM: do_memory: write to unwritable map\n"); + r = EFAULT; + } else { + vir_bytes offset; + vm_assert(region->vaddr <= mem); + vm_assert(!(region->flags & VR_NOPF)); + vm_assert(!(region->vaddr % VM_PAGE_SIZE)); + offset = mem - region->vaddr; + + r = map_handle_memory(vmp, region, offset, len, wrflag); + } + + if(r != OK) { + printf("VM: memory range 0x%lx-0x%lx not available in %d\n", + arch_map2vir(vmp, mem), arch_map2vir(vmp, mem+len), + vmp->vm_endpoint); + } +} diff --git a/servers/vm/pagerange.h b/servers/vm/pagerange.h new file mode 100644 index 000000000..edf84e9c2 --- /dev/null +++ b/servers/vm/pagerange.h @@ -0,0 +1,24 @@ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct pagerange { + phys_bytes addr; /* in pages */ + phys_bytes size; /* in pages */ + + /* AVL fields */ + struct pagerange *less, *greater; /* children */ + int factor; /* AVL balance factor */ +} pagerange_t; diff --git a/servers/vm/physravl.c b/servers/vm/physravl.c new file mode 100644 index 000000000..5788f3472 --- /dev/null +++ b/servers/vm/physravl.c @@ -0,0 +1,8 @@ + +#include "sanitycheck.h" +#include "region.h" +#include "physravl.h" +#include "util.h" +#include "proto.h" +#include "cavl_impl.h" + diff --git a/servers/vm/physravl.h b/servers/vm/physravl.h new file mode 100644 index 000000000..969096453 --- /dev/null +++ b/servers/vm/physravl.h @@ -0,0 +1,24 @@ + +#ifndef _PHYSRAVL_H +#define _PHYSRAVL_H + +#define AVL_UNIQUE(id) physr_ ## id +#define AVL_HANDLE phys_region_t * +#define AVL_KEY phys_bytes +#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */ +#define AVL_NULL NULL +#define AVL_GET_LESS(h, a) (h)->less +#define AVL_GET_GREATER(h, a) (h)->greater +#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;); +#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;); +#define AVL_GET_BALANCE_FACTOR(h) (h)->factor +#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;); +#define AVL_SET_ROOT(h, v) USE((h), (h)->root = v;); +#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? 
-1 : 0)) +#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->offset) +#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->offset, (h2)->offset) +#define AVL_INSIDE_STRUCT char pad[4]; + +#include "cavl_if.h" + +#endif diff --git a/servers/vm/proto.h b/servers/vm/proto.h index 3e7158a0d..388397aeb 100644 --- a/servers/vm/proto.h +++ b/servers/vm/proto.h @@ -21,14 +21,18 @@ _PROTOTYPE( int do_deldma, (message *msg) ); _PROTOTYPE( int do_getdma, (message *msg) ); _PROTOTYPE( int do_allocmem, (message *msg) ); _PROTOTYPE( void release_dma, (struct vmproc *vmp) ); - +_PROTOTYPE( void memstats, (int *nodes, int *pages, int *largest) ); +_PROTOTYPE( void printmemstats, (void) ); +_PROTOTYPE( void usedpages_reset, (void) ); +_PROTOTYPE( int usedpages_add_f, (phys_bytes phys, phys_bytes len, + char *file, int line) ); _PROTOTYPE( void free_mem_f, (phys_clicks base, phys_clicks clicks) ); +#define usedpages_add(a, l) usedpages_add_f(a, l, __FILE__, __LINE__) #define ALLOC_MEM(clicks, flags) alloc_mem_f(clicks, flags) #define FREE_MEM(base, clicks) free_mem_f(base, clicks) _PROTOTYPE( void mem_init, (struct memory *chunks) ); -_PROTOTYPE( void memstats, (void) ); /* utility.c */ _PROTOTYPE( int get_mem_map, (int proc_nr, struct mem_map *mem_map) ); @@ -37,6 +41,7 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks, struct mem_map *map_ptr)); _PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) ); _PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) ); +_PROTOTYPE( int do_ctl, (message *) ); /* exit.c */ _PROTOTYPE( void clear_proc, (struct vmproc *vmp) ); @@ -74,16 +79,24 @@ _PROTOTYPE( int vfs_close, (struct vmproc *for_who, callback_t callback, /* mmap.c */ _PROTOTYPE(int do_mmap, (message *msg) ); +_PROTOTYPE(int do_munmap, (message *msg) ); _PROTOTYPE(int do_map_phys, (message *msg) ); _PROTOTYPE(int do_unmap_phys, (message *msg) ); +_PROTOTYPE(int do_remap, (message *m) ); +_PROTOTYPE(int do_get_phys, (message *m) ); +_PROTOTYPE(int do_shared_unmap, (message *m) ); +_PROTOTYPE(int do_get_refcount, (message *m) ); /* pagefaults.c */ _PROTOTYPE( void do_pagefaults, (void) ); _PROTOTYPE( void do_memory, (void) ); _PROTOTYPE( char *pf_errstr, (u32_t err)); +_PROTOTYPE( int handle_memory, (struct vmproc *vmp, vir_bytes mem, + vir_bytes len, int wrflag)); /* $(ARCH)/pagetable.c */ _PROTOTYPE( void pt_init, (void) ); +_PROTOTYPE( void pt_check, (struct vmproc *vmp) ); _PROTOTYPE( int pt_new, (pt_t *pt) ); _PROTOTYPE( void pt_free, (pt_t *pt) ); _PROTOTYPE( void pt_freerange, (pt_t *pt, vir_bytes lo, vir_bytes hi) ); @@ -93,8 +106,8 @@ _PROTOTYPE( int pt_bind, (pt_t *pt, struct vmproc *who) ); _PROTOTYPE( void *vm_allocpages, (phys_bytes *p, int pages, int cat)); _PROTOTYPE( void pt_cycle, (void)); _PROTOTYPE( int pt_mapkernel, (pt_t *pt)); -_PROTOTYPE( void phys_readaddr, (phys_bytes addr, phys_bytes *v1, phys_bytes *v2)); -_PROTOTYPE( void phys_writeaddr, (phys_bytes addr, phys_bytes v1, phys_bytes v2)); +_PROTOTYPE( void vm_pagelock, (void *vir, int lockflag) ); + #if SANITYCHECKS _PROTOTYPE( void pt_sanitycheck, (pt_t *pt, char *file, int line) ); #endif @@ -106,18 +119,14 @@ _PROTOTYPE( int arch_get_pagefault, (endpoint_t *who, vir_bytes *addr, u32_t *er _PROTOTYPE(void *slaballoc,(int bytes)); _PROTOTYPE(void slabfree,(void *mem, int bytes)); _PROTOTYPE(void slabstats,(void)); +_PROTOTYPE(void slab_sanitycheck, (char *file, int line)); #define SLABALLOC(var) (var = slaballoc(sizeof(*var))) #define SLABFREE(ptr) slabfree(ptr, sizeof(*(ptr))) #if 
SANITYCHECKS -_PROTOTYPE(int slabsane,(void *mem, int bytes)); -#define SLABSANE(ptr) { \ - if(!slabsane(ptr, sizeof(*(ptr)))) { \ - printf("VM:%s:%d: SLABSANE(%s)\n", __FILE__, __LINE__, #ptr); \ - vm_panic("SLABSANE failed", NO_NUM); \ - } \ -} -#else -#define SLABSANE(ptr) + +_PROTOTYPE(void slabunlock,(void *mem, int bytes)); +_PROTOTYPE(void slablock,(void *mem, int bytes)); +_PROTOTYPE(int slabsane_f,(char *file, int line, void *mem, int bytes)); #endif /* region.c */ @@ -127,7 +136,7 @@ _PROTOTYPE(struct vir_region * map_page_region,(struct vmproc *vmp, \ _PROTOTYPE(struct vir_region * map_proc_kernel,(struct vmproc *dst)); _PROTOTYPE(int map_region_extend,(struct vmproc *vmp, struct vir_region *vr, vir_bytes delta)); _PROTOTYPE(int map_region_shrink,(struct vir_region *vr, vir_bytes delta)); -_PROTOTYPE(int map_unmap_region,(struct vmproc *vmp, struct vir_region *vr)); +_PROTOTYPE(int map_unmap_region,(struct vmproc *vmp, struct vir_region *vr, vir_bytes len)); _PROTOTYPE(int map_free_proc,(struct vmproc *vmp)); _PROTOTYPE(int map_proc_copy,(struct vmproc *dst, struct vmproc *src)); _PROTOTYPE(struct vir_region *map_lookup,(struct vmproc *vmp, vir_bytes addr)); @@ -135,11 +144,17 @@ _PROTOTYPE(int map_pf,(struct vmproc *vmp, struct vir_region *region, vir_bytes offset, int write)); _PROTOTYPE(int map_handle_memory,(struct vmproc *vmp, struct vir_region *region, vir_bytes offset, vir_bytes len, int write)); +_PROTOTYPE(void map_printmap, (struct vmproc *vmp)); +_PROTOTYPE(int map_writept, (struct vmproc *vmp)); +_PROTOTYPE(void printregionstats, (struct vmproc *vmp)); _PROTOTYPE(struct vir_region * map_region_lookup_tag, (struct vmproc *vmp, u32_t tag)); _PROTOTYPE(void map_region_set_tag, (struct vir_region *vr, u32_t tag)); _PROTOTYPE(u32_t map_region_get_tag, (struct vir_region *vr)); - +_PROTOTYPE(int map_remap, (struct vmproc *dvmp, vir_bytes da, size_t size, + struct vir_region *region, vir_bytes *r)); +_PROTOTYPE(int map_get_phys, (struct vmproc *vmp, vir_bytes addr, phys_bytes *r)); +_PROTOTYPE(int map_get_ref, (struct vmproc *vmp, vir_bytes addr, u8_t *cnt)); #if SANITYCHECKS _PROTOTYPE(void map_sanitycheck,(char *file, int line)); @@ -149,4 +164,12 @@ _PROTOTYPE(void map_sanitycheck,(char *file, int line)); _PROTOTYPE( void arch_init_vm, (struct memory mem_chunks[NR_MEMS])); _PROTOTYPE( vir_bytes, arch_map2vir(struct vmproc *vmp, vir_bytes addr)); _PROTOTYPE( vir_bytes, arch_vir2map(struct vmproc *vmp, vir_bytes addr)); +_PROTOTYPE( vir_bytes, arch_vir2map_text(struct vmproc *vmp, vir_bytes addr)); + +/* rs.c */ +_PROTOTYPE(int do_rs_set_priv, (message *m)); +/* queryexit.c */ +_PROTOTYPE(int do_query_exit, (message *m)); +_PROTOTYPE(int do_notify_sig, (message *m)); +_PROTOTYPE(void init_query_exit, (void)); diff --git a/servers/vm/queryexit.c b/servers/vm/queryexit.c new file mode 100644 index 000000000..339d53789 --- /dev/null +++ b/servers/vm/queryexit.c @@ -0,0 +1,123 @@ + +#define _SYSTEM 1 + +#define VERBOSE 0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "glo.h" +#include "proto.h" +#include "util.h" + +struct query_exit_struct { + int avail; + endpoint_t ep; +}; +static struct query_exit_struct array[NR_PROCS]; + +/*===========================================================================* + * do_query_exit * + *===========================================================================*/ 
+PUBLIC int do_query_exit(message *m) +{ + int i, nr; + endpoint_t ep = 0; /* stays 0 if no pending exit is found */ + + for (i = 0; i < NR_PROCS; i++) { + if (!array[i].avail) { + array[i].avail = 1; + ep = array[i].ep; + array[i].ep = 0; + + break; + } + } + + nr = 0; + for (i = 0; i < NR_PROCS; i++) { + if (!array[i].avail) + nr++; + } + m->VM_QUERY_RET_PT = ep; + m->VM_QUERY_IS_MORE = (nr > 0); + + return OK; +} + +/*===========================================================================* + * do_notify_sig * + *===========================================================================*/ +PUBLIC int do_notify_sig(message *m) +{ + int i, avails = 0; + endpoint_t ep = m->VM_NOTIFY_SIG_ENDPOINT; + endpoint_t ipc_ep = m->VM_NOTIFY_SIG_IPC; + int r; + + for (i = 0; i < NR_PROCS; i++) { + /* its signal is already here */ + if (!array[i].avail && array[i].ep == ep) + goto out; + if (array[i].avail) + avails++; + } + if (!avails) { + /* no free slot for signals; should not happen */ + printf("VM: no slot for signals!\n"); + return ENOMEM; + } + + for (i = 0; i < NR_PROCS; i++) { + if (array[i].avail) { + array[i].avail = 0; + array[i].ep = ep; + + break; + } + } + +out: + /* We only care once the IPC server is up, and we skip the + * notify if the process being signaled is IPC itself. + */ + if (ipc_ep != 0 && ep != ipc_ep) { + r = notify(ipc_ep); + if (r != OK) + printf("VM: notify IPC error!\n"); + } + return OK; +} + +/*===========================================================================* + * init_query_exit * + *===========================================================================*/ +PUBLIC void init_query_exit(void) +{ + int i; + + for (i = 0; i < NR_PROCS; i++) { + array[i].avail = 1; + array[i].ep = 0; + } +} + diff --git a/servers/vm/region.c b/servers/vm/region.c index e327c455a..d71bfe459 100644 --- a/servers/vm/region.c +++ b/servers/vm/region.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include @@ -24,15 +26,23 @@ #include "glo.h" #include "region.h" #include "sanitycheck.h" +#include "physravl.h" -FORWARD _PROTOTYPE(int map_new_physblock, (struct vmproc *vmp, +/* Should a physblock be mapped writable?
*/ +#define WRITABLE(r, pb) \ + (((r)->flags & (VR_DIRECT | VR_SHARED)) || \ + (((r)->flags & VR_WRITABLE) && (pb)->refcount == 1)) + +FORWARD _PROTOTYPE(struct phys_region *map_new_physblock, (struct vmproc *vmp, struct vir_region *region, vir_bytes offset, vir_bytes length, - phys_bytes what, struct phys_region *physhint)); + phys_bytes what)); + +FORWARD _PROTOTYPE(int map_ph_writept, (struct vmproc *vmp, struct vir_region *vr, + struct phys_region *pr)); FORWARD _PROTOTYPE(int map_copy_ph_block, (struct vmproc *vmp, struct vir_region *region, struct phys_region *ph)); -FORWARD _PROTOTYPE(struct vir_region *map_copy_region, (struct vir_region *)); -FORWARD _PROTOTYPE(void map_printmap, (struct vmproc *vmp)); +FORWARD _PROTOTYPE(struct vir_region *map_copy_region, (struct vmproc *vmp, struct vir_region *vr)); PRIVATE char *map_name(struct vir_region *vr) { @@ -52,25 +62,30 @@ PRIVATE char *map_name(struct vir_region *vr) /*===========================================================================* * map_printmap * *===========================================================================*/ -PRIVATE void map_printmap(vmp) +PUBLIC void map_printmap(vmp) struct vmproc *vmp; { struct vir_region *vr; + physr_iter iter; + printf("memory regions in process %d:\n", vmp->vm_endpoint); for(vr = vmp->vm_regions; vr; vr = vr->next) { struct phys_region *ph; int nph = 0; + printf("map_printmap: map_name: %s\n", map_name(vr)); printf("\t0x%lx - 0x%lx (len 0x%lx), %s\n", vr->vaddr, vr->vaddr + vr->length, vr->length, - vr->vaddr + vr->length, map_name(vr)); + map_name(vr)); printf("\t\tphysical: "); - for(ph = vr->first; ph; ph = ph->next) { + physr_start_iter_least(vr->phys, &iter); + while((ph = physr_get_iter(&iter))) { printf("0x%lx-0x%lx (refs %d): phys 0x%lx ", - vr->vaddr + ph->ph->offset, - vr->vaddr + ph->ph->offset + ph->ph->length, + vr->vaddr + ph->offset, + vr->vaddr + ph->offset + ph->ph->length, ph->ph->refcount, ph->ph->phys); nph++; + physr_incr_iter(&iter); } printf(" (phregions %d)\n", nph); } @@ -91,27 +106,35 @@ PUBLIC void map_sanitycheck(char *file, int line) * all processes. */ #define ALLREGIONS(regioncode, physcode) \ - for(vmp = vmproc; vmp <= &vmproc[_NR_PROCS]; vmp++) { \ + for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) { \ struct vir_region *vr; \ if(!(vmp->vm_flags & VMF_INUSE)) \ continue; \ for(vr = vmp->vm_regions; vr; vr = vr->next) { \ + physr_iter iter; \ struct phys_region *pr; \ regioncode; \ - for(pr = vr->first; pr; pr = pr->next) { \ + physr_start_iter_least(vr->phys, &iter); \ + while((pr = physr_get_iter(&iter))) { \ physcode; \ + physr_incr_iter(&iter); \ } \ } \ } -#define MYSLABSANE(s) MYASSERT(slabsane(s, sizeof(*(s)))) +#define MYSLABSANE(s) MYASSERT(slabsane_f(__FILE__, __LINE__, s, sizeof(*(s)))) /* Basic pointers check. */ ALLREGIONS(MYSLABSANE(vr),MYSLABSANE(pr); MYSLABSANE(pr->ph);MYSLABSANE(pr->parent)); - ALLREGIONS(MYASSERT(vr->parent == vmp),MYASSERT(pr->parent == vr);); + ALLREGIONS(/* MYASSERT(vr->parent == vmp) */,MYASSERT(pr->parent == vr);); /* Do counting for consistency check. */ - ALLREGIONS(;,pr->ph->seencount = 0;); - ALLREGIONS(;,pr->ph->seencount++;); + ALLREGIONS(;,USE(pr->ph, pr->ph->seencount = 0;);); + ALLREGIONS(;,USE(pr->ph, pr->ph->seencount++;); + if(pr->ph->seencount == 1) { + MYASSERT(usedpages_add(pr->ph->phys, + pr->ph->length) == OK); + } + ); /* Do consistency check. 
*/ ALLREGIONS(if(vr->next) { @@ -123,8 +146,8 @@ PUBLIC void map_sanitycheck(char *file, int line) map_printmap(vmp); printf("ph in vr 0x%lx: 0x%lx-0x%lx refcount %d " "but seencount %lu\n", - vr, pr->ph->offset, - pr->ph->offset + pr->ph->length, + vr, pr->offset, + pr->offset + pr->ph->length, pr->ph->refcount, pr->ph->seencount); } { @@ -147,8 +170,9 @@ PUBLIC void map_sanitycheck(char *file, int line) MYASSERT(pr->ph->refcount == n_others); } MYASSERT(pr->ph->refcount == pr->ph->seencount); - MYASSERT(!(pr->ph->offset % VM_PAGE_SIZE)); + MYASSERT(!(pr->offset % VM_PAGE_SIZE)); MYASSERT(!(pr->ph->length % VM_PAGE_SIZE));); + } #endif @@ -156,34 +180,23 @@ PUBLIC void map_sanitycheck(char *file, int line) /*=========================================================================* * map_ph_writept * *=========================================================================*/ -PUBLIC int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, - struct phys_block *pb, int *ropages, int *rwpages) +PRIVATE int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, + struct phys_region *pr) { int rw; + struct phys_block *pb = pr->ph; vm_assert(!(vr->vaddr % VM_PAGE_SIZE)); vm_assert(!(pb->length % VM_PAGE_SIZE)); - vm_assert(!(pb->offset % VM_PAGE_SIZE)); + vm_assert(!(pr->offset % VM_PAGE_SIZE)); vm_assert(pb->refcount > 0); - if((vr->flags & VR_WRITABLE) - && (pb->refcount == 1 || (vr->flags & VR_DIRECT))) + if(WRITABLE(vr, pb)) rw = PTF_WRITE; else rw = 0; -#if SANITYCHECKS - if(rwpages && ropages && (vr->flags & VR_ANON)) { - int pages; - pages = pb->length / VM_PAGE_SIZE; - if(rw) - (*rwpages) += pages; - else - (*ropages) += pages; - } -#endif - - if(pt_writemap(&vmp->vm_pt, vr->vaddr + pb->offset, + if(pt_writemap(&vmp->vm_pt, vr->vaddr + pr->offset, pb->phys, pb->length, PTF_PRESENT | PTF_USER | rw, WMF_OVERWRITE) != OK) { printf("VM: map_writept: pt_writemap failed\n"); @@ -194,20 +207,13 @@ PUBLIC int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, } /*===========================================================================* - * map_page_region * + * region_find_slot * *===========================================================================*/ -PUBLIC struct vir_region *map_page_region(vmp, minv, maxv, length, - what, flags, mapflags) -struct vmproc *vmp; -vir_bytes minv; -vir_bytes maxv; -vir_bytes length; -vir_bytes what; -u32_t flags; -int mapflags; +PRIVATE vir_bytes region_find_slot(struct vmproc *vmp, + vir_bytes minv, vir_bytes maxv, vir_bytes length, + struct vir_region **prev) { - struct vir_region *vr, *prevregion = NULL, *newregion, - *firstregion = vmp->vm_regions; + struct vir_region *firstregion = vmp->vm_regions, *prevregion = NULL; vir_bytes startv; int foundflag = 0; @@ -227,10 +233,10 @@ int mapflags; /* Sanity check. */ if(maxv <= minv) { - printf("map_page_region: minv 0x%lx and bytes 0x%lx\n", + printf("region_find_slot: minv 0x%lx and bytes 0x%lx\n", minv, length); map_printmap(vmp); - return NULL; + return (vir_bytes) -1; } } @@ -257,6 +263,7 @@ int mapflags; FREEVRANGE(0, firstregion ? firstregion->vaddr : VM_DATATOP, ;); if(!foundflag) { + struct vir_region *vr; for(vr = vmp->vm_regions; vr && !foundflag; vr = vr->next) { FREEVRANGE(vr->vaddr + vr->length, vr->next ? 
vr->next->vaddr : VM_DATATOP, @@ -265,10 +272,10 @@ int mapflags; } if(!foundflag) { - printf("VM: map_page_region: no 0x%lx bytes found for %d between 0x%lx and 0x%lx\n", + printf("VM: region_find_slot: no 0x%lx bytes found for %d between 0x%lx and 0x%lx\n", length, vmp->vm_endpoint, minv, maxv); map_printmap(vmp); - return NULL; + return (vir_bytes) -1; } #if SANITYCHECKS @@ -280,6 +287,35 @@ int mapflags; vm_assert(startv < maxv); vm_assert(startv + length <= maxv); + if (prev) + *prev = prevregion; + return startv; +} + +/*===========================================================================* + * map_page_region * + *===========================================================================*/ +PUBLIC struct vir_region *map_page_region(vmp, minv, maxv, length, + what, flags, mapflags) +struct vmproc *vmp; +vir_bytes minv; +vir_bytes maxv; +vir_bytes length; +vir_bytes what; +u32_t flags; +int mapflags; +{ + struct vir_region *prevregion = NULL, *newregion; + vir_bytes startv; + struct phys_region *ph; + physr_avl *phavl; + + SANITYCHECK(SCL_FUNCTIONS); + + startv = region_find_slot(vmp, minv, maxv, length, &prevregion); + if (startv == (vir_bytes) -1) + return NULL; + /* Now we want a new region. */ if(!SLABALLOC(newregion)) { printf("VM: map_page_region: allocating region failed\n"); @@ -287,28 +323,37 @@ int mapflags; } /* Fill in node details. */ +USE(newregion, newregion->vaddr = startv; newregion->length = length; - newregion->first = NULL; newregion->flags = flags; newregion->tag = VRT_NONE; - newregion->parent = vmp; + newregion->parent = vmp;); + + SLABALLOC(phavl); + if(!phavl) { + printf("VM: map_page_region: allocating phys avl failed\n"); + SLABFREE(newregion); + return NULL; + } + USE(newregion, newregion->phys = phavl;); + + physr_init(newregion->phys); /* If we know what we're going to map to, map it right away. */ if(what != MAP_NONE) { + struct phys_region *pr; vm_assert(!(what % VM_PAGE_SIZE)); vm_assert(!(length % VM_PAGE_SIZE)); vm_assert(!(startv % VM_PAGE_SIZE)); - vm_assert(!newregion->first); vm_assert(!(mapflags & MF_PREALLOC)); - if(map_new_physblock(vmp, newregion, 0, length, what, NULL) != OK) { + if(!(pr=map_new_physblock(vmp, newregion, 0, length, what))) { printf("VM: map_new_physblock failed\n"); + SLABFREE(newregion->phys); SLABFREE(newregion); return NULL; } - vm_assert(newregion->first); - vm_assert(!newregion->first->next); - if(map_ph_writept(vmp, newregion, newregion->first->ph, NULL, NULL) != OK) { + if(map_ph_writept(vmp, newregion, pr) != OK) { printf("VM: map_region_writept failed\n"); SLABFREE(newregion); return NULL; @@ -317,7 +362,8 @@ int mapflags; if((flags & VR_ANON) && (mapflags & MF_PREALLOC)) { if(map_handle_memory(vmp, newregion, 0, length, 1) != OK) { - printf("VM:map_page_region: prealloc failed\n"); + printf("VM: map_page_region: prealloc failed\n"); + SLABFREE(newregion->phys); SLABFREE(newregion); return NULL; } @@ -326,10 +372,10 @@ int mapflags; /* Link it. 
*/ if(prevregion) { vm_assert(prevregion->vaddr < newregion->vaddr); - newregion->next = prevregion->next; - prevregion->next = newregion; + USE(newregion, newregion->next = prevregion->next;); + USE(prevregion, prevregion->next = newregion;); } else { - newregion->next = vmp->vm_regions; + USE(newregion, newregion->next = vmp->vm_regions;); vmp->vm_regions = newregion; } @@ -353,28 +399,21 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr) struct phys_block *pb; int remap = 0; - SLABSANE(pr); pb = pr->ph; - SLABSANE(pb); vm_assert(pb->refcount > 0); - pb->refcount--; + USE(pb, pb->refcount--;); vm_assert(pb->refcount >= 0); - SLABSANE(pb->firstregion); if(pb->firstregion == pr) { - pb->firstregion = pr->next_ph_list; - if(pb->firstregion) { - SLABSANE(pb->firstregion); - } + USE(pb, pb->firstregion = pr->next_ph_list;); } else { struct phys_region *others; for(others = pb->firstregion; others; others = others->next_ph_list) { - SLABSANE(others); vm_assert(others->ph == pb); if(others->next_ph_list == pr) { - others->next_ph_list = pr->next_ph_list; + USE(others, others->next_ph_list = pr->next_ph_list;); break; } } @@ -393,62 +432,95 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr) vm_panic("strange phys flags", NO_NUM); } SLABFREE(pb); - } else { - SLABSANE(pb->firstregion); + } else if(WRITABLE(region, pb)) { /* If a writable piece of physical memory is now only * referenced once, map it writable right away instead of * waiting for a page fault. */ - if(pb->refcount == 1 && (region->flags & VR_WRITABLE)) { vm_assert(pb); vm_assert(pb->firstregion); vm_assert(!pb->firstregion->next_ph_list); vm_assert(pb->firstregion->ph == pb); vm_assert(pb->firstregion->ph == pb); - SLABSANE(pb); - SLABSANE(pb->firstregion); - SLABSANE(pb->firstregion->parent); if(map_ph_writept(pb->firstregion->parent->parent, - pb->firstregion->parent, pb, NULL, NULL) != OK) { + pb->firstregion->parent, pb->firstregion) != OK) { vm_panic("pb_unreferenced: writept", NO_NUM); } - } } } /*===========================================================================* - * map_free * + * map_subfree * *===========================================================================*/ -PRIVATE int map_free(struct vir_region *region) +PRIVATE int map_subfree(struct vmproc *vmp, + struct vir_region *region, vir_bytes len) { struct phys_region *pr, *nextpr; + physr_iter iter; #if SANITYCHECKS - for(pr = region->first; pr; pr = pr->next) { + { + physr_start_iter_least(region->phys, &iter); + while((pr = physr_get_iter(&iter))) { struct phys_region *others; struct phys_block *pb; - SLABSANE(pr); pb = pr->ph; - SLABSANE(pb); - SLABSANE(pb->firstregion); for(others = pb->firstregion; others; others = others->next_ph_list) { - SLABSANE(others); vm_assert(others->ph == pb); } + physr_incr_iter(&iter); + } } #endif - for(pr = region->first; pr; pr = nextpr) { - SANITYCHECK(SCL_DETAIL); - pb_unreferenced(region, pr); - nextpr = pr->next; - region->first = nextpr; /* For sanity checks. 
*/ - SLABFREE(pr); + physr_start_iter_least(region->phys, &iter); + while((pr = physr_get_iter(&iter))) { + physr_incr_iter(&iter); + if(pr->offset >= len) + break; + if(pr->offset + pr->ph->length <= len) { + pb_unreferenced(region, pr); + physr_remove(region->phys, pr->offset); + physr_start_iter_least(region->phys, &iter); + SLABFREE(pr); + } else { + vir_bytes sublen; + vm_assert(len > pr->offset); + vm_assert(len < pr->offset + pr->ph->length); + vm_assert(pr->ph->refcount > 0); + sublen = len - pr->offset; + if(pr->ph->refcount > 1) { + int r; + r = map_copy_ph_block(vmp, region, pr); + if(r != OK) + return r; + } + vm_assert(pr->ph->refcount == 1); + FREE_MEM(ABS2CLICK(pr->ph->phys), ABS2CLICK(sublen)); + USE(pr, pr->offset += sublen;); + USE(pr->ph, + pr->ph->phys += sublen; + pr->ph->length -= sublen;); + } } + return OK; +} + +/*===========================================================================* + * map_free * + *===========================================================================*/ +PRIVATE int map_free(struct vmproc *vmp, struct vir_region *region) +{ + int r; + + if((r=map_subfree(vmp, region, region->length)) != OK) + return r; + + SLABFREE(region->phys); SLABFREE(region); return OK; @@ -470,7 +542,7 @@ struct vmproc *vmp; #if SANITYCHECKS nocheck++; #endif - map_free(r); + map_free(vmp, r); vmp->vm_regions = nextr; /* For sanity checks. */ #if SANITYCHECKS nocheck--; @@ -513,13 +585,12 @@ vir_bytes offset; /*===========================================================================* * map_new_physblock * *===========================================================================*/ -PRIVATE int map_new_physblock(vmp, region, offset, length, what_mem, physhint) +PRIVATE struct phys_region *map_new_physblock(vmp, region, offset, length, what_mem) struct vmproc *vmp; struct vir_region *region; vir_bytes offset; vir_bytes length; phys_bytes what_mem; -struct phys_region *physhint; { struct phys_region *newphysr; struct phys_block *newpb; @@ -529,14 +600,15 @@ struct phys_region *physhint; SANITYCHECK(SCL_FUNCTIONS); vm_assert(!(length % VM_PAGE_SIZE)); - if(!physhint) physhint = region->first; + + NOTRUNNABLE(vmp->vm_endpoint); /* Allocate things necessary for this chunk of memory. */ if(!SLABALLOC(newphysr)) - return ENOMEM; + return NULL; if(!SLABALLOC(newpb)) { SLABFREE(newphysr); - return ENOMEM; + return NULL; } /* Memory for new physical block. */ @@ -545,10 +617,15 @@ struct phys_region *physhint; u32_t af = PAF_CLEAR; if(region->flags & VR_PHYS64K) af |= PAF_ALIGN64K; + if(region->flags & VR_LOWER16MB) + af |= PAF_LOWER16MB; + if(region->flags & VR_LOWER1MB) + af |= PAF_LOWER1MB; if((mem_clicks = ALLOC_MEM(clicks, af)) == NO_MEM) { SLABFREE(newpb); SLABFREE(newphysr); - return ENOMEM; + printf("map_new_physblock: couldn't allocate\n"); + return NULL; } mem = CLICK2ABS(mem_clicks); } else { @@ -557,54 +634,37 @@ struct phys_region *physhint; SANITYCHECK(SCL_DETAIL); /* New physical block. */ + USE(newpb, newpb->phys = mem; newpb->refcount = 1; - newpb->offset = offset; newpb->length = length; - newpb->firstregion = newphysr; - SLABSANE(newpb->firstregion); + newpb->firstregion = newphysr;); /* New physical region. */ + USE(newphysr, + newphysr->offset = offset; newphysr->ph = newpb; newphysr->parent = region; - newphysr->next_ph_list = NULL; /* No other references to this block. */ + newphysr->next_ph_list = NULL; /* No other references to this block. */); /* Update pagetable. 
*/ vm_assert(!(length % VM_PAGE_SIZE)); vm_assert(!(newpb->length % VM_PAGE_SIZE)); SANITYCHECK(SCL_DETAIL); - if(map_ph_writept(vmp, region, newpb, NULL, NULL) != OK) { + if(map_ph_writept(vmp, region, newphysr) != OK) { if(what_mem == MAP_NONE) FREE_MEM(mem_clicks, clicks); SLABFREE(newpb); SLABFREE(newphysr); - return ENOMEM; + printf("map_new_physblock: map_ph_writept failed\n"); + return NULL; } - if(!region->first || offset < region->first->ph->offset) { - /* Special case: offset is before start. */ - if(region->first) { - vm_assert(offset + length <= region->first->ph->offset); - } - newphysr->next = region->first; - region->first = newphysr; - } else { - struct phys_region *physr; - for(physr = physhint; physr; physr = physr->next) { - if(!physr->next || physr->next->ph->offset > offset) { - newphysr->next = physr->next; - physr->next = newphysr; - break; - } - } - - /* Loop must have put the node somewhere. */ - vm_assert(physr->next == newphysr); - } + physr_insert(region->phys, newphysr); SANITYCHECK(SCL_FUNCTIONS); - return OK; + return newphysr; } @@ -637,7 +697,11 @@ struct phys_region *ph; vm_assert(CLICK2ABS(clicks) == ph->ph->length); if(region->flags & VR_PHYS64K) af |= PAF_ALIGN64K; + + NOTRUNNABLE(vmp->vm_endpoint); + if((newmem_cl = ALLOC_MEM(clicks, af)) == NO_MEM) { + printf("VM: map_copy_ph_block: couldn't allocate new block\n"); SLABFREE(newpb); return ENOMEM; } @@ -645,15 +709,17 @@ struct phys_region *ph; vm_assert(ABS2CLICK(newmem) == newmem_cl); pb_unreferenced(region, ph); - SLABSANE(ph); - SLABSANE(ph->ph); vm_assert(ph->ph->refcount > 0); + +USE(newpb, newpb->length = ph->ph->length; - newpb->offset = ph->ph->offset; newpb->refcount = 1; newpb->phys = newmem; - newpb->firstregion = ph; - ph->next_ph_list = NULL; + newpb->firstregion = ph;); + + USE(ph, ph->next_ph_list = NULL;); + + NOTRUNNABLE(vmp->vm_endpoint); /* Copy old memory to new memory. */ if((r=sys_abscopy(ph->ph->phys, newpb->phys, newpb->length)) != OK) { @@ -667,7 +733,7 @@ struct phys_region *ph; #endif /* Reference new block. */ - ph->ph = newpb; + USE(ph, ph->ph = newpb;); /* Check reference counts. */ SANITYCHECK(SCL_DETAIL); @@ -675,7 +741,7 @@ struct phys_region *ph; /* Update pagetable with new address. * This will also make it writable. */ - r = map_ph_writept(vmp, region, ph->ph, NULL, NULL); + r = map_ph_writept(vmp, region, ph); if(r != OK) vm_panic("map_copy_ph_block: map_ph_writept failed", r); @@ -695,7 +761,7 @@ int write; { vir_bytes virpage; struct phys_region *ph; - int r; + int r = OK; vm_assert(offset >= 0); vm_assert(offset < region->length); @@ -707,30 +773,30 @@ int write; SANITYCHECK(SCL_FUNCTIONS); - for(ph = region->first; ph; ph = ph->next) - if(ph->ph->offset <= offset && offset < ph->ph->offset + ph->ph->length) - break; + NOTRUNNABLE(vmp->vm_endpoint); - if(ph) { + if((ph = physr_search(region->phys, offset, AVL_LESS_EQUAL)) && + (ph->offset <= offset && offset < ph->offset + ph->ph->length)) { /* Pagefault in existing block. Do copy-on-write. */ vm_assert(write); vm_assert(region->flags & VR_WRITABLE); vm_assert(ph->ph->refcount > 0); - if(ph->ph->refcount == 1) - r = map_ph_writept(vmp, region, ph->ph, NULL, NULL); - else + if(WRITABLE(region, ph->ph)) { + r = map_ph_writept(vmp, region, ph); + if(r != OK) + printf("map_ph_writept failed\n"); + } else { r = map_copy_ph_block(vmp, region, ph); + if(r != OK) + printf("map_copy_ph_block failed\n"); + } } else { /* Pagefault in non-existing block. Map in new block. 
*/ -#if 0 - if(!write) { - printf("VM: read from uninitialized memory by %d\n", - vmp->vm_endpoint); + if(!map_new_physblock(vmp, region, virpage, VM_PAGE_SIZE, MAP_NONE)) { + printf("map_new_physblock failed\n"); + r = ENOMEM; } -#endif - r = map_new_physblock(vmp, region, virpage, VM_PAGE_SIZE, - MAP_NONE, region->first); } if(r != OK) @@ -750,21 +816,26 @@ struct vir_region *region; vir_bytes offset, length; int write; { - struct phys_region *physr; + struct phys_region *physr, *nextphysr; int changes = 0; + physr_iter iter; + + NOTRUNNABLE(vmp->vm_endpoint); #define FREE_RANGE_HERE(er1, er2) { \ struct phys_region *r1 = (er1), *r2 = (er2); \ vir_bytes start = offset, end = offset + length; \ - if(r1) { start = MAX(start, r1->ph->offset + r1->ph->length); } \ - if(r2) { end = MIN(end, r2->ph->offset); } \ + if(r1) { \ + start = MAX(start, r1->offset + r1->ph->length); } \ + if(r2) { \ + end = MIN(end, r2->offset); } \ if(start < end) { \ int r; \ SANITYCHECK(SCL_DETAIL); \ - if((r=map_new_physblock(vmp, region, start, \ - end-start, MAP_NONE, r1 ? r1 : r2)) != OK) { \ + if(!map_new_physblock(vmp, region, start, \ + end-start, MAP_NONE)) { \ SANITYCHECK(SCL_DETAIL); \ - return r; \ + return ENOMEM; \ } \ changes++; \ } } @@ -777,16 +848,29 @@ int write; vm_assert(!(length % VM_PAGE_SIZE)); vm_assert(!write || (region->flags & VR_WRITABLE)); - FREE_RANGE_HERE(NULL, region->first); + physr_start_iter(region->phys, &iter, offset, AVL_LESS_EQUAL); + physr = physr_get_iter(&iter); + + if(!physr || offset < physr->offset) { + physr_iter previter; + struct phys_region *prevphysr; + previter = iter; + physr_decr_iter(&iter); + prevphysr = physr_get_iter(&iter); + + FREE_RANGE_HERE(prevphysr, physr); + + iter = previter; + } - for(physr = region->first; physr; physr = physr->next) { + while(physr) { int r; SANITYCHECK(SCL_DETAIL); if(write) { vm_assert(physr->ph->refcount > 0); - if(physr->ph->refcount > 1) { + if(!WRITABLE(region, physr->ph)) { SANITYCHECK(SCL_DETAIL); r = map_copy_ph_block(vmp, region, physr); if(r != OK) { @@ -797,7 +881,7 @@ int write; SANITYCHECK(SCL_DETAIL); } else { SANITYCHECK(SCL_DETAIL); - if((r=map_ph_writept(vmp, region, physr->ph, NULL, NULL)) != OK) { + if((r=map_ph_writept(vmp, region, physr)) != OK) { printf("VM: map_ph_writept failed\n"); return r; } @@ -807,17 +891,20 @@ int write; } SANITYCHECK(SCL_DETAIL); - FREE_RANGE_HERE(physr, physr->next); + physr_incr_iter(&iter); + nextphysr = physr_get_iter(&iter); + FREE_RANGE_HERE(physr, nextphysr); SANITYCHECK(SCL_DETAIL); + if(nextphysr) { + if(nextphysr->offset >= offset + length) + break; + } + physr = nextphysr; } SANITYCHECK(SCL_FUNCTIONS); -#if SANITYCHECKS - if(changes == 0) { - vm_panic("no changes?!", changes); - } -#endif + vm_assert(changes > 0); return OK; } @@ -827,8 +914,12 @@ static int countregions(struct vir_region *vr) { int n = 0; struct phys_region *ph; - for(ph = vr->first; ph; ph = ph->next) + physr_iter iter; + physr_start_iter_least(vr->phys, &iter); + while((ph = physr_get_iter(&iter))) { n++; + physr_incr_iter(&iter); + } return n; } #endif @@ -836,7 +927,7 @@ static int countregions(struct vir_region *vr) /*===========================================================================* * map_copy_region * *===========================================================================*/ -PRIVATE struct vir_region *map_copy_region(struct vir_region *vr) +PRIVATE struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr) { /* map_copy_region creates a complete copy of the
vir_region * data structure, linking in the same phys_blocks directly, @@ -847,34 +938,46 @@ PRIVATE struct vir_region *map_copy_region(struct vir_region *vr) * within this function. */ struct vir_region *newvr; - struct phys_region *ph, *prevph = NULL; + struct phys_region *ph; + physr_iter iter; + physr_avl *phavl; #if SANITYCHECKS int cr; cr = countregions(vr); #endif + if(!SLABALLOC(newvr)) return NULL; - *newvr = *vr; - newvr->first = NULL; - newvr->next = NULL; + SLABALLOC(phavl); + if(!phavl) { + SLABFREE(newvr); + return NULL; + } + USE(newvr, + *newvr = *vr; + newvr->next = NULL; + newvr->phys = phavl; + ); + physr_init(newvr->phys); SANITYCHECK(SCL_FUNCTIONS); - for(ph = vr->first; ph; ph = ph->next) { + physr_start_iter_least(vr->phys, &iter); + while((ph = physr_get_iter(&iter))) { struct phys_region *newph; if(!SLABALLOC(newph)) { - map_free(newvr); + map_free(vmp, newvr); return NULL; } - newph->next = NULL; + USE(newph, newph->ph = ph->ph; newph->next_ph_list = NULL; newph->parent = newvr; - if(prevph) prevph->next = newph; - else newvr->first = newph; - prevph = newph; + newph->offset = ph->offset;); + physr_insert(newvr->phys, newph); SANITYCHECK(SCL_DETAIL); vm_assert(countregions(vr) == cr); + physr_incr_iter(&iter); } vm_assert(countregions(vr) == countregions(newvr)); @@ -891,12 +994,19 @@ PUBLIC int map_writept(struct vmproc *vmp) { struct vir_region *vr; struct phys_region *ph; - int ropages = 0, rwpages = 0; + int r; - for(vr = vmp->vm_regions; vr; vr = vr->next) - for(ph = vr->first; ph; ph = ph->next) { - map_ph_writept(vmp, vr, ph->ph, &ropages, &rwpages); + for(vr = vmp->vm_regions; vr; vr = vr->next) { + physr_iter iter; + physr_start_iter_least(vr->phys, &iter); + while((ph = physr_get_iter(&iter))) { + if((r=map_ph_writept(vmp, vr, ph)) != OK) { + printf("VM: map_writept: failed\n"); + return r; + } + physr_incr_iter(&iter); } + } return OK; } @@ -912,54 +1022,64 @@ struct vmproc *src; dst->vm_regions = NULL; SANITYCHECK(SCL_FUNCTIONS); + + PT_SANE(&src->vm_pt); + for(vr = src->vm_regions; vr; vr = vr->next) { + physr_iter iter_orig, iter_new; struct vir_region *newvr; struct phys_region *orig_ph, *new_ph; SANITYCHECK(SCL_DETAIL); - if(!(newvr = map_copy_region(vr))) { + if(!(newvr = map_copy_region(dst, vr))) { map_free_proc(dst); SANITYCHECK(SCL_FUNCTIONS); return ENOMEM; } SANITYCHECK(SCL_DETAIL); - newvr->parent = dst; - if(prevvr) { prevvr->next = newvr; } + USE(newvr, newvr->parent = dst;); + if(prevvr) { USE(prevvr, prevvr->next = newvr;); } else { dst->vm_regions = newvr; } - new_ph = newvr->first; - for(orig_ph = vr->first; orig_ph; orig_ph = orig_ph->next) { + physr_start_iter_least(vr->phys, &iter_orig); + physr_start_iter_least(newvr->phys, &iter_new); + while((orig_ph = physr_get_iter(&iter_orig))) { struct phys_block *pb; + new_ph = physr_get_iter(&iter_new); /* Check two physregions both are nonnull, * are different, and match physblocks. */ - vm_assert(orig_ph && new_ph); + vm_assert(new_ph); + vm_assert(orig_ph); vm_assert(orig_ph != new_ph); pb = orig_ph->ph; vm_assert(pb == new_ph->ph); /* Link in new physregion. 
*/ vm_assert(!new_ph->next_ph_list); - new_ph->next_ph_list = pb->firstregion; - pb->firstregion = new_ph; - SLABSANE(new_ph); - SLABSANE(new_ph->next_ph_list); + USE(new_ph, new_ph->next_ph_list = pb->firstregion;); + USE(pb, pb->firstregion = new_ph;); /* Increase phys block refcount */ vm_assert(pb->refcount > 0); - pb->refcount++; + USE(pb, pb->refcount++;); vm_assert(pb->refcount > 1); /* Get next new physregion */ - new_ph = new_ph->next; + physr_incr_iter(&iter_orig); + physr_incr_iter(&iter_new); } - vm_assert(!new_ph); + vm_assert(!physr_get_iter(&iter_new)); SANITYCHECK(SCL_DETAIL); prevvr = newvr; SANITYCHECK(SCL_DETAIL); } SANITYCHECK(SCL_DETAIL); + PT_SANE(&src->vm_pt); + map_writept(src); + PT_SANE(&src->vm_pt); map_writept(dst); + PT_SANE(&dst->vm_pt); SANITYCHECK(SCL_FUNCTIONS); return OK; @@ -1015,7 +1135,7 @@ PUBLIC int map_region_extend(struct vmproc *vmp, struct vir_region *vr, } if(!vr->next || end + delta <= vr->next->vaddr) { - vr->length += delta; + USE(vr, vr->length += delta;); return OK; } @@ -1055,7 +1175,7 @@ u32_t tag; PUBLIC void map_region_set_tag(struct vir_region *vr, u32_t tag) { - vr->tag = tag; + USE(vr, vr->tag = tag;); } PUBLIC u32_t map_region_get_tag(struct vir_region *vr) @@ -1066,9 +1186,14 @@ PUBLIC u32_t map_region_get_tag(struct vir_region *vr) /*========================================================================* * map_unmap_region * *========================================================================*/ -PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region) +PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region, + vir_bytes len) { +/* Shrink the region by 'len' bytes, from the start. Unreference + * memory it used to reference if any. + */ struct vir_region *r, *nextr, *prev = NULL; + vir_bytes regionstart; SANITYCHECK(SCL_FUNCTIONS); @@ -1084,16 +1209,53 @@ PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region) if(r == NULL) vm_panic("map_unmap_region: region not found\n", NO_NUM); - if(!prev) - vmp->vm_regions = r->next; - else - prev->next = r->next; - map_free(r); + if(len > r->length || (len % VM_PAGE_SIZE)) { + printf("VM: bogus length 0x%lx\n", len); + return EINVAL; + } + + if(!(r->flags & VR_ANON)) { + printf("VM: only unmap anonymous memory\n"); + return EINVAL; + } + + regionstart = r->vaddr; + + if(len == r->length) { + /* Whole region disappears. Unlink and free it. */ + if(!prev) { + vmp->vm_regions = r->next; + } else { + USE(prev, prev->next = r->next;); + } + map_free(vmp, r); + } else { + struct phys_region *pr; + physr_iter iter; + /* Region shrinks. First unreference its memory + * and then shrink the region. + */ + map_subfree(vmp, r, len); + USE(r, + r->vaddr += len; + r->length -= len;); + physr_start_iter_least(r->phys, &iter); + + /* vaddr has increased; to make all the phys_regions + * point to the same addresses, make them shrink by the + * same amount. 
+ */ + while((pr = physr_get_iter(&iter))) { + vm_assert(pr->offset >= len); + USE(pr, pr->offset -= len;); + physr_incr_iter(&iter); + } + } SANITYCHECK(SCL_DETAIL); - if(pt_writemap(&vmp->vm_pt, r->vaddr, - MAP_NONE, r->length, 0, WMF_OVERWRITE) != OK) { + if(pt_writemap(&vmp->vm_pt, regionstart, + MAP_NONE, len, 0, WMF_OVERWRITE) != OK) { printf("VM: map_unmap_region: pt_writemap failed\n"); return ENOMEM; } @@ -1102,3 +1264,159 @@ PUBLIC int map_unmap_region(struct vmproc *vmp, struct vir_region *region) return OK; } + +/*========================================================================* + * map_remap * + *========================================================================*/ +PUBLIC int map_remap(struct vmproc *dvmp, vir_bytes da, size_t size, + struct vir_region *region, vir_bytes *r) +{ + struct vir_region *vr, *prev; + struct phys_region *ph; + vir_bytes startv, dst_addr; + physr_iter iter; + + SANITYCHECK(SCL_FUNCTIONS); + + /* da is handled differently */ + if (!da) + dst_addr = dvmp->vm_stacktop; + else + dst_addr = da; + dst_addr = arch_vir2map(dvmp, dst_addr); + + prev = NULL; + /* round up to page size */ + if (size % I386_PAGE_SIZE) + size += I386_PAGE_SIZE - size % I386_PAGE_SIZE; + startv = region_find_slot(dvmp, dst_addr, VM_DATATOP, size, &prev); + if (startv == (vir_bytes) -1) { + printf("map_remap: search %x...\n", dst_addr); + map_printmap(dvmp); + return ENOMEM; + } + /* when the user specifies the address, we cannot change it */ + if (da && (startv != dst_addr)) + return EINVAL; + + vr = map_copy_region(dvmp, region); + if(!vr) + return ENOMEM; + + USE(vr, + vr->vaddr = startv; + vr->length = size; + vr->flags = region->flags; + vr->tag = VRT_NONE; + vr->parent = dvmp;); + vm_assert(vr->flags & VR_SHARED); + + if (prev) { + USE(vr, + vr->next = prev->next;); + USE(prev, prev->next = vr;); + } else { + USE(vr, + vr->next = dvmp->vm_regions;); + dvmp->vm_regions = vr; + } + + physr_start_iter_least(vr->phys, &iter); + while((ph = physr_get_iter(&iter))) { + struct phys_block *pb = ph->ph; + USE(pb, pb->refcount++;); + if(map_ph_writept(dvmp, vr, ph) != OK) { + vm_panic("map_remap: map_ph_writept failed", NO_NUM); + } + + physr_incr_iter(&iter); + } + + *r = startv; + + SANITYCHECK(SCL_FUNCTIONS); + + return OK; +} + +/*========================================================================* + * map_get_phys * + *========================================================================*/ +PUBLIC int map_get_phys(struct vmproc *vmp, vir_bytes addr, phys_bytes *r) +{ + struct vir_region *vr; + struct phys_region *ph; + physr_iter iter; + + if (!(vr = map_lookup(vmp, addr)) || + (vr->vaddr != addr)) + return EINVAL; + + if (!(vr->flags & VR_SHARED)) + return EINVAL; + + physr_start_iter_least(vr->phys, &iter); + ph = physr_get_iter(&iter); + + vm_assert(ph); + vm_assert(ph->ph); + if (r) + *r = ph->ph->phys; + + return OK; +} + +/*========================================================================* + * map_get_ref * + *========================================================================*/ +PUBLIC int map_get_ref(struct vmproc *vmp, vir_bytes addr, u8_t *cnt) +{ + struct vir_region *vr; + struct phys_region *ph; + physr_iter iter; + + if (!(vr = map_lookup(vmp, addr)) || + (vr->vaddr != addr)) + return EINVAL; + + if (!(vr->flags & VR_SHARED)) + return EINVAL; + + physr_start_iter_least(vr->phys, &iter); + ph = physr_get_iter(&iter); + + vm_assert(ph); + vm_assert(ph->ph); + if (cnt) + *cnt = ph->ph->refcount; + + return OK; +} + + 
+/*========================================================================* + * printregionstats * + *========================================================================*/ +PUBLIC void printregionstats(struct vmproc *vmp) +{ + struct vir_region *vr; + struct phys_region *pr; + physr_iter iter; + vir_bytes used = 0, weighted = 0; + + for(vr = vmp->vm_regions; vr; vr = vr->next) { + if(vr->flags & VR_DIRECT) + continue; + physr_start_iter_least(vr->phys, &iter); + while((pr = physr_get_iter(&iter))) { + physr_incr_iter(&iter); + used += pr->ph->length; + weighted += pr->ph->length / pr->ph->refcount; + } + } + + printf("%6lukB %6lukB\n", used/1024, weighted/1024); + + return; +} + diff --git a/servers/vm/region.h b/servers/vm/region.h index 9e4cf9f02..9eaf1d41a 100644 --- a/servers/vm/region.h +++ b/servers/vm/region.h @@ -2,11 +2,24 @@ #ifndef _REGION_H #define _REGION_H 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + struct phys_block { #if SANITYCHECKS u32_t seencount; #endif - vir_bytes offset; /* offset from start of vir region */ vir_bytes length; /* no. of contiguous bytes */ phys_bytes phys; /* physical memory */ u8_t refcount; /* Refcount of these pages */ @@ -15,33 +28,42 @@ struct phys_block { struct phys_region *firstregion; }; -struct phys_region { - struct phys_region *next; /* next contiguous block */ +typedef struct phys_region { struct phys_block *ph; - struct vir_region *parent; /* Region that owns this phys_region. */ + struct vir_region *parent; /* parent vir_region. */ + vir_bytes offset; /* offset from start of vir region */ /* list of phys_regions that reference the same phys_block */ struct phys_region *next_ph_list; -}; + + /* AVL fields */ + struct phys_region *less, *greater; + int factor; +} phys_region_t; + +#include "physravl.h" struct vir_region { struct vir_region *next; /* next virtual region in this process */ vir_bytes vaddr; /* virtual address, offset from pagetable */ vir_bytes length; /* length in bytes */ - struct phys_region *first; /* phys regions in vir region */ + physr_avl *phys; /* avl tree of physical memory blocks */ u16_t flags; u32_t tag; /* Opaque to mapping code. */ struct vmproc *parent; /* Process that owns this vir_region. */ }; /* Mapping flags: */ -#define VR_WRITABLE 0x01 /* Process may write here. */ -#define VR_NOPF 0x02 /* May not generate page faults. */ -#define VR_PHYS64K 0x04 /* Physical memory must be 64k aligned.
*/ +#define VR_LOWER16MB 0x008 +#define VR_LOWER1MB 0x010 /* Mapping type: */ -#define VR_ANON 0x10 /* Memory to be cleared and allocated */ -#define VR_DIRECT 0x20 /* Mapped, but not managed by VM */ +#define VR_ANON 0x100 /* Memory to be cleared and allocated */ +#define VR_DIRECT 0x200 /* Mapped, but not managed by VM */ +#define VR_SHARED 0x40 /* Tag values: */ #define VRT_NONE 0xBEEF0000 diff --git a/servers/vm/rs.c b/servers/vm/rs.c new file mode 100644 index 000000000..4473c67bb --- /dev/null +++ b/servers/vm/rs.c @@ -0,0 +1,56 @@ + +#define _SYSTEM 1 + +#define VERBOSE 0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "glo.h" +#include "proto.h" +#include "util.h" + +/*===========================================================================* + * do_rs_set_priv * + *===========================================================================*/ +PUBLIC int do_rs_set_priv(message *m) +{ + int r, n, nr; + struct vmproc *vmp; + + nr = m->VM_RS_NR; + + if ((r = vm_isokendpt(nr, &n)) != OK) { + printf("do_rs_set_priv: message from strange source %d\n", nr); + return EINVAL; + } + + vmp = &vmproc[n]; + + if (m->VM_RS_BUF) { + r = sys_datacopy(m->m_source, (vir_bytes) m->VM_RS_BUF, + SELF, (vir_bytes) vmp->vm_call_priv_mask, + sizeof(vmp->vm_call_priv_mask)); + if (r != OK) + return r; + } + return OK; +} + diff --git a/servers/vm/sanitycheck.h b/servers/vm/sanitycheck.h index dac7b83e7..a0a67729e 100644 --- a/servers/vm/sanitycheck.h +++ b/servers/vm/sanitycheck.h @@ -13,34 +13,59 @@ printf("VM:%s:%d: %s failed\n", file, line, #c); \ vm_panic("sanity check failed", NO_NUM); } } while(0) +#define SLABSANITYCHECK(l) if((l) <= vm_sanitychecklevel) { \ + slab_sanitycheck(__FILE__, __LINE__); } + #define SANITYCHECK(l) if(!nocheck && ((l) <= vm_sanitychecklevel)) { \ - int failflag = 0; \ - u32_t *origptr = CHECKADDR;\ - int _sanep; \ struct vmproc *vmp; \ - \ - for(_sanep = 0; _sanep < sizeof(data1) / sizeof(*origptr); \ - _sanep++) { \ - if(origptr[_sanep] != data1[_sanep]) { \ - printf("%d: %08lx != %08lx ", \ - _sanep, origptr[_sanep], data1[_sanep]); failflag = 1; \ - } \ - } \ - if(failflag) { \ - printf("%s:%d: memory corruption test failed\n", \ - __FILE__, __LINE__); \ - vm_panic("memory corruption", NO_NUM); \ - } \ - for(vmp = vmproc; vmp <= &vmproc[_NR_PROCS]; vmp++) { \ + vm_assert(incheck == 0); \ + incheck = 1; \ + usedpages_reset(); \ + slab_sanitycheck(__FILE__, __LINE__); \ + for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) { \ if((vmp->vm_flags & (VMF_INUSE | VMF_HASPT)) == \ (VMF_INUSE | VMF_HASPT)) { \ - pt_sanitycheck(&vmp->vm_pt, __FILE__, __LINE__); \ + PT_SANE(&vmp->vm_pt); \ } \ } \ map_sanitycheck(__FILE__, __LINE__); \ + vm_assert(incheck == 1); \ + incheck = 0; \ } + +#include "../../kernel/proc.h" + +#define USE(obj, code) do { \ + slabunlock(obj, sizeof(*obj)); \ + do { \ + code \ + } while(0); \ + slablock(obj, sizeof(*obj)); \ +} while(0) + +#define SLABSANE(ptr) { \ + if(!slabsane_f(__FILE__, __LINE__, ptr, sizeof(*(ptr)))) { \ + printf("VM:%s:%d: SLABSANE(%s)\n", __FILE__, __LINE__, #ptr); \ + vm_panic("SLABSANE failed", NO_NUM); \ + } \ +} + +#define NOTRUNNABLE(ep) { \ + struct proc pr; \ + if(sys_getproc(&pr, ep) != OK) { \ + vm_panic("VM: sys_getproc failed", ep); \ + } \ + if(!pr.p_rts_flags) { \ + vm_panic("VM: runnable", ep); \ + } \ +} + #else #define SANITYCHECK +#define SLABSANITYCHECK(l) +#define 
USE(obj, code) do { code } while(0) +#define SLABSANE(ptr) +#define NOTRUNNABLE(ep) #endif #endif diff --git a/servers/vm/signal.c b/servers/vm/signal.c index a43e76962..bc7555bb5 100644 --- a/servers/vm/signal.c +++ b/servers/vm/signal.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/servers/vm/slaballoc.c b/servers/vm/slaballoc.c index 2789303a8..2cc8c2e65 100644 --- a/servers/vm/slaballoc.c +++ b/servers/vm/slaballoc.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -27,7 +29,7 @@ #define SLABSIZES 60 -#define ITEMSPERPAGE(s, bytes) (DATABYTES / (bytes)) +#define ITEMSPERPAGE(bytes) (DATABYTES / (bytes)) #define ELBITS (sizeof(element_t)*8) #define BITPAT(b) (1UL << ((b) % ELBITS)) @@ -37,9 +39,37 @@ #define OFF(f, b) vm_assert(!GETBIT(f, b)) #define ON(f, b) vm_assert(GETBIT(f, b)) +#if SANITYCHECKS +#define SLABDATAWRITABLE(data, wr) do { \ + vm_assert(data->sdh.writable == WRITABLE_NONE); \ + vm_assert(wr != WRITABLE_NONE); \ + vm_pagelock(data, 0); \ + data->sdh.writable = wr; \ +} while(0) + +#define SLABDATAUNWRITABLE(data) do { \ + vm_assert(data->sdh.writable != WRITABLE_NONE); \ + data->sdh.writable = WRITABLE_NONE; \ + vm_pagelock(data, 1); \ +} while(0) + +#define SLABDATAUSE(data, code) do { \ + SLABDATAWRITABLE(data, WRITABLE_HEADER); \ + code \ + SLABDATAUNWRITABLE(data); \ +} while(0) + +#else + +#define SLABDATAWRITABLE(data, wr) +#define SLABDATAUNWRITABLE(data) +#define SLABDATAUSE(data, code) do { code } while(0) + +#endif + #define GETBIT(f, b) (BITEL(f,b) & BITPAT(b)) -#define SETBIT(f, b) {OFF(f,b); (BITEL(f,b)|= BITPAT(b)); (f)->sdh.nused++; } -#define CLEARBIT(f, b) {ON(f, b); (BITEL(f,b)&=~BITPAT(b)); (f)->sdh.nused--; (f)->sdh.freeguess = (b); } +#define SETBIT(f, b) {OFF(f,b); SLABDATAUSE(f, BITEL(f,b)|= BITPAT(b); (f)->sdh.nused++;); } +#define CLEARBIT(f, b) {ON(f, b); SLABDATAUSE(f, BITEL(f,b)&=~BITPAT(b); (f)->sdh.nused--; (f)->sdh.freeguess = (b);); } #define MINSIZE 8 #define MAXSIZE (SLABSIZES-1+MINSIZE) @@ -56,28 +86,32 @@ typedef element_t elements_t[USEELEMENTS]; * inconsistent state during a slaballoc() / slabfree(). So only do * our own sanity checks here, with SLABSANITYCHECK. */ -#if SANITYCHECKS -#define SLABSANITYCHECK(l) if((l) <= vm_sanitychecklevel) { \ - slab_sanitycheck(__FILE__, __LINE__); } -#else -#define SLABSANITYCHECK(l) -#endif + + +/* Special writable values. */ +#define WRITABLE_NONE -2 +#define WRITABLE_HEADER -1 struct sdh { - u8_t list; - u16_t nused; /* Number of data items used in this slab. */ #if SANITYCHECKS - u32_t magic; + u32_t magic1; #endif + u8_t list; + u16_t nused; /* Number of data items used in this slab. */ int freeguess; struct slabdata *next, *prev; elements_t usebits; phys_bytes phys; +#if SANITYCHECKS + int writable; /* data item number or WRITABLE_* */ + u32_t magic2; +#endif }; #define DATABYTES (VM_PAGE_SIZE-sizeof(struct sdh)) -#define MAGIC 0x1f5b842f +#define MAGIC1 0x1f5b842f +#define MAGIC2 0x8bb5a420 #define JUNK 0xdeadbeef #define NOJUNK 0xc0ffee @@ -107,6 +141,7 @@ FORWARD _PROTOTYPE( int objstats, (void *, int, struct slabheader **, struct sla #define LH(sl, l) (sl)->list_head[l] +/* move head of list l1 to list of l2 in slabheader sl. */ #define MOVEHEAD(sl, l1, l2) { \ struct slabdata *t; \ vm_assert(LH(sl,l1)); \ @@ -114,28 +149,35 @@ FORWARD _PROTOTYPE( int objstats, (void *, int, struct slabheader **, struct sla ADDHEAD(t, sl, l2); \ } +/* remove head of list 'list' in sl, assign it unlinked to 'to'. 
*/ #define REMOVEHEAD(sl, list, to) { \ - (to) = LH(sl, list); \ - vm_assert(to); \ - LH(sl, list) = (to)->sdh.next; \ - if(LH(sl, list)) LH(sl, list) = NULL; \ - vm_assert((to)->sdh.magic == MAGIC);\ - vm_assert(!(to)->sdh.prev); \ + struct slabdata *dat; \ + dat = (to) = LH(sl, list); \ + vm_assert(dat); \ + LH(sl, list) = dat->sdh.next; \ + UNLINKNODE(dat); \ } +/* move slabdata nw to slabheader sl under list number l. */ #define ADDHEAD(nw, sl, l) { \ - vm_assert((nw)->sdh.magic == MAGIC); \ - (nw)->sdh.next = LH(sl, l); \ - (nw)->sdh.prev = NULL; \ - (nw)->sdh.list = l; \ + SLABDATAUSE(nw, \ + (nw)->sdh.next = LH(sl, l); \ + (nw)->sdh.prev = NULL; \ + (nw)->sdh.list = l;); \ LH(sl, l) = (nw); \ - if((nw)->sdh.next) (nw)->sdh.next->sdh.prev = (nw); \ + if((nw)->sdh.next) { \ + SLABDATAUSE((nw)->sdh.next, \ + (nw)->sdh.next->sdh.prev = (nw);); \ + } \ } -#define UNLINKNODE(n) { \ - if((f)->sdh.prev) (f)->sdh.prev->sdh.next = (f)->sdh.next; \ - if((f)->sdh.next) (f)->sdh.next->sdh.prev = (f)->sdh.prev; \ - } +#define UNLINKNODE(node) { \ + struct slabdata *next, *prev; \ + prev = (node)->sdh.prev; \ + next = (node)->sdh.next; \ + if(prev) { SLABDATAUSE(prev, prev->sdh.next = next;); } \ + if(next) { SLABDATAUSE(next, next->sdh.prev = prev;); } \ +} struct slabdata *newslabdata(int list) { @@ -151,12 +193,18 @@ struct slabdata *newslabdata(int list) n->sdh.phys = p; #if SANITYCHECKS - n->sdh.magic = MAGIC; + n->sdh.magic1 = MAGIC1; + n->sdh.magic2 = MAGIC2; #endif n->sdh.nused = 0; n->sdh.freeguess = 0; n->sdh.list = list; +#if SANITYCHECKS + n->sdh.writable = WRITABLE_HEADER; + SLABDATAUNWRITABLE(n); +#endif + return n; } @@ -173,15 +221,17 @@ PRIVATE int checklist(char *file, int line, while(n) { int count = 0, i; + MYASSERT(n->sdh.magic1 == MAGIC1); + MYASSERT(n->sdh.magic2 == MAGIC2); MYASSERT(n->sdh.list == l); - MYASSERT(n->sdh.magic == MAGIC); + MYASSERT(usedpages_add(n->sdh.phys, VM_PAGE_SIZE) == OK); if(n->sdh.prev) MYASSERT(n->sdh.prev->sdh.next == n); else MYASSERT(s->list_head[l] == n); if(n->sdh.next) MYASSERT(n->sdh.next->sdh.prev == n); for(i = 0; i < USEELEMENTS*8; i++) - if(i >= ITEMSPERPAGE(s, bytes)) + if(i >= ITEMSPERPAGE(bytes)) MYASSERT(!GETBIT(n, i)); else if(GETBIT(n,i)) @@ -211,21 +261,25 @@ PUBLIC void slab_sanitycheck(char *file, int line) /*===========================================================================* * int slabsane * *===========================================================================*/ -PUBLIC int slabsane(void *mem, int bytes) +PUBLIC int slabsane_f(char *file, int line, void *mem, int bytes) { struct slabheader *s; struct slabdata *f; int i; + return (objstats(mem, bytes, &s, &f, &i) == OK); } #endif +static int nojunkwarning = 0; + /*===========================================================================* * void *slaballoc * *===========================================================================*/ PUBLIC void *slaballoc(int bytes) { - int i, n = 0; + int i; + int count = 0; struct slabheader *s; struct slabdata *firstused; @@ -242,10 +296,10 @@ PUBLIC void *slaballoc(int bytes) /* Make sure there is something on the freelist. 
*/ SLABSANITYCHECK(SCL_DETAIL); if(!LH(s, LIST_FREE)) { - struct slabdata *n = newslabdata(LIST_FREE); + struct slabdata *nd = newslabdata(LIST_FREE); SLABSANITYCHECK(SCL_DETAIL); - if(!n) return NULL; - ADDHEAD(n, s, LIST_FREE); + if(!nd) return NULL; + ADDHEAD(nd, s, LIST_FREE); SLABSANITYCHECK(SCL_DETAIL); } @@ -260,18 +314,21 @@ PUBLIC void *slaballoc(int bytes) vm_assert(s); firstused = LH(s, LIST_USED); vm_assert(firstused); - vm_assert(firstused->sdh.magic == MAGIC); + vm_assert(firstused->sdh.magic1 == MAGIC1); + vm_assert(firstused->sdh.magic2 == MAGIC2); + vm_assert(firstused->sdh.nused < ITEMSPERPAGE(bytes)); - for(i = firstused->sdh.freeguess; n < ITEMSPERPAGE(s, bytes); n++, i++) { + for(i = firstused->sdh.freeguess; + count < ITEMSPERPAGE(bytes); count++, i++) { SLABSANITYCHECK(SCL_DETAIL); - i = i % ITEMSPERPAGE(s, bytes); + i = i % ITEMSPERPAGE(bytes); if(!GETBIT(firstused, i)) { struct slabdata *f; char *ret; SETBIT(firstused, i); SLABSANITYCHECK(SCL_DETAIL); - if(firstused->sdh.nused == ITEMSPERPAGE(s, bytes)) { + if(firstused->sdh.nused == ITEMSPERPAGE(bytes)) { SLABSANITYCHECK(SCL_DETAIL); MOVEHEAD(s, LIST_USED, LIST_FULL); SLABSANITYCHECK(SCL_DETAIL); @@ -280,20 +337,21 @@ PUBLIC void *slaballoc(int bytes) ret = ((char *) firstused->data) + i*bytes; #if SANITYCHECKS - f = (struct slabdata *) ((char *) ret - (vir_bytes) ret % VM_PAGE_SIZE); - if(f->sdh.magic != MAGIC) { - printf("slaballoc bogus pointer 0x%lx, " - "rounded 0x%lx, bad magic 0x%lx\n", - ret, f, f->sdh.magic); - vm_panic("slaballoc check failed", NO_NUM); - } + nojunkwarning++; + slabunlock(ret, bytes); + nojunkwarning--; + vm_assert(!nojunkwarning); *(u32_t *) ret = NOJUNK; + slablock(ret, bytes); #endif SLABSANITYCHECK(SCL_FUNCTIONS); - firstused->sdh.freeguess = i+1; + SLABDATAUSE(firstused, firstused->sdh.freeguess = i+1;); #if SANITYCHECKS - if(!slabsane(ret, bytes)) + if(bytes >= SLABSIZES+MINSIZE) { + printf("slaballoc: odd, bytes %d?\n", bytes); + } + if(!slabsane_f(__FILE__, __LINE__, ret, bytes)) vm_panic("slaballoc: slabsane failed", NO_NUM); #endif @@ -317,12 +375,16 @@ PUBLIC void *slaballoc(int bytes) PRIVATE int objstats(void *mem, int bytes, struct slabheader **sp, struct slabdata **fp, int *ip) { +#if SANITYCHECKS #define OBJSTATSCHECK(cond) \ if(!(cond)) { \ - printf("VM:objstats: %s failed for ptr 0x%p, %d bytes\n", \ + printf("VM: objstats: %s failed for ptr 0x%p, %d bytes\n", \ #cond, mem, bytes); \ return EINVAL; \ } +#else +#define OBJSTATSCHECK(cond) +#endif struct slabheader *s; struct slabdata *f; @@ -331,21 +393,19 @@ PRIVATE int objstats(void *mem, int bytes, OBJSTATSCHECK((char *) mem >= (char *) VM_PAGE_SIZE); #if SANITYCHECKS - if(*(u32_t *) mem == JUNK) { + if(*(u32_t *) mem == JUNK && !nojunkwarning) { util_stacktrace(); printf("VM: WARNING: JUNK seen in slab object\n"); } #endif - /* Retrieve entry in slabs[]. */ GETSLAB(bytes, s); /* Round address down to VM_PAGE_SIZE boundary to get header. */ f = (struct slabdata *) ((char *) mem - (vir_bytes) mem % VM_PAGE_SIZE); -#if SANITYCHECKS - OBJSTATSCHECK(f->sdh.magic == MAGIC); -#endif + OBJSTATSCHECK(f->sdh.magic1 == MAGIC1); + OBJSTATSCHECK(f->sdh.magic2 == MAGIC2); OBJSTATSCHECK(f->sdh.list == LIST_USED || f->sdh.list == LIST_FULL); /* Make sure it's in range. 
*/ @@ -379,22 +439,26 @@ PUBLIC void slabfree(void *mem, int bytes) SLABSANITYCHECK(SCL_FUNCTIONS); + if(objstats(mem, bytes, &s, &f, &i) != OK) { + vm_panic("slabfree objstats failed", NO_NUM); + } + #if SANITYCHECKS if(*(u32_t *) mem == JUNK) { printf("VM: WARNING: likely double free, JUNK seen\n"); } + + slabunlock(mem, bytes); + *(u32_t *) mem = JUNK; + nojunkwarning++; + slablock(mem, bytes); + nojunkwarning--; + vm_assert(!nojunkwarning); #endif - if(objstats(mem, bytes, &s, &f, &i) != OK) { - vm_panic("slabfree objstats failed", NO_NUM); - } /* Free this data. */ CLEARBIT(f, i); -#if SANITYCHECKS - *(u32_t *) mem = JUNK; -#endif - /* Check if this slab changes lists. */ if(f->sdh.nused == 0) { /* Now become FREE; must've been USED */ @@ -404,7 +468,7 @@ PUBLIC void slabfree(void *mem, int bytes) LH(s, LIST_USED) = f->sdh.next; ADDHEAD(f, s, LIST_FREE); SLABSANITYCHECK(SCL_DETAIL); - } else if(f->sdh.nused == ITEMSPERPAGE(s, bytes)-1) { + } else if(f->sdh.nused == ITEMSPERPAGE(bytes)-1) { /* Now become USED; must've been FULL */ vm_assert(f->sdh.list == LIST_FULL); UNLINKNODE(f); @@ -422,6 +486,42 @@ PUBLIC void slabfree(void *mem, int bytes) return; } +/*===========================================================================* + * void slablock * + *===========================================================================*/ +PUBLIC void slablock(void *mem, int bytes) +{ + int i; + struct slabheader *s; + struct slabdata *f; + + if(objstats(mem, bytes, &s, &f, &i) != OK) + vm_panic("slablock objstats failed", NO_NUM); + + SLABDATAUNWRITABLE(f); + + FIXME("verify new contents"); + + return; +} + +/*===========================================================================* + * void slabunlock * + *===========================================================================*/ +PUBLIC void slabunlock(void *mem, int bytes) +{ + int i; + struct slabheader *s; + struct slabdata *f; + + if(objstats(mem, bytes, &s, &f, &i) != OK) + vm_panic("slabunlock objstats failed", NO_NUM); + + SLABDATAWRITABLE(f, i); + + return; +} + #if SANITYCHECKS /*===========================================================================* * void slabstats * diff --git a/servers/vm/util.h b/servers/vm/util.h index 7d9082156..2b5bd6374 100644 --- a/servers/vm/util.h +++ b/servers/vm/util.h @@ -8,15 +8,15 @@ #define ELEMENTS(a) (sizeof(a)/sizeof((a)[0])) #if SANITYCHECKS -#define vm_assert(cond) do { \ +#define vm_assert(cond) { \ if(vm_sanitychecklevel > 0 && !(cond)) { \ printf("VM:%s:%d: assert failed: %s\n", \ __FILE__, __LINE__, #cond); \ panic("VM", "assert failed", NO_NUM); \ } \ - } while(0) + } #else -#define vm_assert(cond) +#define vm_assert(cond) ; #endif #define vm_panic(str, n) { char _pline[100]; \ diff --git a/servers/vm/utility.c b/servers/vm/utility.c index 47ed47a40..09b4dbbc0 100644 --- a/servers/vm/utility.c +++ b/servers/vm/utility.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include #include +#include #include "proto.h" #include "glo.h" @@ -119,8 +121,8 @@ struct mem_map *map_ptr; /* memory to remove */ PUBLIC int vm_isokendpt(endpoint_t endpoint, int *proc) { *proc = _ENDPOINT_P(endpoint); - if(*proc < -NR_TASKS || *proc >= NR_PROCS) - return EINVAL; + if(*proc < 0 || *proc >= NR_PROCS) + vm_panic("crazy slot number", *proc); if(*proc >= 0 && endpoint != vmproc[*proc].vm_endpoint) return EDEADSRCDST; if(*proc >= 0 && !(vmproc[*proc].vm_flags & VMF_INUSE)) @@ -163,3 +165,28 @@ char *brk_addr; return 0; }
+/*===========================================================================* + * do_ctl * + *===========================================================================*/ +PUBLIC int do_ctl(message *m) +{ + int pr; + + switch(m->VCTL_WHAT) { + case VCTLP_STATS_MEM: + printmemstats(); + break; + case VCTLP_STATS_EP: + if(vm_isokendpt(m->VCTL_PARAM, &pr) != OK) + return EINVAL; + printregionstats(&vmproc[pr]); + break; + default: + return EINVAL; + } + + return OK; +} + diff --git a/servers/vm/vfs.c b/servers/vm/vfs.c index 62f89fea9..9f8d34899 100644 --- a/servers/vm/vfs.c +++ b/servers/vm/vfs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/servers/vm/vm.h b/servers/vm/vm.h index 0f4040db7..53a9aac0a 100644 --- a/servers/vm/vm.h +++ b/servers/vm/vm.h @@ -5,6 +5,8 @@ #define PAF_CLEAR 0x01 /* Clear physical memory. */ #define PAF_CONTIG 0x02 /* Physically contiguous. */ #define PAF_ALIGN64K 0x04 /* Aligned to 64k boundary. */ +#define PAF_LOWER16MB 0x08 +#define PAF_LOWER1MB 0x10 /* special value for v in pt_allocmap */ #define AM_AUTO ((u32_t) -1) @@ -14,7 +16,10 @@ /* Compile in asserts and custom sanity checks at all? */ #define SANITYCHECKS 0 -#define VMSTATS 1 +#define VMSTATS 0 + +/* Minimum stack region size - 64MB. */ +#define MINSTACKREGION (64*1024*1024) /* If so, this level: */ #define SCL_NONE 0 /* No sanity checks - vm_assert()s only. */ @@ -31,7 +36,9 @@ #define VMP_CATEGORIES 4 /* Flags to pt_writemap(). */ -#define WMF_OVERWRITE 0x01 /* Caller knows map may overwrite. */ +#define WMF_OVERWRITE 0x01 /* Caller knows map may overwrite. */ +#define WMF_WRITEFLAGSONLY 0x02 /* Copy physaddr and update flags. */ +#define WMF_FREE 0x04 /* Free pages overwritten. */ -/* Special value of 'what' to map_page_region meaning: unknown. */ #define MAP_NONE 0xFFFFFFFE + diff --git a/servers/vm/vmproc.h b/servers/vm/vmproc.h index 7b9d6a18e..47beedaa7 100644 --- a/servers/vm/vmproc.h +++ b/servers/vm/vmproc.h @@ -4,6 +4,7 @@ #include #include +#include #include "vm.h" @@ -31,6 +32,9 @@ struct vmproc { /* Heap for brk() to extend. */ struct vir_region *vm_heap; +#define VM_CALL_PRIV_MASK_SIZE BITMAP_CHUNKS(VM_NCALLS) + bitchunk_t vm_call_priv_mask[VM_CALL_PRIV_MASK_SIZE]; + /* State for requests pending to be done to vfs on behalf of * this process. */
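
Some notes on the mechanisms this change introduces; the code sketches below are illustrative and not part of the patch.

- addravl.h and physravl.h instantiate one macro-parameterized AVL implementation twice: each header defines AVL_UNIQUE (the name prefix), AVL_HANDLE, AVL_KEY, the less/greater/factor accessors and the key comparators, then includes cavl_if.h; a matching .c file pulls in cavl_impl.h to generate the code. A hypothetical third instantiation keyed on a 'start' field would follow the same pattern as physravl.h above (struct example_node and the exm_ prefix are assumptions here):

	#define AVL_UNIQUE(id)			exm_ ## id
	#define AVL_HANDLE			struct example_node *
	#define AVL_KEY				vir_bytes
	#define AVL_MAX_DEPTH			30
	#define AVL_NULL			NULL
	#define AVL_GET_LESS(h, a)		(h)->less
	#define AVL_GET_GREATER(h, a)		(h)->greater
	/* Node writes go through USE() so they also work on read-only
	 * slab pages when SANITYCHECKS is enabled. */
	#define AVL_SET_LESS(h1, h2)		USE((h1), (h1)->less = h2;);
	#define AVL_SET_GREATER(h1, h2)		USE((h1), (h1)->greater = h2;);
	#define AVL_GET_BALANCE_FACTOR(h)	(h)->factor
	#define AVL_SET_BALANCE_FACTOR(h, f)	USE((h), (h)->factor = f;);
	#define AVL_SET_ROOT(h, v)		(h)->root = v;
	#define AVL_COMPARE_KEY_KEY(k1, k2) \
		((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
	#define AVL_COMPARE_KEY_NODE(k, h)	AVL_COMPARE_KEY_KEY((k), (h)->start)
	#define AVL_COMPARE_NODE_NODE(h1, h2) \
		AVL_COMPARE_KEY_KEY((h1)->start, (h2)->start)
	#include "cavl_if.h"	/* declares exm_init, exm_insert, exm_remove,
				 * exm_search, exm_start_iter, ... */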
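- phys_regions now hang off a vir_region in a physr tree keyed on their 'offset' field instead of a 'first'/'next' linked list, so every walk in region.c follows the same iterator idiom: position the iterator (least, or at a key via AVL_LESS_EQUAL and friends), then alternate physr_get_iter() and physr_incr_iter() until it returns NULL. The shape of the loop, as a self-contained helper that is not in the patch:

	/* Sum the bytes of physical memory referenced by one vir_region;
	 * mirrors the loops in countregions() and printregionstats(). */
	static vir_bytes region_phys_bytes(struct vir_region *vr)
	{
		struct phys_region *pr;
		physr_iter iter;
		vir_bytes total = 0;

		physr_start_iter_least(vr->phys, &iter);  /* leftmost node */
		while((pr = physr_get_iter(&iter))) {	  /* NULL when done */
			total += pr->ph->length;
			physr_incr_iter(&iter);	  /* next-higher offset */
		}
		return total;
	}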
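- With SANITYCHECKS enabled, slab pages are kept read-only in VM's own address space through vm_pagelock(), so any field update of a slab-allocated object must be bracketed by slabunlock()/slablock(). The USE() macro in sanitycheck.h does the bracketing, and compiles down to just the code itself otherwise. A sketch of the discipline, with a made-up struct:

	struct thing { int a, b; };	/* illustrative only */

	static struct thing *make_thing(void)
	{
		struct thing *t;

		if(!SLABALLOC(t))	/* t = slaballoc(sizeof(*t)) */
			return NULL;
		USE(t,			/* slabunlock(): page writable */
			t->a = 1;
			t->b = 2;);	/* slablock(): read-only again */
		/* An unguarded 't->a = 3;' here would pagefault right
		 * away with SANITYCHECKS on, pinpointing the bad write. */
		return t;
	}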
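- The WRITABLE(r, pb) macro near the top of region.c is now the single place that decides whether a physical block gets mapped with PTF_WRITE; map_ph_writept(), map_pf(), map_handle_memory() and pb_unreferenced() all branch on it. Its cases, tabulated:

	region flags		pb->refcount	mapping
	VR_DIRECT or VR_SHARED	any		writable
	VR_WRITABLE		1		writable
	VR_WRITABLE		> 1		read-only; copied on a
						write fault (copy-on-write)
	otherwise		any		read-only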
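- map_unmap_region() only supports unmapping from the start of a region: map_subfree() releases the first len bytes, vaddr grows by len, and every surviving phys_region has its offset reduced by len. The point of the rebasing loop is to keep vaddr + offset constant for each page:

	/* Before:  page = old_vaddr + pr->offset
	 * After:   vaddr  = old_vaddr + len
	 *          offset = pr->offset - len
	 *          page   = vaddr + offset = old_vaddr + pr->offset
	 * The vm_assert(pr->offset >= len) holds because everything
	 * below len was just freed by map_subfree(). */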