From: Ben Gras Date: Wed, 20 Mar 2013 19:18:52 +0000 (+0000) Subject: vm: new secondary cache code X-Git-Tag: v3.3.0~1007 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/zlib_tech.html?a=commitdiff_plain;h=49eb1f480675178546d93769e98b96ed9f0d7fc7;p=minix.git vm: new secondary cache code Primary purpose of change: to support the mmap implementation, VM must know about both (a) some block metadata for FS cache blocks, i.e. inode numbers and inode offsets where applicable; and (b) *all* cache blocks, i.e. also those of the FS primary caches and not just the blocks that spill into the secondary one. This changes the interface and VM data structures. This change is only for the interface (libminixfs) and VM data structures; the filesystem code is unmodified, so although the secondary cache will be used as normal, blocks will not be annotated with inode information until the FS is modified to provide this information. Until it is modified, mmap of files will fail gracefully on such filesystems. This is indicated to VFS/VM by returning ENOSYS for REQ_PEEK. 
Change-Id: I1d2df6c485e6c5e89eb28d9055076cc02629594e --- diff --git a/commands/service/parse.c b/commands/service/parse.c index a18ebef3e..ea805a073 100644 --- a/commands/service/parse.c +++ b/commands/service/parse.c @@ -732,6 +732,8 @@ struct { "RS_UPDATE", VM_RS_UPDATE }, { "RS_MEMCTL", VM_RS_MEMCTL }, { "PROCCTL", VM_PROCCTL }, + { "MAPCACHEPAGE", VM_MAPCACHEPAGE }, + { "SETCACHEPAGE", VM_SETCACHEPAGE }, { NULL, 0 }, }; diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi index bde486f80..4ae4a5feb 100644 --- a/distrib/sets/lists/minix/mi +++ b/distrib/sets/lists/minix/mi @@ -4634,14 +4634,14 @@ ./usr/tests/minix-posix/test70 minix-sys ./usr/tests/minix-posix/test71 minix-sys ./usr/tests/minix-posix/test72 minix-sys -./usr/tests/minix-posix/test73 minix-sys obsolete +./usr/tests/minix-posix/test73 minix-sys ./usr/tests/minix-posix/test7 minix-sys ./usr/tests/minix-posix/test8 minix-sys ./usr/tests/minix-posix/test9 minix-sys ./usr/tests/minix-posix/testinterp minix-sys ./usr/tests/minix-posix/testsh1 minix-sys ./usr/tests/minix-posix/testsh2 minix-sys -./usr/tests/minix-posix/testvm minix-sys obsolete +./usr/tests/minix-posix/testvm minix-sys ./usr/tests/minix-posix/testvm.conf minix-sys ./usr/tmp minix-sys ./usr/var minix-sys diff --git a/etc/system.conf b/etc/system.conf index 022492521..496888114 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -107,7 +107,7 @@ service mfs { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; # Only basic VM calls allowed + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS @@ -134,7 +134,7 @@ service ext2 { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; # Only basic VM calls allowed + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS @@ -147,7 
+147,7 @@ service pfs { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; # Only basic VM calls allowed + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS diff --git a/include/minix/com.h b/include/minix/com.h index 1daa0409a..f92a567b0 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -993,12 +993,31 @@ /* To VM: map in cache block by FS */ #define VM_MAPCACHEPAGE (VM_RQ_BASE+26) +/* To VM: identify cache block in FS */ +#define VM_SETCACHEPAGE (VM_RQ_BASE+27) + +/* To VFS: fields for request from VM. */ +# define VFS_VMCALL_REQ m10_i1 +# define VFS_VMCALL_FD m10_i2 +# define VFS_VMCALL_REQID m10_i3 +# define VFS_VMCALL_ENDPOINT m10_i4 +# define VFS_VMCALL_OFFSET_LO m10_l1 +# define VFS_VMCALL_OFFSET_HI m10_l2 +# define VFS_VMCALL_LENGTH m10_l3 + +/* Request codes from VM to VFS */ +#define VMVFSREQ_FDLOOKUP 101 +#define VMVFSREQ_FDCLOSE 102 +#define VMVFSREQ_FDIO 103 + /* Calls from VFS. 
*/ -# define VMV_ENDPOINT m1_i1 /* for all VM_VFS_REPLY_* */ -#define VM_VFS_REPLY_OPEN (VM_RQ_BASE+30) -# define VMVRO_FD m1_i2 -#define VM_VFS_REPLY_MMAP (VM_RQ_BASE+31) -#define VM_VFS_REPLY_CLOSE (VM_RQ_BASE+32) +#define VM_VFS_REPLY (VM_RQ_BASE+30) +# define VMV_ENDPOINT m10_i1 +# define VMV_RESULT m10_i2 +# define VMV_REQID m10_i3 +# define VMV_DEV m10_i4 +# define VMV_INO m10_l1 +# define VMV_FD m10_l2 #define VM_REMAP (VM_RQ_BASE+33) # define VMRE_D m1_i1 diff --git a/include/minix/ipc.h b/include/minix/ipc.h index a0f881365..fc61d6c09 100644 --- a/include/minix/ipc.h +++ b/include/minix/ipc.h @@ -30,6 +30,30 @@ typedef struct {long m9l1, m9l2, m9l3, m9l4, m9l5; typedef struct {int m10i1, m10i2, m10i3, m10i4; long m10l1, m10l2, m10l3; } mess_10; +typedef struct { + void *block; + u32_t dev_offset_pages; + u32_t ino_offset_pages; + u32_t ino; + u32_t *flags_ptr; + u32_t dev; + u8_t pages; + u8_t flags; +} mess_vmmcp __packed; + +typedef struct { + endpoint_t who; + u32_t offset; + u32_t dev; + u32_t ino; + u32_t vaddr; + u32_t len; + u16_t fd; + u16_t clearend_and_flags; /* low 12 bits are clearend, rest flags */ +} mess_vm_vfs_mmap __packed; + +typedef struct { u8_t flags; void *addr; } mess_vmmcp_reply __packed; + typedef struct { endpoint_t m_source; /* who sent the message */ int m_type; /* what kind of message is it */ @@ -44,6 +68,9 @@ typedef struct { mess_6 m_m6; mess_9 m_m9; mess_10 m_m10; + mess_vmmcp m_vmmcp; + mess_vmmcp_reply m_vmmcp_reply; + mess_vm_vfs_mmap m_vm_vfs; } m_u; } message __aligned(16); diff --git a/include/minix/libminixfs.h b/include/minix/libminixfs.h index bbeda8877..0be011735 100644 --- a/include/minix/libminixfs.h +++ b/include/minix/libminixfs.h @@ -17,9 +17,16 @@ struct buf { struct buf *lmfs_hash; /* used to link bufs on hash chains */ block_t lmfs_blocknr; /* block number of its (minor) device */ dev_t lmfs_dev; /* major | minor device where block resides */ - char lmfs_dirt; /* BP_CLEAN or BP_DIRTY */ char lmfs_count; /* 
number of users of this buffer */ + char lmfs_needsetcache; /* to be identified to VM */ unsigned int lmfs_bytes; /* Number of bytes allocated in bp */ + u32_t lmfs_flags; /* Flags shared between VM and FS */ + + /* If any, which inode & offset does this block correspond to? + * If none, VMC_NO_INODE + */ + ino_t lmfs_inode; + u64_t lmfs_inode_offset; }; int fs_lookup_credentials(vfs_ucred_t *credentials, @@ -42,10 +49,13 @@ void lmfs_reset_rdwt_err(void); int lmfs_rdwt_err(void); void lmfs_buf_pool(int new_nr_bufs); struct buf *lmfs_get_block(dev_t dev, block_t block,int only_search); +struct buf *lmfs_get_block_ino(dev_t dev, block_t block,int only_search, + ino_t ino, u64_t off); void lmfs_invalidate(dev_t device); void lmfs_put_block(struct buf *bp, int block_type); void lmfs_rw_scattered(dev_t, struct buf **, int, int); void lmfs_setquiet(int q); +int lmfs_do_bpeek(message *); /* calls that libminixfs does into fs */ void fs_blockstats(u32_t *blocks, u32_t *free, u32_t *used); diff --git a/include/minix/vm.h b/include/minix/vm.h index acb0a901a..698905954 100644 --- a/include/minix/vm.h +++ b/include/minix/vm.h @@ -65,5 +65,20 @@ int vm_info_region(endpoint_t who, struct vm_region_info *vri, int count, vir_bytes *next); int vm_procctl(endpoint_t ep, int param); +int vm_set_cacheblock(void *block, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize); + +void *vm_map_cacheblock(u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize); + +/* flags for vm cache functions */ +#define VMMC_FLAGS_LOCKED 0x01 /* someone is updating the flags; don't read/write */ +#define VMMC_DIRTY 0x02 /* dirty buffer and it may not be evicted */ +#define VMMC_EVICTED 0x04 /* VM has evicted the buffer and it's invalid */ +#define VMMC_BLOCK_LOCKED 0x08 /* client is using it and it may not be evicted */ + +/* special inode number for vm cache functions */ +#define VMC_NO_INODE 0 /* to reference a disk block, no 
associated file */ + #endif /* _MINIX_VM_H */ diff --git a/lib/libminixfs/Makefile b/lib/libminixfs/Makefile index ade070374..ae0324ebc 100644 --- a/lib/libminixfs/Makefile +++ b/lib/libminixfs/Makefile @@ -1,6 +1,5 @@ # Makefile for libminixfs .include - LIB= minixfs SRCS= fetch_credentials.c cache.c diff --git a/lib/libminixfs/cache.c b/lib/libminixfs/cache.c index 37d7d2901..eb55bc808 100644 --- a/lib/libminixfs/cache.c +++ b/lib/libminixfs/cache.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ -16,9 +16,6 @@ #include #include -#define BP_CLEAN 0 /* on-disk block and memory copies identical */ -#define BP_DIRTY 1 /* on-disk block and memory copies differ */ - #define BUFHASH(b) ((b) % nr_bufs) #define MARKCLEAN lmfs_markclean @@ -31,6 +28,7 @@ static unsigned int bufs_in_use;/* # bufs currently in use (not on free list)*/ static void rm_lru(struct buf *bp); static void read_block(struct buf *); static void flushall(dev_t dev); +static void freeblock(struct buf *bp); static int vmcache = 0; /* are we using vm's secondary cache? 
(initially not) */ @@ -58,13 +56,6 @@ u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u32_t bfree, bused = btotal-bfree; - /* but we simply need minbufs no matter what, and we don't - * want more than that if we're a memory device - */ - if(majordev == MEMORY_MAJOR) { - return minbufs; - } - /* set a reasonable cache size; cache at most a certain * portion of the used FS, and at most a certain %age of remaining * memory @@ -101,19 +92,19 @@ u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u32_t bfree, void lmfs_markdirty(struct buf *bp) { - bp->lmfs_dirt = BP_DIRTY; + bp->lmfs_flags |= VMMC_DIRTY; } void lmfs_markclean(struct buf *bp) { - bp->lmfs_dirt = BP_CLEAN; + bp->lmfs_flags &= ~VMMC_DIRTY; } int lmfs_isclean(struct buf *bp) { - return bp->lmfs_dirt == BP_CLEAN; + return !(bp->lmfs_flags & VMMC_DIRTY); } dev_t @@ -127,14 +118,109 @@ int lmfs_bytes(struct buf *bp) return bp->lmfs_bytes; } +static void +free_unused_blocks(void) +{ + struct buf *bp; + + int freed = 0, bytes = 0; + printf("libminixfs: freeing; %d blocks in use\n", bufs_in_use); + for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { + if(bp->lmfs_bytes > 0 && bp->lmfs_count == 0) { + freed++; + bytes += bp->lmfs_bytes; + freeblock(bp); + } + } + printf("libminixfs: freeing; %d blocks, %d bytes\n", freed, bytes); +} + +static void +lmfs_alloc_block(struct buf *bp) +{ + ASSERT(!bp->data); + ASSERT(bp->lmfs_bytes == 0); + ASSERT(!(fs_block_size % PAGE_SIZE)); + if((bp->data = minix_mmap(0, fs_block_size, + PROT_READ|PROT_WRITE, MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { + free_unused_blocks(); + if((bp->data = minix_mmap(0, fs_block_size, PROT_READ|PROT_WRITE, + MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { + panic("libminixfs: could not allocate block"); + } + } + assert(bp->data); + bp->lmfs_bytes = fs_block_size; + bp->lmfs_needsetcache = 1; +} + /*===========================================================================* * lmfs_get_block * 
*===========================================================================*/ -struct buf *lmfs_get_block( - register dev_t dev, /* on which device is the block? */ - register block_t block, /* which block is wanted? */ - int only_search /* if NO_READ, don't read, else act normal */ -) +struct buf *lmfs_get_block(register dev_t dev, register block_t block, + int only_search) +{ + return lmfs_get_block_ino(dev, block, only_search, VMC_NO_INODE, 0); +} + +void minix_munmap_t(void *a, int len) +{ + vir_bytes av = (vir_bytes) a; + assert(a); + assert(a != MAP_FAILED); + assert(len > 0); + assert(!(len % PAGE_SIZE)); + assert(!(av % PAGE_SIZE)); + + if(minix_munmap(a, len) < 0) + panic("libminixfs cache: munmap failed"); +} + +static void raisecount(struct buf *bp) +{ + assert(bufs_in_use >= 0); + ASSERT(bp->lmfs_count >= 0); + bp->lmfs_count++; + if(bp->lmfs_count == 1) bufs_in_use++; + assert(bufs_in_use > 0); +} + +static void lowercount(struct buf *bp) +{ + assert(bufs_in_use > 0); + ASSERT(bp->lmfs_count > 0); + bp->lmfs_count--; + if(bp->lmfs_count == 0) bufs_in_use--; + assert(bufs_in_use >= 0); +} + +static void freeblock(struct buf *bp) +{ + ASSERT(bp->lmfs_count == 0); + /* If the block taken is dirty, make it clean by writing it to the disk. + * Avoid hysteresis by flushing all other dirty blocks for the same device. + */ + if (bp->lmfs_dev != NO_DEV) { + if (!lmfs_isclean(bp)) flushall(bp->lmfs_dev); + assert(bp->lmfs_bytes == fs_block_size); + bp->lmfs_dev = NO_DEV; + } + + /* Fill in block's parameters and add it to the hash chain where it goes. 
*/ + MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ + if(bp->lmfs_bytes > 0) { + assert(bp->data); + minix_munmap_t(bp->data, bp->lmfs_bytes); + bp->lmfs_bytes = 0; + bp->data = NULL; + } else assert(!bp->data); +} + +/*===========================================================================* + * lmfs_get_block_ino * + *===========================================================================*/ +struct buf *lmfs_get_block_ino(dev_t dev, block_t block, int only_search, + ino_t ino, u64_t ino_off) { /* Check to see if the requested block is in the block cache. If so, return * a pointer to it. If not, evict some other block and fetch it (unless @@ -152,8 +238,9 @@ struct buf *lmfs_get_block( */ int b; - static struct buf *bp, *prev_ptr; - u64_t yieldid = VM_BLOCKID_NONE /*, getid = make64(dev, block) */; + static struct buf *bp; + u64_t dev_off = (u64_t) block * fs_block_size; + struct buf *prev_ptr; assert(buf_hash); assert(buf); @@ -163,22 +250,52 @@ struct buf *lmfs_get_block( assert(dev != NO_DEV); - /* Search the hash chain for (dev, block). Do_read() can use - * lmfs_get_block(NO_DEV ...) to get an unnamed block to fill with zeros when - * someone wants to read from a hole in a file, in which case this search - * is skipped - */ + if((ino_off % fs_block_size)) { + + printf("cache: unaligned lmfs_get_block_ino ino_off %llu\n", + ino_off); + util_stacktrace(); + } + + /* Search the hash chain for (dev, block). */ b = BUFHASH(block); bp = buf_hash[b]; while (bp != NULL) { if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) { + if(bp->lmfs_flags & VMMC_EVICTED) { + /* We had it but VM evicted it; invalidate it. */ + ASSERT(bp->lmfs_count == 0); + ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); + ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); + bp->lmfs_dev = NO_DEV; + bp->lmfs_bytes = 0; + bp->data = NULL; + break; + } + ASSERT(bp->lmfs_needsetcache == 0); /* Block needed has been found. 
*/ - if (bp->lmfs_count == 0) rm_lru(bp); - bp->lmfs_count++; /* record that block is in use */ + if (bp->lmfs_count == 0) { + rm_lru(bp); + ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); + bp->lmfs_flags |= VMMC_BLOCK_LOCKED; + } + raisecount(bp); ASSERT(bp->lmfs_bytes == fs_block_size); ASSERT(bp->lmfs_dev == dev); ASSERT(bp->lmfs_dev != NO_DEV); + ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); ASSERT(bp->data); + + if(ino != VMC_NO_INODE) { + if(bp->lmfs_inode == VMC_NO_INODE + || bp->lmfs_inode != ino + || bp->lmfs_inode_offset != ino_off) { + bp->lmfs_inode = ino; + bp->lmfs_inode_offset = ino_off; + bp->lmfs_needsetcache = 1; + } + } + return(bp); } else { /* This block is not the one sought. */ @@ -186,29 +303,13 @@ struct buf *lmfs_get_block( } } - /* Desired block is not on available chain. Take oldest block ('front'). */ - if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); - - if(bp->lmfs_bytes < fs_block_size) { - ASSERT(!bp->data); - ASSERT(bp->lmfs_bytes == 0); - if(!(bp->data = alloc_contig( (size_t) fs_block_size, 0, NULL))) { - printf("fs cache: couldn't allocate a new block.\n"); - for(bp = front; - bp && bp->lmfs_bytes < fs_block_size; bp = bp->lmfs_next) - ; - if(!bp) { - panic("no buffer available"); - } - } else { - bp->lmfs_bytes = fs_block_size; - } + /* Desired block is not on available chain. Find a free block to use. */ + if(bp) { + ASSERT(bp->lmfs_flags & VMMC_EVICTED); + } else { + if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); } - - ASSERT(bp); - ASSERT(bp->data); - ASSERT(bp->lmfs_bytes == fs_block_size); - ASSERT(bp->lmfs_count == 0); + assert(bp); rm_lru(bp); @@ -228,25 +329,17 @@ struct buf *lmfs_get_block( } } - /* If the block taken is dirty, make it clean by writing it to the disk. - * Avoid hysteresis by flushing all other dirty blocks for the same device. 
- */ - if (bp->lmfs_dev != NO_DEV) { - if (bp->lmfs_dirt == BP_DIRTY) flushall(bp->lmfs_dev); + freeblock(bp); - /* Are we throwing out a block that contained something? - * Give it to VM for the second-layer cache. - */ - yieldid = make64(bp->lmfs_dev, bp->lmfs_blocknr); - assert(bp->lmfs_bytes == fs_block_size); - bp->lmfs_dev = NO_DEV; - } + bp->lmfs_inode = ino; + bp->lmfs_inode_offset = ino_off; - /* Fill in block's parameters and add it to the hash chain where it goes. */ - MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ + bp->lmfs_flags = VMMC_BLOCK_LOCKED; + bp->lmfs_needsetcache = 0; bp->lmfs_dev = dev; /* fill in device number */ bp->lmfs_blocknr = block; /* fill in block number */ - bp->lmfs_count++; /* record that block is being used */ + ASSERT(bp->lmfs_count == 0); + raisecount(bp); b = BUFHASH(bp->lmfs_blocknr); bp->lmfs_hash = buf_hash[b]; @@ -254,23 +347,26 @@ struct buf *lmfs_get_block( assert(dev != NO_DEV); - /* Go get the requested block unless searching or prefetching. */ - if(only_search == PREFETCH || only_search == NORMAL) { - /* Block is not found in our cache, but we do want it - * if it's in the vm cache. - */ - if(vmcache) { - /* If we can satisfy the PREFETCH or NORMAL request - * from the vm cache, work is done. - */ -#if 0 - if(vm_yield_block_get_block(yieldid, getid, - bp->data, fs_block_size) == OK) { - return bp; - } -#endif + /* Block is not found in our cache, but we do want it + * if it's in the vm cache. + */ + assert(!bp->data); + assert(!bp->lmfs_bytes); + if(vmcache) { + if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off, + &bp->lmfs_flags, fs_block_size)) != MAP_FAILED) { + bp->lmfs_bytes = fs_block_size; + ASSERT(!bp->lmfs_needsetcache); + return bp; } } + bp->data = NULL; + + /* Not in the cache; reserve memory for its contents. */ + + lmfs_alloc_block(bp); + + assert(bp->data); if(only_search == PREFETCH) { /* PREFETCH: don't do i/o. 
*/ @@ -278,15 +374,7 @@ struct buf *lmfs_get_block( } else if (only_search == NORMAL) { read_block(bp); } else if(only_search == NO_READ) { - /* we want this block, but its contents - * will be overwritten. VM has to forget - * about it. - */ -#if 0 - if(vmcache) { - vm_forgetblock(getid); - } -#endif + /* This block will be overwritten by new contents. */ } else panic("unexpected only_search value: %d", only_search); @@ -310,15 +398,21 @@ int block_type; /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ * the integrity of the file system (e.g., inode blocks) are written to * disk immediately if they are dirty. */ + dev_t dev; + u64_t dev_off; + int r; + if (bp == NULL) return; /* it is easier to check here than in caller */ - bp->lmfs_count--; /* there is one use fewer now */ - if (bp->lmfs_count != 0) return; /* block is still in use */ + dev = bp->lmfs_dev; - bufs_in_use--; /* one fewer block buffers in use */ + dev_off = (u64_t) bp->lmfs_blocknr * fs_block_size; + + lowercount(bp); + if (bp->lmfs_count != 0) return; /* block is still in use */ /* Put this block back on the LRU chain. */ - if (bp->lmfs_dev == DEV_RAM || (block_type & ONE_SHOT)) { + if (dev == DEV_RAM || (block_type & ONE_SHOT)) { /* Block probably won't be needed quickly. Put it on front of chain. * It will be the next block to be evicted from the cache. 
*/ @@ -342,6 +436,25 @@ int block_type; /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ rear->lmfs_next = bp; rear = bp; } + + assert(bp->lmfs_flags & VMMC_BLOCK_LOCKED); + bp->lmfs_flags &= ~VMMC_BLOCK_LOCKED; + + /* block has sensible content - if necessary, identify it to VM */ + if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) { + if((r=vm_set_cacheblock(bp->data, dev, dev_off, + bp->lmfs_inode, bp->lmfs_inode_offset, + &bp->lmfs_flags, fs_block_size)) != OK) { + if(r == ENOSYS) { + printf("libminixfs: ENOSYS, disabling VM calls\n"); + vmcache = 0; + } else { + panic("libminixfs: setblock of 0x%lx dev 0x%x off " + "0x%llx failed\n", bp->data, dev, dev_off); + } + } + } + bp->lmfs_needsetcache = 0; } /*===========================================================================* @@ -363,9 +476,28 @@ register struct buf *bp; /* buffer pointer */ assert(dev != NO_DEV); + ASSERT(bp->lmfs_bytes == fs_block_size); + ASSERT(fs_block_size > 0); + ASSERT(!(fs_block_size % PAGE_SIZE)); + pos = mul64u(bp->lmfs_blocknr, fs_block_size); - r = bdev_read(dev, pos, bp->data, fs_block_size, - BDEV_NOFLAGS); + if(fs_block_size > PAGE_SIZE) { +#define MAXPAGES 20 + vir_bytes vaddr = (vir_bytes) bp->data; + int p; + static iovec_t iovec[MAXPAGES]; + int pages = fs_block_size/PAGE_SIZE; + ASSERT(pages > 1 && pages < MAXPAGES); + for(p = 0; p < pages; p++) { + iovec[p].iov_addr = vaddr; + iovec[p].iov_size = PAGE_SIZE; + vaddr += PAGE_SIZE; + } + r = bdev_gather(dev, pos, iovec, pages, BDEV_NOFLAGS); + } else { + r = bdev_read(dev, pos, bp->data, fs_block_size, + BDEV_NOFLAGS); + } if (r < 0) { printf("fs cache: I/O error on device %d/%d, block %u\n", major(dev), minor(dev), bp->lmfs_blocknr); @@ -381,6 +513,7 @@ register struct buf *bp; /* buffer pointer */ /* Report read errors to interested parties. 
*/ rdwt_err = r; } + } /*===========================================================================* @@ -394,10 +527,16 @@ void lmfs_invalidate( register struct buf *bp; - for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) - if (bp->lmfs_dev == device) bp->lmfs_dev = NO_DEV; - - /* vm_forgetblocks(); */ + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { + if (bp->lmfs_dev == device) { + assert(bp->data); + assert(bp->lmfs_bytes > 0); + minix_munmap_t(bp->data, bp->lmfs_bytes); + bp->lmfs_dev = NO_DEV; + bp->lmfs_bytes = 0; + bp->data = NULL; + } + } } /*===========================================================================* @@ -423,7 +562,7 @@ static void flushall(dev_t dev) } for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) { - if (bp->lmfs_dirt == BP_DIRTY && bp->lmfs_dev == dev) { + if (!lmfs_isclean(bp) && bp->lmfs_dev == dev) { dirty[ndirty++] = bp; } } @@ -449,16 +588,22 @@ void lmfs_rw_scattered( register iovec_t *iop; static iovec_t *iovec = NULL; u64_t pos; - int j, r; + int iov_per_block; STATICINIT(iovec, NR_IOREQS); + assert(dev != NO_DEV); + assert(!(fs_block_size % PAGE_SIZE)); + assert(fs_block_size > 0); + iov_per_block = fs_block_size / PAGE_SIZE; + /* (Shell) sort buffers on lmfs_blocknr. */ gap = 1; do gap = 3 * gap + 1; while (gap <= bufqsize); while (gap != 1) { + int j; gap /= 3; for (j = gap; j < bufqsize; j++) { for (i = j - gap; @@ -475,17 +620,33 @@ void lmfs_rw_scattered( * went fine, otherwise the error code for the first failed transfer. 
*/ while (bufqsize > 0) { - for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) { - bp = bufq[j]; - if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + j) break; - iop->iov_addr = (vir_bytes) bp->data; - iop->iov_size = (vir_bytes) fs_block_size; + int nblocks = 0, niovecs = 0; + int r; + for (iop = iovec; nblocks < bufqsize; nblocks++) { + int p; + vir_bytes vdata; + bp = bufq[nblocks]; + if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + nblocks) + break; + if(niovecs >= NR_IOREQS-iov_per_block) break; + vdata = (vir_bytes) bp->data; + for(p = 0; p < iov_per_block; p++) { + iop->iov_addr = vdata; + iop->iov_size = PAGE_SIZE; + vdata += PAGE_SIZE; + iop++; + niovecs++; + } } + + assert(nblocks > 0); + assert(niovecs > 0); + pos = mul64u(bufq[0]->lmfs_blocknr, fs_block_size); if (rw_flag == READING) - r = bdev_gather(dev, pos, iovec, j, BDEV_NOFLAGS); + r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS); else - r = bdev_scatter(dev, pos, iovec, j, BDEV_NOFLAGS); + r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS); /* Harvest the results. The driver may have returned an error, or it * may have done less than what we asked for. @@ -494,13 +655,12 @@ void lmfs_rw_scattered( printf("fs cache: I/O error %d on device %d/%d, block %u\n", r, major(dev), minor(dev), bufq[0]->lmfs_blocknr); } - for (i = 0; i < j; i++) { + for (i = 0; i < nblocks; i++) { bp = bufq[i]; if (r < (ssize_t) fs_block_size) { /* Transfer failed. */ if (i == 0) { bp->lmfs_dev = NO_DEV; /* Invalidate block */ - /* vm_forgetblocks(); */ } break; } @@ -512,8 +672,8 @@ void lmfs_rw_scattered( } r -= fs_block_size; } - bufq += i; - bufqsize -= i; + bufq += nblocks; + bufqsize -= nblocks; if (rw_flag == READING) { /* Don't bother reading more than the device is willing to * give at this time. Don't forget to release those extras. @@ -543,7 +703,6 @@ struct buf *bp; /* Remove a block from its LRU chain. 
*/ struct buf *next_ptr, *prev_ptr; - bufs_in_use++; next_ptr = bp->lmfs_next; /* successor on LRU chain */ prev_ptr = bp->lmfs_prev; /* predecessor on LRU chain */ if (prev_ptr != NULL) @@ -599,13 +758,10 @@ void lmfs_set_blocksize(int new_block_size, int major) * - our main FS device isn't a memory device */ -#if 0 vmcache = 0; - if(vm_forgetblock(VM_BLOCKID_NONE) != ENOSYS && - may_use_vmcache && major != MEMORY_MAJOR) { + + if(may_use_vmcache) vmcache = 1; - } -#endif } /*===========================================================================* @@ -624,7 +780,7 @@ void lmfs_buf_pool(int new_nr_bufs) for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { if(bp->data) { assert(bp->lmfs_bytes > 0); - free_contig(bp->data, bp->lmfs_bytes); + minix_munmap_t(bp->data, bp->lmfs_bytes); } } } @@ -659,8 +815,6 @@ void lmfs_buf_pool(int new_nr_bufs) for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->lmfs_hash = bp->lmfs_next; buf_hash[0] = front; - - /* vm_forgetblocks(); */ } int lmfs_bufs_in_use(void) @@ -677,7 +831,7 @@ void lmfs_flushall(void) { struct buf *bp; for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) - if(bp->lmfs_dev != NO_DEV && bp->lmfs_dirt == BP_DIRTY) + if(bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp)) flushall(bp->lmfs_dev); } @@ -700,3 +854,4 @@ int lmfs_rdwt_err(void) { return rdwt_err; } + diff --git a/lib/libsys/Makefile b/lib/libsys/Makefile index 0d4d43785..490f62e75 100644 --- a/lib/libsys/Makefile +++ b/lib/libsys/Makefile @@ -79,6 +79,7 @@ SRCS+= \ tickdelay.c \ timers.c \ vm_brk.c \ + vm_cache.c \ vm_exit.c \ vm_fork.c \ vm_info.c \ diff --git a/lib/libsys/vm_cache.c b/lib/libsys/vm_cache.c new file mode 100644 index 000000000..73883203c --- /dev/null +++ b/lib/libsys/vm_cache.c @@ -0,0 +1,62 @@ + +#include "syslib.h" + +#include +#include + +#include +#include +#include +#include + +int vm_cachecall(message *m, int call, void *addr, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + if(blocksize % PAGE_SIZE) + 
panic("blocksize %d should be a multiple of pagesize %d\n", + blocksize, PAGE_SIZE); + + if(ino_offset % PAGE_SIZE) + panic("inode offset %d should be a multiple of pagesize %d\n", + ino_offset, PAGE_SIZE); + + if(dev_offset % PAGE_SIZE) + panic("dev offset offset %d should be a multiple of pagesize %d\n", + dev_offset, PAGE_SIZE); + + memset(m, 0, sizeof(*m)); + + assert(dev != NO_DEV); + + m->m_u.m_vmmcp.dev_offset_pages = dev_offset/PAGE_SIZE; + m->m_u.m_vmmcp.ino_offset_pages = ino_offset/PAGE_SIZE; + m->m_u.m_vmmcp.ino = ino; + m->m_u.m_vmmcp.block = addr; + m->m_u.m_vmmcp.flags_ptr = flags; + m->m_u.m_vmmcp.dev = dev; + m->m_u.m_vmmcp.pages = blocksize / PAGE_SIZE; + m->m_u.m_vmmcp.flags = 0; + + return _taskcall(VM_PROC_NR, call, m); +} + +void *vm_map_cacheblock(u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + message m; + + if(vm_cachecall(&m, VM_MAPCACHEPAGE, NULL, dev, dev_offset, + ino, ino_offset, flags, blocksize) != OK) + return MAP_FAILED; + + return m.m_u.m_vmmcp_reply.addr; +} + +int vm_set_cacheblock(void *block, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + message m; + + return vm_cachecall(&m, VM_SETCACHEPAGE, block, dev, dev_offset, + ino, ino_offset, flags, blocksize); +} diff --git a/servers/vm/Makefile b/servers/vm/Makefile index 1db8f8288..ea9a67f37 100644 --- a/servers/vm/Makefile +++ b/servers/vm/Makefile @@ -5,7 +5,8 @@ PROG= vm SRCS= main.c alloc.c utility.c exit.c fork.c break.c \ mmap.c slaballoc.c region.c pagefaults.c \ rs.c queryexit.c pb.c regionavl.c \ - mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c + mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c \ + mem_cache.c cache.c .if ${MACHINE_ARCH} == "earm" LDFLAGS+= -T ${.CURDIR}/arch/${MACHINE_ARCH}/vm.lds diff --git a/servers/vm/alloc.c b/servers/vm/alloc.c index 658cbd2c5..09c252fca 100644 --- a/servers/vm/alloc.c +++ b/servers/vm/alloc.c @@ -257,11 +257,9 @@ 
phys_clicks alloc_mem(phys_clicks clicks, u32_t memflags) clicks += align_clicks; } - mem = alloc_pages(clicks, memflags); - if(mem == NO_MEM) { - /* free_yielded(clicks * CLICK_SIZE); */ - mem = alloc_pages(clicks, memflags); - } + do { + mem = alloc_pages(clicks, memflags); + } while(mem == NO_MEM && cache_freepages(clicks) > 0); if(mem == NO_MEM) return mem; diff --git a/servers/vm/break.c b/servers/vm/break.c index 0c38eab73..7ecaa41cb 100644 --- a/servers/vm/break.c +++ b/servers/vm/break.c @@ -65,8 +65,9 @@ int real_brk(vmp, v) struct vmproc *vmp; vir_bytes v; { - if(map_region_extend_upto_v(vmp, v) == OK) + if(map_region_extend_upto_v(vmp, v) == OK) { return OK; + } return(ENOMEM); } diff --git a/servers/vm/cache.c b/servers/vm/cache.c new file mode 100644 index 000000000..fd6be5ab3 --- /dev/null +++ b/servers/vm/cache.c @@ -0,0 +1,311 @@ + +/* File that implements the data structure, insert, lookup and remove + * functions for file system cache blocks. + * + * Cache blocks can be mapped into the memory of processes by the + * 'cache' and 'file' memory types. 
+ */ + +#include +#include + +#include + +#include "proto.h" +#include "vm.h" +#include "region.h" +#include "glo.h" +#include "cache.h" + +/* cache datastructure */ +#define HASHSIZE 65536 + +static struct cached_page *cache_hash_bydev[HASHSIZE]; +static struct cached_page *cache_hash_byino[HASHSIZE]; +static struct cached_page *lru_oldest = NULL, *lru_newest = NULL; + +static u32_t cached_pages = 0; + +static void lru_rm(struct cached_page *hb) +{ + struct cached_page *newer = hb->newer, *older = hb->older; + assert(lru_newest); + assert(lru_oldest); + if(newer) { + assert(newer->older == hb); + newer->older = older; + } + if(older) { + assert(older->newer == hb); + older->newer = newer; + } + + if(lru_newest == hb) { assert(!newer); lru_newest = older; } + if(lru_oldest == hb) { assert(!older); lru_oldest = newer; } + + if(lru_newest) assert(lru_newest->newer == NULL); + if(lru_oldest) assert(lru_oldest->older == NULL); + + cached_pages--; +} + +static void lru_add(struct cached_page *hb) +{ + if(lru_newest) { + assert(lru_oldest); + assert(!lru_newest->newer); + lru_newest->newer = hb; + } else { + assert(!lru_oldest); + lru_oldest = hb; + } + + hb->older = lru_newest; + hb->newer = NULL; + lru_newest = hb; + + cached_pages++; +} + +void cache_lru_touch(struct cached_page *hb) +{ + lru_rm(hb); + lru_add(hb); +} + +static __inline u32_t makehash(u32_t p1, u64_t p2) +{ + u32_t offlo = ex64lo(p2), offhi = ex64hi(p2), + v = 0x12345678; + hash_mix(p1, offlo, offhi); + hash_final(offlo, offhi, v); + + return v % HASHSIZE; +} + +#if CACHE_SANITY +void cache_sanitycheck_internal(void) +{ + int h; + int n = 0; + int byino = 0; + int withino = 0; + int bydev_total = 0, lru_total = 0; + struct cached_page *cp; + + for(h = 0; h < HASHSIZE; h++) { + for(cp = cache_hash_bydev[h]; cp; cp = cp->hash_next_dev) { + assert(cp->dev != NO_DEV); + assert(h == makehash(cp->dev, cp->dev_offset)); + assert(cp == find_cached_page_bydev(cp->dev, cp->dev_offset, cp->ino, cp->ino_offset)); 
+ if(cp->ino != VMC_NO_INODE) withino++; + bydev_total++; + n++; + assert(n < 1500000); + } + for(cp = cache_hash_byino[h]; cp; cp = cp->hash_next_ino) { + assert(cp->dev != NO_DEV); + assert(cp->ino != VMC_NO_INODE); + assert(h == makehash(cp->ino, cp->ino_offset)); + byino++; + n++; + assert(n < 1500000); + } + } + + assert(byino == withino); + + if(lru_newest) { + assert(lru_oldest); + assert(!lru_newest->newer); + assert(!lru_oldest->older); + } else { + assert(!lru_oldest); + } + + for(cp = lru_oldest; cp; cp = cp->newer) { + struct cached_page *newer = cp->newer, + *older = cp->older; + if(newer) assert(newer->older == cp); + if(older) assert(older->newer == cp); + lru_total++; + } + + assert(lru_total == bydev_total); + + assert(lru_total == cached_pages); +} +#endif + +#define rmhash_f(fname, nextfield) \ +static void \ +fname(struct cached_page *cp, struct cached_page **head) \ +{ \ + struct cached_page *hb; \ + if(*head == cp) { *head = cp->nextfield; return; } \ + for(hb = *head; hb && cp != hb->nextfield; hb = hb->nextfield) ; \ + assert(hb); assert(hb->nextfield == cp); \ + hb->nextfield = cp->nextfield; \ + return; \ +} + +rmhash_f(rmhash_byino, hash_next_ino) +rmhash_f(rmhash_bydev, hash_next_dev) + +static void addcache_byino(struct cached_page *hb) +{ + int hv_ino = makehash(hb->ino, hb->ino_offset); + assert(hb->ino != VMC_NO_INODE); + hb->hash_next_ino = cache_hash_byino[hv_ino]; + cache_hash_byino[hv_ino] = hb; +} + +static void +update_inohash(struct cached_page *hb, ino_t ino, u64_t ino_off) +{ + assert(ino != VMC_NO_INODE); + if(hb->ino != VMC_NO_INODE) { + int h = makehash(hb->ino, hb->ino_offset); + rmhash_byino(hb, &cache_hash_byino[h]); + } + hb->ino = ino; + hb->ino_offset = ino_off; + addcache_byino(hb); +} + +struct cached_page * +find_cached_page_bydev(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, int touchlru) +{ + struct cached_page *hb; + + for(hb = cache_hash_bydev[makehash(dev, dev_off)]; hb; hb=hb->hash_next_dev) { + 
if(hb->dev == dev && hb->dev_offset == dev_off) { + if(ino != VMC_NO_INODE) { + if(hb->ino != ino || hb->ino_offset != ino_off) { + update_inohash(hb, ino, ino_off); + } + } + + if(touchlru) cache_lru_touch(hb); + + return hb; + } + } + + return NULL; +} + +struct cached_page *find_cached_page_byino(dev_t dev, ino_t ino, u64_t ino_off, int touchlru) +{ + struct cached_page *hb; + + assert(ino != VMC_NO_INODE); + assert(dev != NO_DEV); + + for(hb = cache_hash_byino[makehash(ino, ino_off)]; hb; hb=hb->hash_next_ino) { + if(hb->dev == dev && hb->ino == ino && hb->ino_offset == ino_off) { + if(touchlru) cache_lru_touch(hb); + + return hb; + } + } + + return NULL; +} + +int addcache(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, struct phys_block *pb) +{ + int hv_dev; + struct cached_page *hb; + + if(pb->flags & PBF_INCACHE) { + printf("VM: already in cache\n"); + return EINVAL; + } + + if(!SLABALLOC(hb)) { + printf("VM: no memory for cache node\n"); + return ENOMEM; + } + + assert(dev != NO_DEV); +#if CACHE_SANITY + assert(!find_cached_page_bydev(dev, dev_off, ino, ino_off, 0)); /* touchlru=0: lookup only, (dev, dev_off) must not be cached yet */ +#endif + + hb->dev = dev; + hb->dev_offset = dev_off; + hb->ino = ino; + hb->ino_offset = ino_off; + hb->page = pb; + hb->page->refcount++; /* block also referenced by cache now */ + hb->page->flags |= PBF_INCACHE; + + hv_dev = makehash(dev, dev_off); + + hb->hash_next_dev = cache_hash_bydev[hv_dev]; + cache_hash_bydev[hv_dev] = hb; + + if(hb->ino != VMC_NO_INODE) + addcache_byino(hb); + + lru_add(hb); + + return OK; +} + +void rmcache(struct cached_page *cp) +{ + struct phys_block *pb = cp->page; + int hv_dev = makehash(cp->dev, cp->dev_offset); + + assert(cp->page->flags & PBF_INCACHE); + + cp->page->flags &= ~PBF_INCACHE; + + rmhash_bydev(cp, &cache_hash_bydev[hv_dev]); + if(cp->ino != VMC_NO_INODE) { + int hv_ino = makehash(cp->ino, cp->ino_offset); + rmhash_byino(cp, &cache_hash_byino[hv_ino]); + } + + assert(cp->page->refcount >= 1); + cp->page->refcount--; + + lru_rm(cp); + + 
if(pb->refcount == 0) { + assert(pb->phys != MAP_NONE); + free_mem(ABS2CLICK(pb->phys), 1); + SLABFREE(pb); + } + + SLABFREE(cp); +} + +int cache_freepages(int pages) +{ + struct cached_page *cp, *newercp; + int freed = 0; + int oldsteps = 0; + int skips = 0; + + for(cp = lru_oldest; cp && freed < pages; cp = newercp) { + newercp = cp->newer; + assert(cp->page->refcount >= 1); + if(cp->page->refcount == 1) { + rmcache(cp); + freed++; + skips = 0; + } else skips++; + oldsteps++; + } + + return freed; +} + +void get_stats_info(struct vm_stats_info *vsi) +{ + vsi->vsi_cached = cached_pages; +} + diff --git a/servers/vm/cache.h b/servers/vm/cache.h new file mode 100644 index 000000000..581349af8 --- /dev/null +++ b/servers/vm/cache.h @@ -0,0 +1,21 @@ + +struct cached_page { + /* - The (dev, dev_offset) pair are unique; + * the (ino, ino_offset) pair is information and + * might be missing. duplicate do not make sense + * although it won't bother VM much. + * - dev must always be valid, i.e. not NO_DEV + * - ino may be unknown, i.e. 
VMC_NO_INODE + */ + dev_t dev; /* which dev is it on */ + u64_t dev_offset; /* offset within dev */ + + ino_t ino; /* which ino is it about */ + u64_t ino_offset; /* offset within ino */ + struct phys_block *page; /* page ptr */ + struct cached_page *older; /* older in lru chain */ + struct cached_page *newer; /* newer in lru chain */ + struct cached_page *hash_next_dev; /* next in hash chain (bydev) */ + struct cached_page *hash_next_ino; /* next in hash chain (byino) */ +}; + diff --git a/servers/vm/exit.c b/servers/vm/exit.c index e6da3b6d9..f287b327a 100644 --- a/servers/vm/exit.c +++ b/servers/vm/exit.c @@ -29,21 +29,20 @@ void free_proc(struct vmproc *vmp) map_free_proc(vmp); pt_free(&vmp->vm_pt); region_init(&vmp->vm_regions_avl); - vmp->vm_region_top = 0; #if VMSTATS vmp->vm_bytecopies = 0; #endif + vmp->vm_region_top = 0; } void clear_proc(struct vmproc *vmp) { region_init(&vmp->vm_regions_avl); - vmp->vm_region_top = 0; - vmp->vm_callback = NULL; /* No pending vfs callback. */ vmp->vm_flags = 0; /* Clear INUSE, so slot is free. */ #if VMSTATS vmp->vm_bytecopies = 0; #endif + vmp->vm_region_top = 0; } /*===========================================================================* @@ -61,6 +60,7 @@ SANITYCHECK(SCL_FUNCTIONS); return EINVAL; } vmp = &vmproc[proc]; + if(!(vmp->vm_flags & VMF_EXITING)) { printf("VM: unannounced VM_EXIT %d\n", msg->VME_ENDPOINT); return EINVAL; diff --git a/servers/vm/fork.c b/servers/vm/fork.c index 39dcd9d8b..877ec7f7b 100644 --- a/servers/vm/fork.c +++ b/servers/vm/fork.c @@ -102,10 +102,10 @@ int do_fork(message *msg) * and its return value needn't be checked. 
*/ vir = msgaddr; - if (handle_memory(vmc, vir, sizeof(message), 1) != OK) + if (handle_memory(vmc, vir, sizeof(message), 1, NULL, 0, 0) != OK) panic("do_fork: handle_memory for child failed\n"); vir = msgaddr; - if (handle_memory(vmp, vir, sizeof(message), 1) != OK) + if (handle_memory(vmp, vir, sizeof(message), 1, NULL, 0, 0) != OK) panic("do_fork: handle_memory for parent failed\n"); } diff --git a/servers/vm/glo.h b/servers/vm/glo.h index 9ef20fcaa..4a5e77fc3 100644 --- a/servers/vm/glo.h +++ b/servers/vm/glo.h @@ -23,15 +23,18 @@ EXTERN kinfo_t kernel_boot_info; #if SANITYCHECKS EXTERN int nocheck; EXTERN int incheck; -EXTERN long vm_sanitychecklevel; EXTERN int sc_lastline; EXTERN char *sc_lastfile; #endif +extern struct minix_kerninfo *_minix_kerninfo; + /* mem types */ EXTERN mem_type_t mem_type_anon, /* anonymous memory */ mem_type_directphys, /* direct physical mapping memory */ mem_type_anon_contig, /* physically contig anon memory */ + mem_type_cache, /* disk cache */ + mem_type_mappedfile, /* memory with file contents */ mem_type_shared; /* memory shared by multiple processes */ /* total number of memory pages */ diff --git a/servers/vm/main.c b/servers/vm/main.c index 884218bb0..9cd14b444 100644 --- a/servers/vm/main.c +++ b/servers/vm/main.c @@ -90,6 +90,7 @@ int main(void) /* This is VM's main loop. */ while (TRUE) { int r, c; + u32_t type, param; SANITYCHECK(SCL_TOP); if(missing_spares > 0) { @@ -107,7 +108,11 @@ int main(void) who_e = msg.m_source; if(vm_isokendpt(who_e, &caller_slot) != OK) panic("invalid caller %d", who_e); - c = CALLNUMBER(msg.m_type); + + type = param = msg.m_type; + type &= 0x0000FFFF; + param >>= 16; + c = CALLNUMBER(type); result = ENOSYS; /* Out of range or restricted calls return this. 
*/ if(msg.m_type == RS_INIT && msg.m_source == RS_PROC_NR) { @@ -118,7 +123,6 @@ int main(void) "message!\n", msg.m_source); } do_pagefaults(&msg); - pt_clearmapcache(); /* * do not reply to this call, the caller is unblocked by * a sys_vmctl() call in do_pagefaults if success. VM panics @@ -322,10 +326,6 @@ void init_vm(void) assert(kernel_boot_info.mmap_size > 0); assert(kernel_boot_info.mods_with_kernel > 0); -#if SANITYCHECKS - env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX); -#endif - /* Get chunks of available memory. */ get_mem_chunks(mem_chunks); @@ -431,6 +431,10 @@ void init_vm(void) CALLMAP(VM_QUERY_EXIT, do_query_exit); CALLMAP(VM_WATCH_EXIT, do_watch_exit); + /* Cache blocks. */ + CALLMAP(VM_MAPCACHEPAGE, do_mapcache); + CALLMAP(VM_SETCACHEPAGE, do_setcache); + /* Initialize the structures for queryexit */ init_query_exit(); diff --git a/servers/vm/mem_anon.c b/servers/vm/mem_anon.c index 55d30bc23..acd346d3c 100644 --- a/servers/vm/mem_anon.c +++ b/servers/vm/mem_anon.c @@ -17,10 +17,12 @@ * pointers. 
*/ -static int anon_reference(struct phys_region *pr); +static void anon_split(struct vmproc *vmp, struct vir_region *vr, + struct vir_region *r1, struct vir_region *r2); +static int anon_lowshrink(struct vir_region *vr, vir_bytes len); static int anon_unreference(struct phys_region *pr); static int anon_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); static int anon_sanitycheck(struct phys_region *pr, char *file, int line); static int anon_writable(struct phys_region *pr); static int anon_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l); @@ -29,21 +31,17 @@ static int anon_refcount(struct vir_region *vr); struct mem_type mem_type_anon = { .name = "anonymous memory", - .ev_reference = anon_reference, .ev_unreference = anon_unreference, .ev_pagefault = anon_pagefault, .ev_resize = anon_resize, .ev_sanitycheck = anon_sanitycheck, + .ev_lowshrink = anon_lowshrink, + .ev_split = anon_split, .regionid = anon_regionid, .writable = anon_writable, .refcount = anon_refcount }; -static int anon_reference(struct phys_region *pr) -{ - return OK; -} - static int anon_unreference(struct phys_region *pr) { assert(pr->ph->refcount == 0); @@ -53,10 +51,9 @@ static int anon_unreference(struct phys_region *pr) } static int anon_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, void *st, int l) { phys_bytes new_page, new_page_cl; - struct phys_block *pb; u32_t allocflags; allocflags = vrallocflags(region->flags); @@ -83,20 +80,7 @@ static int anon_pagefault(struct vmproc *vmp, struct vir_region *region, assert(region->flags & VR_WRITABLE); - if(sys_abscopy(ph->ph->phys, new_page, VM_PAGE_SIZE) != OK) { - panic("VM: abscopy failed\n"); - return EFAULT; - } - - if(!(pb = pb_new(new_page))) { - free_mem(new_page_cl, 1); - return ENOMEM; - } - - 
pb_unreferenced(region, ph, 0); - pb_link(ph, pb, ph->offset, region); - - return OK; + return mem_cow(region, ph, new_page_cl, new_page); } static int anon_sanitycheck(struct phys_region *pr, char *file, int line) @@ -137,8 +121,18 @@ static u32_t anon_regionid(struct vir_region *region) return region->id; } +static int anon_lowshrink(struct vir_region *vr, vir_bytes len) +{ + return OK; +} + static int anon_refcount(struct vir_region *vr) { return 1 + vr->remaps; } +static void anon_split(struct vmproc *vmp, struct vir_region *vr, + struct vir_region *r1, struct vir_region *r2) +{ + return; +} diff --git a/servers/vm/mem_anon_contig.c b/servers/vm/mem_anon_contig.c index f11adc676..1f2750d18 100644 --- a/servers/vm/mem_anon_contig.c +++ b/servers/vm/mem_anon_contig.c @@ -8,10 +8,10 @@ #include "region.h" #include "glo.h" -static int anon_contig_reference(struct phys_region *pr); +static int anon_contig_reference(struct phys_region *, struct phys_region *); static int anon_contig_unreference(struct phys_region *pr); static int anon_contig_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *st, int); static int anon_contig_sanitycheck(struct phys_region *pr, char *file, int line); static int anon_contig_writable(struct phys_region *pr); static int anon_contig_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l); @@ -29,7 +29,7 @@ struct mem_type mem_type_anon_contig = { }; static int anon_contig_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, void *s, int l) { panic("anon_contig_pagefault: pagefault cannot happen"); } @@ -50,7 +50,7 @@ static int anon_contig_new(struct vir_region *region) struct phys_block *pb = pb_new(MAP_NONE); struct phys_region *pr = NULL; if(pb) - pr = pb_reference(pb, p * VM_PAGE_SIZE, region); + pr = pb_reference(pb, p * 
VM_PAGE_SIZE, region, &mem_type_anon_contig); if(!pr) { if(pb) pb_free(pb); map_free(region); @@ -85,7 +85,8 @@ static int anon_contig_resize(struct vmproc *vmp, struct vir_region *vr, vir_byt return ENOMEM; } -static int anon_contig_reference(struct phys_region *pr) +static int anon_contig_reference(struct phys_region *pr, + struct phys_region *newpr) { printf("VM: cannot fork with physically contig memory.\n"); return ENOMEM; diff --git a/servers/vm/mem_cache.c b/servers/vm/mem_cache.c new file mode 100644 index 000000000..b5fd758c8 --- /dev/null +++ b/servers/vm/mem_cache.c @@ -0,0 +1,228 @@ + +/* This file implements the disk cache. + * + * If they exist anywhere, cached pages are always in a private + * VM datastructure. + * + * They might also be any combination of: + * - be mapped in by a filesystem for reading/writing by it + * - be mapped in by a process as the result of an mmap call (future) + * + * This file manages the datastructure of all cache blocks, and + * mapping them in and out of filesystems. 
+ */ + +#include +#include + +#include + +#include "proto.h" +#include "vm.h" +#include "region.h" +#include "glo.h" +#include "cache.h" + +static int cache_reference(struct phys_region *pr, struct phys_region *pr2); +static int cache_unreference(struct phys_region *pr); +static int cache_sanitycheck(struct phys_region *pr, char *file, int line); +static int cache_writable(struct phys_region *pr); +static int cache_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l); +static int cache_pagefault(struct vmproc *vmp, struct vir_region *region, + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); + +struct mem_type mem_type_cache = { + .name = "cache memory", + .ev_reference = cache_reference, + .ev_unreference = cache_unreference, + .ev_resize = cache_resize, + .ev_sanitycheck = cache_sanitycheck, + .ev_pagefault = cache_pagefault, + .writable = cache_writable, +}; + +static int cache_reference(struct phys_region *pr, struct phys_region *pr2) +{ + return OK; +} + +static int cache_unreference(struct phys_region *pr) +{ + return mem_type_anon.ev_unreference(pr); +} + +static int cache_sanitycheck(struct phys_region *pr, char *file, int line) +{ + MYASSERT(usedpages_add(pr->ph->phys, VM_PAGE_SIZE) == OK); + return OK; +} + +static int cache_writable(struct phys_region *pr) +{ + /* Cache blocks are at the moment only used by filesystems so always writable. 
*/ + assert(pr->ph->refcount > 0); + return pr->ph->phys != MAP_NONE; +} + +static int cache_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l) +{ + printf("VM: cannot resize cache blocks.\n"); + return ENOMEM; +} + +int +do_mapcache(message *msg) +{ + dev_t dev = msg->m_u.m_vmmcp.dev; + u64_t dev_off = (u64_t) msg->m_u.m_vmmcp.dev_offset_pages * VM_PAGE_SIZE; + u64_t ino_off = (u64_t) msg->m_u.m_vmmcp.ino_offset_pages * VM_PAGE_SIZE; + int n; + int bytes = msg->m_u.m_vmmcp.pages * VM_PAGE_SIZE; + struct vir_region *vr; + struct vmproc *caller; + vir_bytes offset; + + if(vm_isokendpt(msg->m_source, &n) != OK) panic("bogus source"); + caller = &vmproc[n]; + + if(bytes < VM_PAGE_SIZE) return EINVAL; + + if(!(vr = map_page_region(caller, VM_PAGE_SIZE, VM_DATATOP, bytes, + VR_ANON | VR_WRITABLE, 0, &mem_type_cache))) { + printf("VM: map_page_region failed\n"); + return ENOMEM; + } + + assert(vr->length == bytes); + + for(offset = 0; offset < bytes; offset += VM_PAGE_SIZE) { + struct cached_page *hb; + + assert(vr->length == bytes); + assert(offset < vr->length); + + if(!(hb = find_cached_page_bydev(dev, dev_off + offset, + msg->m_u.m_vmmcp.ino, ino_off + offset, 1))) { + map_unmap_region(caller, vr, 0, bytes); + return ENOENT; + } + + assert(!vr->param.pb_cache); + vr->param.pb_cache = hb->page; + + assert(vr->length == bytes); + assert(offset < vr->length); + + if(map_pf(caller, vr, offset, 1, NULL, NULL, 0) != OK) { + map_unmap_region(caller, vr, 0, bytes); + printf("VM: map_pf failed\n"); + return ENOMEM; + } + + assert(!vr->param.pb_cache); + } + + memset(msg, 0, sizeof(*msg)); + + msg->m_u.m_vmmcp_reply.addr = (void *) vr->vaddr; + + assert(vr); + +#if CACHE_SANITY + cache_sanitycheck_internal(); +#endif + + return OK; +} + +static int cache_pagefault(struct vmproc *vmp, struct vir_region *region, + struct phys_region *ph, int write, vfs_callback_t cb, + void *state, int len) +{ + vir_bytes offset = ph->offset; + assert(ph->ph->phys == MAP_NONE); + 
assert(region->param.pb_cache); + pb_unreferenced(region, ph, 0); + pb_link(ph, region->param.pb_cache, offset, region); + region->param.pb_cache = NULL; + + return OK; +} + +int +do_setcache(message *msg) +{ + int r; + dev_t dev = msg->m_u.m_vmmcp.dev; + u64_t dev_off = (u64_t) msg->m_u.m_vmmcp.dev_offset_pages * VM_PAGE_SIZE; + u64_t ino_off = (u64_t) msg->m_u.m_vmmcp.ino_offset_pages * VM_PAGE_SIZE; + int n; + struct vmproc *caller; + vir_bytes offset; + int bytes = msg->m_u.m_vmmcp.pages * VM_PAGE_SIZE; + + if(bytes < VM_PAGE_SIZE) return EINVAL; + + if(vm_isokendpt(msg->m_source, &n) != OK) panic("bogus source"); + caller = &vmproc[n]; + + for(offset = 0; offset < bytes; offset += VM_PAGE_SIZE) { + struct vir_region *region; + struct phys_region *phys_region = NULL; + vir_bytes v = (vir_bytes) msg->m_u.m_vmmcp.block + offset; + struct cached_page *hb; + + if(!(region = map_lookup(caller, v, &phys_region))) { + printf("VM: error: no reasonable memory region given (offset 0x%lx, 0x%lx)\n", offset, v); + return EFAULT; + } + + if(!phys_region) { + printf("VM: error: no available memory region given\n"); + return EFAULT; + } + + if((hb=find_cached_page_bydev(dev, dev_off + offset, + msg->m_u.m_vmmcp.ino, ino_off + offset, 1))) { + /* block inode info updated */ + if(hb->page != phys_region->ph) { + /* previous cache entry has become + * obsolete; make a new one. rmcache + * removes it from the cache and frees + * the page if it isn't mapped in anywhere + * else. 
+ */ + rmcache(hb); + } else { + /* block was already there, inode info might've changed which is fine */ + continue; + } + } + + if(phys_region->memtype != &mem_type_anon && + phys_region->memtype != &mem_type_anon_contig) { + printf("VM: error: no reasonable memory type\n"); + return EFAULT; + } + + if(phys_region->ph->refcount != 1) { + printf("VM: error: no reasonable refcount\n"); + return EFAULT; + } + + phys_region->memtype = &mem_type_cache; + + if((r=addcache(dev, dev_off + offset, + msg->m_u.m_vmmcp.ino, ino_off + offset, phys_region->ph)) != OK) { + printf("VM: addcache failed\n"); + return r; + } + } + +#if CACHE_SANITY + cache_sanitycheck_internal(); +#endif + + return OK; +} + diff --git a/servers/vm/mem_directphys.c b/servers/vm/mem_directphys.c index 391f911a0..740c6e488 100644 --- a/servers/vm/mem_directphys.c +++ b/servers/vm/mem_directphys.c @@ -14,35 +14,27 @@ * pointers. */ -static int phys_reference(struct phys_region *pr); static int phys_unreference(struct phys_region *pr); static int phys_writable(struct phys_region *pr); static int phys_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); static int phys_copy(struct vir_region *vr, struct vir_region *newvr); struct mem_type mem_type_directphys = { .name = "physical memory mapping", - .ev_reference = phys_reference, .ev_copy = phys_copy, .ev_unreference = phys_unreference, .writable = phys_writable, .ev_pagefault = phys_pagefault }; -static int phys_reference(struct phys_region *pr) -{ - panic("%s", __FUNCTION__); - return OK; -} - static int phys_unreference(struct phys_region *pr) { return OK; } static int phys_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, void *st, int len) { phys_bytes arg = region->param.phys, phmem; assert(arg != MAP_NONE); diff --git 
a/servers/vm/mem_shared.c b/servers/vm/mem_shared.c index a8349b69b..b7fdd4944 100644 --- a/servers/vm/mem_shared.c +++ b/servers/vm/mem_shared.c @@ -13,10 +13,9 @@ * pointers. */ -static int shared_reference(struct phys_region *pr); static int shared_unreference(struct phys_region *pr); static int shared_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); static int shared_sanitycheck(struct phys_region *pr, char *file, int line); static int shared_writable(struct phys_region *pr); static void shared_delete(struct vir_region *region); @@ -26,7 +25,6 @@ static int shared_refcount(struct vir_region *vr); struct mem_type mem_type_shared = { .name = "shared memory", - .ev_reference = shared_reference, .ev_copy = shared_copy, .ev_unreference = shared_unreference, .ev_pagefault = shared_pagefault, @@ -37,11 +35,6 @@ struct mem_type mem_type_shared = { .writable = shared_writable }; -static int shared_reference(struct phys_region *pr) -{ - return OK; -} - static int shared_unreference(struct phys_region *pr) { return mem_type_anon.ev_unreference(pr); @@ -116,7 +109,8 @@ static void shared_delete(struct vir_region *region) } static int shared_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, + void *state, int statelen) { struct vir_region *src_region; struct vmproc *src_vmp; @@ -131,7 +125,8 @@ static int shared_pagefault(struct vmproc *vmp, struct vir_region *region, if(!(pr = physblock_get(src_region, ph->offset))) { int r; - if((r=map_pf(src_vmp, src_region, ph->offset, write)) != OK) + if((r=map_pf(src_vmp, src_region, ph->offset, write, + NULL, NULL, 0)) != OK) return r; if(!(pr = physblock_get(src_region, ph->offset))) { panic("missing region after pagefault handling"); diff --git a/servers/vm/memtype.h b/servers/vm/memtype.h index b0fff8e41..7ff4ce9c6 
100644 --- a/servers/vm/memtype.h +++ b/servers/vm/memtype.h @@ -6,18 +6,24 @@ struct vmproc; struct vir_region; struct phys_region; +typedef void (*vfs_callback_t)(struct vmproc *vmp, message *m, + void *, void *); + typedef struct mem_type { char *name; /* human-readable name */ int (*ev_new)(struct vir_region *region); void (*ev_delete)(struct vir_region *region); - int (*ev_reference)(struct phys_region *pr); + int (*ev_reference)(struct phys_region *pr, struct phys_region *newpr); int (*ev_unreference)(struct phys_region *pr); int (*ev_pagefault)(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); int (*ev_resize)(struct vmproc *vmp, struct vir_region *vr, vir_bytes len); + void (*ev_split)(struct vmproc *vmp, struct vir_region *vr, + struct vir_region *r1, struct vir_region *r2); int (*writable)(struct phys_region *pr); int (*ev_sanitycheck)(struct phys_region *pr, char *file, int line); int (*ev_copy)(struct vir_region *vr, struct vir_region *newvr); + int (*ev_lowshrink)(struct vir_region *vr, vir_bytes len); u32_t (*regionid)(struct vir_region *vr); int (*refcount)(struct vir_region *vr); } mem_type_t; diff --git a/servers/vm/mmap.c b/servers/vm/mmap.c index 1cdc677a0..4340f94b5 100644 --- a/servers/vm/mmap.c +++ b/servers/vm/mmap.c @@ -56,8 +56,6 @@ static struct vir_region *mmap_region(struct vmproc *vmp, vir_bytes addr, if(len % VM_PAGE_SIZE) len += VM_PAGE_SIZE - (len % VM_PAGE_SIZE); -#if 0 - /* MAP_FIXED is restored in a later commit */ if (addr && (vmm_flags & MAP_FIXED)) { int r = map_unmap_range(vmp, addr, len); if(r != OK) { @@ -65,7 +63,6 @@ static struct vir_region *mmap_region(struct vmproc *vmp, vir_bytes addr, return NULL; } } -#endif if (addr || (vmm_flags & MAP_FIXED)) { /* An address is given, first try at that address. 
*/ @@ -359,8 +356,7 @@ int do_munmap(message *m) { int r, n; struct vmproc *vmp; - vir_bytes addr, len, offset; - struct vir_region *vr; + vir_bytes addr, len; endpoint_t target = SELF; if(m->m_type == VM_UNMAP_PHYS) { @@ -384,30 +380,20 @@ int do_munmap(message *m) addr = (vir_bytes) m->VMUN_ADDR; } else addr = (vir_bytes) m->VMUM_ADDR; - if(!(vr = map_lookup(vmp, addr, NULL))) { - printf("VM: unmap: virtual address 0x%lx not found in %d\n", - addr, target); - return EFAULT; - } - if(addr % VM_PAGE_SIZE) return EFAULT; if(m->m_type == VM_UNMAP_PHYS || m->m_type == VM_SHM_UNMAP) { + struct vir_region *vr; + if(!(vr = map_lookup(vmp, addr, NULL))) { + printf("VM: unmap: address 0x%lx not found in %d\n", + addr, target); + sys_sysctl_stacktrace(target); + return EFAULT; + } len = vr->length; } else len = roundup(m->VMUM_LEN, VM_PAGE_SIZE); - offset = addr - vr->vaddr; - - if(offset + len > vr->length) { - printf("munmap: addr 0x%lx len 0x%lx spills out of region\n", - addr, len); - return EFAULT; - } - - if(map_unmap_region(vmp, vr, offset, len) != OK) - panic("do_munmap: map_unmap_region failed"); - - return OK; + return map_unmap_range(vmp, addr, len); } diff --git a/servers/vm/pagefaults.c b/servers/vm/pagefaults.c index acc497562..46c254777 100644 --- a/servers/vm/pagefaults.c +++ b/servers/vm/pagefaults.c @@ -45,23 +45,34 @@ char *pf_errstr(u32_t err) return buf; } -/*===========================================================================* - * do_pagefaults * - *===========================================================================*/ -void do_pagefaults(message *m) -{ - endpoint_t ep = m->m_source; - u32_t addr = m->VPF_ADDR; - u32_t err = m->VPF_FLAGS; +struct pf_state { + endpoint_t ep; + vir_bytes vaddr; + u32_t err; +}; + +struct hm_state { + endpoint_t requestor; struct vmproc *vmp; - int s; + vir_bytes mem; + vir_bytes len; + int wrflag; +}; + +static void pf_cont(struct vmproc *vmp, message *m, void *arg, void *statearg); +static void 
hm_cont(struct vmproc *vmp, message *m, void *arg, void *statearg); + +static void handle_pagefault(endpoint_t ep, vir_bytes addr, u32_t err, int retry) +{ + struct vmproc *vmp; + int s, result; struct vir_region *region; vir_bytes offset; int p, wr = PFERR_WRITE(err); if(vm_isokendpt(ep, &p) != OK) - panic("do_pagefaults: endpoint wrong: %d", ep); + panic("handle_pagefault: endpoint wrong: %d", ep); vmp = &vmproc[p]; assert(vmp->vm_flags & VMF_INUSE); @@ -69,11 +80,11 @@ void do_pagefaults(message *m) /* See if address is valid at all. */ if(!(region = map_lookup(vmp, addr, NULL))) { if(PFERR_PROT(err)) { - printf("VM: pagefault: SIGSEGV %d protected addr 0x%x; %s\n", + printf("VM: pagefault: SIGSEGV %d protected addr 0x%lx; %s\n", ep, addr, pf_errstr(err)); } else { assert(PFERR_NOPAGE(err)); - printf("VM: pagefault: SIGSEGV %d bad addr 0x%x; %s\n", + printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx; %s\n", ep, addr, pf_errstr(err)); sys_sysctl_stacktrace(ep); } @@ -86,7 +97,7 @@ void do_pagefaults(message *m) /* If process was writing, see if it's writable. */ if(!(region->flags & VR_WRITABLE) && wr) { - printf("VM: pagefault: SIGSEGV %d ro map 0x%x %s\n", + printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n", ep, addr, pf_errstr(err)); if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK) panic("sys_kill failed: %d", s); @@ -99,20 +110,75 @@ void do_pagefaults(message *m) offset = addr - region->vaddr; /* Access is allowed; handle it. 
*/ - if((map_pf(vmp, region, offset, wr)) != OK) { + if(retry) { + result = map_pf(vmp, region, offset, wr, NULL, NULL, 0); + assert(result != SUSPEND); + } else { + struct pf_state state; + state.ep = ep; + state.vaddr = addr; + state.err = err; + result = map_pf(vmp, region, offset, wr, pf_cont, + &state, sizeof(state)); + } + + if(result == SUSPEND) { + return; + } + + if(result != OK) { printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep); - if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK) + if((s=sys_kill(ep, SIGSEGV)) != OK) panic("sys_kill failed: %d", s); if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK) panic("do_pagefaults: sys_vmctl failed: %d", ep); return; } + pt_clearmapcache(); + /* Pagefault is handled, so now reactivate the process. */ if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK) panic("do_pagefaults: sys_vmctl failed: %d", ep); } + +static void pf_cont(struct vmproc *vmp, message *m, + void *arg, void *statearg) +{ + struct pf_state *state = statearg; + handle_pagefault(state->ep, state->vaddr, state->err, 1); +} + +static void hm_cont(struct vmproc *vmp, message *m, + void *arg, void *statearg) +{ + int r; + struct hm_state *state = statearg; + printf("hm_cont: result %d\n", m->VMV_RESULT); + r = handle_memory(vmp, state->mem, state->len, state->wrflag, + hm_cont, state, sizeof(*state)); /* state is already the hm_state pointer: pass it and the struct size, not &state / pointer size */ + if(r == SUSPEND) { + printf("VM: hm_cont: damnit: hm_cont: more SUSPEND\n"); + return; + } + + printf("VM: hm_cont: ok, result %d, requestor %d\n", r, state->requestor); + + if(sys_vmctl(state->requestor, VMCTL_MEMREQ_REPLY, r) != OK) + panic("hm_cont: sys_vmctl failed: %d", r); + + printf("MEMREQ_REPLY sent\n"); +} + +/*===========================================================================* + * do_pagefaults * + *===========================================================================*/ +void do_pagefaults(message *m) +{ + handle_pagefault(m->m_source, m->VPF_ADDR, m->VPF_FLAGS, 0); +} + 
/*===========================================================================* * do_memory * *===========================================================================*/ @@ -132,25 +198,43 @@ void do_memory(void) switch(r) { case VMPTYPE_CHECK: + { + struct hm_state state; + if(vm_isokendpt(who, &p) != OK) panic("do_memory: bad endpoint: %d", who); vmp = &vmproc[p]; - r = handle_memory(vmp, mem, len, wrflag); + + state.vmp = vmp; + state.mem = mem; + state.len = len; + state.wrflag = wrflag; + state.requestor = requestor; + + r = handle_memory(vmp, mem, len, + wrflag, hm_cont, &state, sizeof(state)); + break; + } + default: return; } - if(sys_vmctl(requestor, VMCTL_MEMREQ_REPLY, r) != OK) + if(r != SUSPEND) { + if(sys_vmctl(requestor, VMCTL_MEMREQ_REPLY, r) != OK) panic("do_memory: sys_vmctl failed: %d", r); + } } } -int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag) +int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag, + vfs_callback_t callback, void *state, int statelen) { struct vir_region *region; vir_bytes o; + struct hm_state *hmstate = (struct hm_state *) state; /* Page-align memory and length. 
*/ o = mem % VM_PAGE_SIZE; @@ -181,8 +265,14 @@ int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag) if(offset + sublen > region->length) sublen = region->length - offset; - r = map_handle_memory(vmp, region, offset, - sublen, wrflag); + if(hmstate && hmstate->requestor == VFS_PROC_NR + && region->def_memtype == &mem_type_mappedfile) { + r = map_handle_memory(vmp, region, offset, + sublen, wrflag, NULL, NULL, 0); + } else { + r = map_handle_memory(vmp, region, offset, + sublen, wrflag, callback, state, statelen); /* state is void *, so sizeof(*state) is not its size; forward the caller-supplied length */ + } len -= sublen; mem += sublen; diff --git a/servers/vm/pb.c b/servers/vm/pb.c index 908840485..2a024f839 100644 --- a/servers/vm/pb.c +++ b/servers/vm/pb.c @@ -45,6 +45,7 @@ USE(newpb, newpb->phys = phys; newpb->refcount = 0; newpb->firstregion = NULL; + newpb->flags = 0; ); return newpb; @@ -65,13 +66,12 @@ USE(newphysr, newphysr->ph = newpb; newphysr->parent = parent; newphysr->next_ph_list = newpb->firstregion; - newphysr->memtype = parent->def_memtype; newpb->firstregion = newphysr;); newpb->refcount++; } struct phys_region *pb_reference(struct phys_block *newpb, - vir_bytes offset, struct vir_region *region) + vir_bytes offset, struct vir_region *region, mem_type_t *memtype) { struct phys_region *newphysr; @@ -80,6 +80,8 @@ struct phys_region *pb_reference(struct phys_block *newpb, return NULL; } + newphysr->memtype = memtype; + /* New physical region.
*/ pb_link(newphysr, newpb, offset, region); @@ -120,7 +122,7 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr, int rm) if(pb->refcount == 0) { assert(!pb->firstregion); int r; - if((r = region->def_memtype->ev_unreference(pr)) != OK) + if((r = pr->memtype->ev_unreference(pr)) != OK) panic("unref failed, %d", r); SLABFREE(pb); @@ -130,3 +132,37 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr, int rm) if(rm) physblock_set(region, pr->offset, NULL); } + +int mem_cow(struct vir_region *region, + struct phys_region *ph, phys_bytes new_page_cl, phys_bytes new_page) +{ + struct phys_block *pb; + + if(new_page == MAP_NONE) { + u32_t allocflags; + allocflags = vrallocflags(region->flags); + + if((new_page_cl = alloc_mem(1, allocflags)) == NO_MEM) + return ENOMEM; + + new_page = CLICK2ABS(new_page_cl); + } + + assert(ph->ph->phys != MAP_NONE); + + if(sys_abscopy(ph->ph->phys, new_page, VM_PAGE_SIZE) != OK) { + panic("VM: abscopy failed\n"); + return EFAULT; + } + + if(!(pb = pb_new(new_page))) { + free_mem(new_page_cl, 1); + return ENOMEM; + } + + pb_unreferenced(region, ph, 0); + pb_link(ph, pb, ph->offset, region); + ph->memtype = &mem_type_anon; + + return OK; +} diff --git a/servers/vm/proto.h b/servers/vm/proto.h index 6941b0b8a..2d5018a50 100644 --- a/servers/vm/proto.h +++ b/servers/vm/proto.h @@ -70,13 +70,14 @@ int do_unmap_phys(message *msg); int do_remap(message *m); int do_get_phys(message *m); int do_get_refcount(message *m); +int do_vfs_mmap(message *m); /* pagefaults.c */ void do_pagefaults(message *m); void do_memory(void); char *pf_errstr(u32_t err); int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int - wrflag); + wrflag, vfs_callback_t cb, void *state, int statelen); /* $(ARCH)/pagetable.c */ void pt_init(); @@ -103,6 +104,7 @@ int pt_mapkernel(pt_t *pt); void vm_pagelock(void *vir, int lockflag); int vm_addrok(void *vir, int write); int get_vm_self_pages(void); +int pt_writable(struct 
vmproc *vmp, vir_bytes v); #if SANITYCHECKS void pt_sanitycheck(pt_t *pt, char *file, int line); @@ -133,6 +135,7 @@ int map_region_extend(struct vmproc *vmp, struct vir_region *vr, int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes vir); int map_unmap_region(struct vmproc *vmp, struct vir_region *vr, vir_bytes offset, vir_bytes len); +int map_unmap_range(struct vmproc *vmp, vir_bytes, vir_bytes); int map_free_proc(struct vmproc *vmp); int map_proc_copy(struct vmproc *dst, struct vmproc *src); int map_proc_copy_from(struct vmproc *dst, struct vmproc *src, struct @@ -140,10 +143,11 @@ int map_proc_copy_from(struct vmproc *dst, struct vmproc *src, struct struct vir_region *map_lookup(struct vmproc *vmp, vir_bytes addr, struct phys_region **pr); int map_pf(struct vmproc *vmp, struct vir_region *region, vir_bytes - offset, int write); + offset, int write, vfs_callback_t pf_callback, void *state, int); int map_pin_memory(struct vmproc *vmp); int map_handle_memory(struct vmproc *vmp, struct vir_region *region, - vir_bytes offset, vir_bytes len, int write); + vir_bytes offset, vir_bytes len, int write, vfs_callback_t cb, + void *state, int statelen); void map_printmap(struct vmproc *vmp); int map_writept(struct vmproc *vmp); void printregionstats(struct vmproc *vmp); @@ -153,6 +157,8 @@ int map_free(struct vir_region *region); struct phys_region *physblock_get(struct vir_region *region, vir_bytes offset); void physblock_set(struct vir_region *region, vir_bytes offset, struct phys_region *newphysr); +int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, + struct phys_region *pr); struct vir_region * map_region_lookup_tag(struct vmproc *vmp, u32_t tag); @@ -162,7 +168,6 @@ int map_get_phys(struct vmproc *vmp, vir_bytes addr, phys_bytes *r); int map_get_ref(struct vmproc *vmp, vir_bytes addr, u8_t *cnt); int physregions(struct vir_region *vr); -void get_stats_info(struct vm_stats_info *vsi); void get_usage_info(struct vmproc *vmp, struct vm_usage_info *vui); 
void get_usage_info_kernel(struct vm_usage_info *vui); int get_region_info(struct vmproc *vmp, struct vm_region_info *vri, int @@ -188,13 +193,40 @@ void init_query_exit(void); struct phys_block *pb_new(phys_bytes phys); void pb_free(struct phys_block *); struct phys_region *pb_reference(struct phys_block *newpb, - vir_bytes offset, struct vir_region *region); + vir_bytes offset, struct vir_region *region, mem_type_t *); void pb_unreferenced(struct vir_region *region, struct phys_region *pr, int rm); void pb_link(struct phys_region *newphysr, struct phys_block *newpb, vir_bytes offset, struct vir_region *parent); +int mem_cow(struct vir_region *region, + struct phys_region *ph, phys_bytes new_page_cl, phys_bytes new_page); /* mem_directphys.c */ void phys_setphys(struct vir_region *vr, phys_bytes startaddr); /* mem_shared.c */ void shared_setsource(struct vir_region *vr, endpoint_t ep, struct vir_region *src); + +/* mem_cache.c */ +int do_mapcache(message *m); +int do_setcache(message *m); + +/* cache.c */ +struct cached_page *find_cached_page_bydev(dev_t dev, u64_t dev_off, + ino_t ino, u64_t ino_off, int touchlru); +struct cached_page *find_cached_page_byino(dev_t dev, ino_t ino, u64_t ino_off, int touchlru); +int addcache(dev_t dev, u64_t def_off, ino_t ino, u64_t ino_off, struct phys_block *pb); +void cache_sanitycheck_internal(void); +int cache_freepages(int pages); +void get_stats_info(struct vm_stats_info *vsi); +void cache_lru_touch(struct cached_page *hb); +void rmcache(struct cached_page *cp); + +/* vfs.c */ +int vfs_request(int reqno, int fd, struct vmproc *vmp, u64_t offset, + u32_t len, vfs_callback_t reply_callback, void *cbarg, void *state, + int statelen); +int do_vfs_reply(message *m); + +/* mem_file.c */ +void mappedfile_setfile(struct vir_region *region, int fd, u64_t offset, + dev_t dev, ino_t ino, u16_t clearend, int prefill); diff --git a/servers/vm/region.c b/servers/vm/region.c index a7ed1ec58..a77f415dd 100644 --- a/servers/vm/region.c +++ 
b/servers/vm/region.c @@ -30,9 +30,6 @@ #include "memtype.h" #include "regionavl.h" -static int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, - struct phys_region *pr); - static struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr); @@ -45,14 +42,18 @@ void map_printregion(struct vir_region *vr) int i; struct phys_region *ph; printf("map_printmap: map_name: %s\n", vr->def_memtype->name); - printf("\t%lx (len 0x%lx, %lukB), %p\n", - vr->vaddr, vr->length, vr->length/1024, vr->def_memtype->name); + printf("\t%lx (len 0x%lx, %lukB), %p, %s\n", + vr->vaddr, vr->length, vr->length/1024, + vr->def_memtype->name, + (vr->flags & VR_WRITABLE) ? "writable" : "readonly"); printf("\t\tphysblocks:\n"); for(i = 0; i < vr->length/VM_PAGE_SIZE; i++) { if(!(ph=vr->physblocks[i])) continue; - printf("\t\t@ %lx (refs %d): phys 0x%lx\n", + printf("\t\t@ %lx (refs %d): phys 0x%lx, %s\n", (vr->vaddr + ph->offset), - ph->ph->refcount, ph->ph->phys); + ph->ph->refcount, ph->ph->phys, + pt_writable(vr->parent, vr->vaddr + ph->offset) ? 
"W" : "R"); + } } @@ -122,8 +123,8 @@ static struct vir_region *getnextvr(struct vir_region *vr) int pr_writable(struct vir_region *vr, struct phys_region *pr) { - assert(vr->def_memtype->writable); - return ((vr->flags & VR_WRITABLE) && vr->def_memtype->writable(pr)); + assert(pr->memtype->writable); + return ((vr->flags & VR_WRITABLE) && pr->memtype->writable(pr)); } #if SANITYCHECKS @@ -196,8 +197,8 @@ void map_sanitycheck(char *file, int line) ALLREGIONS(;,MYASSERT(pr->offset == voffset);); ALLREGIONS(;,USE(pr->ph, pr->ph->seencount++;); if(pr->ph->seencount == 1) { - if(pr->parent->memtype->ev_sanitycheck) - pr->parent->memtype->ev_sanitycheck(pr, file, line); + if(pr->memtype->ev_sanitycheck) + pr->memtype->ev_sanitycheck(pr, file, line); } ); @@ -209,6 +210,7 @@ void map_sanitycheck(char *file, int line) } } MYASSERT(!(vr->vaddr % VM_PAGE_SIZE));, + if(pr->ph->flags & PBF_INCACHE) pr->ph->seencount++; if(pr->ph->refcount != pr->ph->seencount) { map_printmap(vmp); printf("ph in vr %p: 0x%lx refcount %u " @@ -233,6 +235,7 @@ void map_sanitycheck(char *file, int line) MYASSERT(others->ph == pr->ph); n_others++; } + if(pr->ph->flags & PBF_INCACHE) n_others++; MYASSERT(pr->ph->refcount == n_others); } MYASSERT(pr->ph->refcount == pr->ph->seencount); @@ -245,7 +248,7 @@ void map_sanitycheck(char *file, int line) /*=========================================================================* * map_ph_writept * *=========================================================================*/ -static int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, +int map_ph_writept(struct vmproc *vmp, struct vir_region *vr, struct phys_region *pr) { int flags = PTF_PRESENT | PTF_USER; @@ -490,7 +493,8 @@ mem_type_t *memtype; } if(mapflags & MF_PREALLOC) { - if(map_handle_memory(vmp, newregion, 0, length, 1) != OK) { + if(map_handle_memory(vmp, newregion, 0, length, 1, + NULL, 0, 0) != OK) { printf("VM: map_page_region: prealloc failed\n"); free(newregion->physblocks); 
USE(newregion, @@ -657,8 +661,6 @@ u32_t vrallocflags(u32_t flags) allocflags |= PAF_LOWER16MB; if(flags & VR_LOWER1MB) allocflags |= PAF_LOWER1MB; - if(flags & VR_CONTIG) - allocflags |= PAF_CONTIG; if(!(flags & VR_UNINITIALIZED)) allocflags |= PAF_CLEAR; @@ -668,11 +670,14 @@ u32_t vrallocflags(u32_t flags) /*===========================================================================* * map_pf * *===========================================================================*/ -int map_pf(vmp, region, offset, write) +int map_pf(vmp, region, offset, write, pf_callback, state, len) struct vmproc *vmp; struct vir_region *region; vir_bytes offset; int write; +vfs_callback_t pf_callback; +void *state; +int len; { struct phys_region *ph; int r = OK; @@ -697,7 +702,8 @@ int write; return ENOMEM; } - if(!(ph = pb_reference(pb, offset, region))) { + if(!(ph = pb_reference(pb, offset, region, + region->def_memtype))) { printf("map_pf: pb_reference failed\n"); pb_free(pb); return ENOMEM; @@ -711,15 +717,14 @@ int write; * writable, nothing to do. 
*/ - assert(region->def_memtype->writable); + assert(ph->memtype->writable); - if(!write || !region->def_memtype->writable(ph)) { - assert(region->def_memtype->ev_pagefault); + if(!write || !ph->memtype->writable(ph)) { + assert(ph->memtype->ev_pagefault); assert(ph->ph); - if((r = region->def_memtype->ev_pagefault(vmp, - region, ph, write)) == SUSPEND) { - panic("map_pf: memtype->ev_pagefault returned SUSPEND\n"); + if((r = ph->memtype->ev_pagefault(vmp, + region, ph, write, pf_callback, state, len)) == SUSPEND) { return SUSPEND; } @@ -755,12 +760,16 @@ int write; return r; } -int map_handle_memory(vmp, region, start_offset, length, write) +int map_handle_memory(vmp, region, start_offset, length, write, + cb, state, statelen) struct vmproc *vmp; struct vir_region *region; vir_bytes start_offset; vir_bytes length; int write; +vfs_callback_t cb; +void *state; +int statelen; { vir_bytes offset, lim; int r; @@ -770,7 +779,8 @@ int write; assert(lim > start_offset); for(offset = start_offset; offset < lim; offset += VM_PAGE_SIZE) - if((r = map_pf(vmp, region, offset, write)) != OK) + if((r = map_pf(vmp, region, offset, write, + cb, state, statelen)) != OK) return r; return OK; @@ -788,7 +798,7 @@ int map_pin_memory(struct vmproc *vmp) /* Scan all memory regions. 
*/ while((vr = region_get_iter(&iter))) { /* Make sure region is mapped to physical memory and writable.*/ - r = map_handle_memory(vmp, vr, 0, vr->length, 1); + r = map_handle_memory(vmp, vr, 0, vr->length, 1, NULL, 0, 0); if(r != OK) { panic("map_pin_memory: map_handle_memory failed: %d", r); } @@ -800,7 +810,7 @@ int map_pin_memory(struct vmproc *vmp) /*===========================================================================* * map_copy_region * *===========================================================================*/ -static struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr) +struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr) { /* map_copy_region creates a complete copy of the vir_region * data structure, linking in the same phys_blocks directly, @@ -829,11 +839,17 @@ static struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region } for(p = 0; p < phys_slot(vr->length); p++) { + struct phys_region *newph; + if(!(ph = physblock_get(vr, p*VM_PAGE_SIZE))) continue; - struct phys_region *newph = pb_reference(ph->ph, ph->offset, newvr); + newph = pb_reference(ph->ph, ph->offset, newvr, + vr->def_memtype); if(!newph) { map_free(newvr); return NULL; } + if(ph->memtype->ev_reference) + ph->memtype->ev_reference(ph, newph); + #if SANITYCHECKS USE(newph, newph->written = 0;); assert(physregions(vr) == cr); @@ -994,10 +1010,10 @@ struct vir_region *start_src_vr; int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes v) { - vir_bytes offset = v; + vir_bytes offset = v, limit, extralen; struct vir_region *vr, *nextvr; struct phys_region **newpr; - int newslots, prevslots, addedslots; + int newslots, prevslots, addedslots, r; offset = roundup(offset, VM_PAGE_SIZE); @@ -1008,21 +1024,15 @@ int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes v) if(vr->vaddr + vr->length >= v) return OK; + limit = vr->vaddr + vr->length; + assert(vr->vaddr <= offset); newslots = phys_slot(offset - 
vr->vaddr); prevslots = phys_slot(vr->length); assert(newslots >= prevslots); addedslots = newslots - prevslots; - - if(!(newpr = realloc(vr->physblocks, - newslots * sizeof(struct phys_region *)))) { - printf("VM: map_region_extend_upto_v: realloc failed\n"); - return ENOMEM; - } - - vr->physblocks = newpr; - memset(vr->physblocks + prevslots, 0, - addedslots * sizeof(struct phys_region *)); + extralen = offset - limit; + assert(extralen > 0); if((nextvr = getnextvr(vr))) { assert(offset <= nextvr->vaddr); @@ -1034,11 +1044,28 @@ int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes v) } if(!vr->def_memtype->ev_resize) { - printf("VM: can't resize this type of memory\n"); + if(!map_page_region(vmp, limit, 0, extralen, + VR_WRITABLE | VR_ANON, + 0, &mem_type_anon)) { + printf("resize: couldn't put anon memory there\n"); + return ENOMEM; + } + return OK; + } + + if(!(newpr = realloc(vr->physblocks, + newslots * sizeof(struct phys_region *)))) { + printf("VM: map_region_extend_upto_v: realloc failed\n"); return ENOMEM; } - return vr->def_memtype->ev_resize(vmp, vr, offset - vr->vaddr); + vr->physblocks = newpr; + memset(vr->physblocks + prevslots, 0, + addedslots * sizeof(struct phys_region *)); + + r = vr->def_memtype->ev_resize(vmp, vr, offset - vr->vaddr); + + return r; } /*========================================================================* @@ -1066,11 +1093,27 @@ int map_unmap_region(struct vmproc *vmp, struct vir_region *r, map_subfree(r, offset, len); /* if unmap was at start/end of this region, it actually shrinks */ - if(offset == 0) { + if(r->length == len) { + /* Whole region disappears. Unlink and free it. 
*/ + region_remove(&vmp->vm_regions_avl, r->vaddr); + map_free(r); + } else if(offset == 0) { struct phys_region *pr; vir_bytes voffset; int remslots; + if(!r->def_memtype->ev_lowshrink) { + printf("VM: low-shrinking not implemented for %s\n", + r->def_memtype->name); + return EINVAL; + } + + if(r->def_memtype->ev_lowshrink(r, len) != OK) { + printf("VM: low-shrinking failed for %s\n", + r->def_memtype->name); + return EINVAL; + } + region_remove(&vmp->vm_regions_avl, r->vaddr); USE(r, @@ -1099,12 +1142,6 @@ int map_unmap_region(struct vmproc *vmp, struct vir_region *r, r->length -= len; } - if(r->length == 0) { - /* Whole region disappears. Unlink and free it. */ - region_remove(&vmp->vm_regions_avl, r->vaddr); - map_free(r); - } - SANITYCHECK(SCL_DETAIL); if(pt_writemap(vmp, &vmp->vm_pt, regionstart, @@ -1118,6 +1155,154 @@ int map_unmap_region(struct vmproc *vmp, struct vir_region *r, return OK; } +int split_region(struct vmproc *vmp, struct vir_region *vr, + struct vir_region **vr1, struct vir_region **vr2, vir_bytes split_len) +{ + struct vir_region *r1 = NULL, *r2 = NULL; + vir_bytes rem_len = vr->length - split_len; + int slots1, slots2; + vir_bytes voffset; + int n1 = 0, n2 = 0; + + assert(!(split_len % VM_PAGE_SIZE)); + assert(!(rem_len % VM_PAGE_SIZE)); + assert(!(vr->vaddr % VM_PAGE_SIZE)); + assert(!(vr->length % VM_PAGE_SIZE)); + + if(!vr->def_memtype->ev_split) { + printf("VM: split region not implemented for %s\n", + vr->def_memtype->name); + return EINVAL; + } + + slots1 = phys_slot(split_len); + slots2 = phys_slot(rem_len); + + if(!(r1 = region_new(vmp, vr->vaddr, split_len, vr->flags, + vr->def_memtype))) { + goto bail; + } + + if(!(r2 = region_new(vmp, vr->vaddr+split_len, rem_len, vr->flags, + vr->def_memtype))) { + map_free(r1); + goto bail; + } + + for(voffset = 0; voffset < r1->length; voffset += VM_PAGE_SIZE) { + struct phys_region *ph, *phn; + if(!(ph = physblock_get(vr, voffset))) continue; + if(!(phn = pb_reference(ph->ph, voffset, r1, 
ph->memtype))) + goto bail; + n1++; + } + + for(voffset = 0; voffset < r2->length; voffset += VM_PAGE_SIZE) { + struct phys_region *ph, *phn; + if(!(ph = physblock_get(vr, split_len + voffset))) continue; + if(!(phn = pb_reference(ph->ph, voffset, r2, ph->memtype))) + goto bail; + n2++; + } + + vr->def_memtype->ev_split(vmp, vr, r1, r2); + + region_remove(&vmp->vm_regions_avl, vr->vaddr); + map_free(vr); + region_insert(&vmp->vm_regions_avl, r1); + region_insert(&vmp->vm_regions_avl, r2); + + *vr1 = r1; + *vr2 = r2; + + return OK; + + bail: + if(r1) map_free(r1); + if(r2) map_free(r2); + + printf("split_region: failed\n"); + + return ENOMEM; +} + +int map_unmap_range(struct vmproc *vmp, vir_bytes unmap_start, vir_bytes length) +{ + vir_bytes o = unmap_start % VM_PAGE_SIZE, unmap_limit; + region_iter v_iter; + struct vir_region *vr, *nextvr; + + unmap_start -= o; + length += o; + length = roundup(length, VM_PAGE_SIZE); + unmap_limit = length + unmap_start; + + if(length < VM_PAGE_SIZE) return EINVAL; + if(unmap_limit <= unmap_start) return EINVAL; + + region_start_iter(&vmp->vm_regions_avl, &v_iter, unmap_start, AVL_LESS_EQUAL); + + if(!(vr = region_get_iter(&v_iter))) { + region_start_iter(&vmp->vm_regions_avl, &v_iter, unmap_start, AVL_GREATER); + if(!(vr = region_get_iter(&v_iter))) { + return OK; + } + } + + assert(vr); + + for(; vr && vr->vaddr < unmap_limit; vr = nextvr) { + vir_bytes thislimit = vr->vaddr + vr->length; + vir_bytes this_unmap_start, this_unmap_limit; + vir_bytes remainlen; + int r; + + region_incr_iter(&v_iter); + nextvr = region_get_iter(&v_iter); + + assert(thislimit > vr->vaddr); + + this_unmap_start = MAX(unmap_start, vr->vaddr); + this_unmap_limit = MIN(unmap_limit, thislimit); + + if(this_unmap_start >= this_unmap_limit) continue; + + if(this_unmap_start > vr->vaddr && this_unmap_limit < thislimit) { + int r; + struct vir_region *vr1, *vr2; + vir_bytes split_len = this_unmap_limit - vr->vaddr; + assert(split_len > 0); + assert(split_len 
< vr->length); + if((r=split_region(vmp, vr, &vr1, &vr2, split_len)) != OK) { + printf("VM: unmap split failed\n"); + return r; + } + vr = vr1; + thislimit = vr->vaddr + vr->length; + } + + remainlen = this_unmap_limit - vr->vaddr; + + assert(this_unmap_start >= vr->vaddr); + assert(this_unmap_limit <= thislimit); + assert(remainlen > 0); + + r = map_unmap_region(vmp, vr, this_unmap_start - vr->vaddr, + this_unmap_limit - this_unmap_start); + + if(r != OK) { + printf("map_unmap_range: map_unmap_region failed\n"); + return r; + } + + region_start_iter(&vmp->vm_regions_avl, &v_iter, nextvr->vaddr, AVL_EQUAL); + assert(region_get_iter(&v_iter) == nextvr); + } + + return OK; + +} + /*========================================================================* * map_get_phys * *========================================================================*/ @@ -1155,14 +1340,6 @@ int map_get_ref(struct vmproc *vmp, vir_bytes addr, u8_t *cnt) return OK; } -/*========================================================================* - * get_stats_info * - *========================================================================*/ -void get_stats_info(struct vm_stats_info *vsi) -{ - vsi->vsi_cached = 0L; -} - void get_usage_info_kernel(struct vm_usage_info *vui) { memset(vui, 0, sizeof(*vui)); @@ -1237,7 +1414,8 @@ int get_region_info(struct vmproc *vmp, struct vm_region_info *vri, region_start_iter(&vmp->vm_regions_avl, &v_iter, next, AVL_GREATER_EQUAL); if(!(vr = region_get_iter(&v_iter))) return 0; - for(count = 0; (vr = region_get_iter(&v_iter)) && count < max; count++, vri++) { + for(count = 0; (vr = region_get_iter(&v_iter)) && count < max; + region_incr_iter(&v_iter)) { struct phys_region *ph1 = NULL, *ph2 = NULL; vir_bytes voffset; @@ -1253,18 +1431,23 @@ int get_region_info(struct vmproc *vmp, struct vm_region_info *vri, if(!ph1) ph1 = ph; ph2 = ph; } - if(!ph1 || !ph2) { assert(!ph1 && !ph2); continue; } + + if(!ph1 || !ph2) { + printf("skipping empty region 
0x%lx-0x%lx\n", + vr->vaddr, vr->vaddr+vr->length); + continue; + } /* Report start+length of region starting from lowest use. */ vri->vri_addr = vr->vaddr + ph1->offset; - vri->vri_prot = 0; + vri->vri_prot = PROT_READ; vri->vri_length = ph2->offset + VM_PAGE_SIZE - ph1->offset; /* "AND" the provided protection with per-page protection. */ - if (!(vr->flags & VR_WRITABLE)) - vri->vri_prot &= ~PROT_WRITE; - - region_incr_iter(&v_iter); + if (vr->flags & VR_WRITABLE) + vri->vri_prot |= PROT_WRITE; + count++; + vri++; } *nextp = next; diff --git a/servers/vm/region.h b/servers/vm/region.h index 6db556dc1..9d615ef03 100644 --- a/servers/vm/region.h +++ b/servers/vm/region.h @@ -32,6 +32,8 @@ struct phys_block { u8_t flags; }; +#define PBF_INCACHE 0x01 + typedef struct vir_region { vir_bytes vaddr; /* virtual address, offset from pagetable */ vir_bytes length; /* length in bytes */ @@ -43,12 +45,21 @@ typedef struct vir_region { u32_t id; /* unique id */ union { - phys_bytes phys; + phys_bytes phys; /* VR_DIRECT */ struct { endpoint_t ep; vir_bytes vaddr; int id; } shared; + struct phys_block *pb_cache; + struct { + int procfd; /* cloned fd in proc for mmap */ + dev_t dev; + ino_t ino; + u64_t offset; + int inited; + u16_t clearend; + } file; } param; /* AVL fields */ @@ -61,7 +72,6 @@ typedef struct vir_region { #define VR_PHYS64K 0x004 /* Physical memory must be 64k aligned. */ #define VR_LOWER16MB 0x008 #define VR_LOWER1MB 0x010 -#define VR_CONTIG 0x020 /* Must be physically contiguous. 
*/ #define VR_SHARED 0x040 #define VR_UNINITIALIZED 0x080 /* Do not clear after allocation */ diff --git a/servers/vm/sanitycheck.h b/servers/vm/sanitycheck.h index 1528a3ec1..a2993ce5f 100644 --- a/servers/vm/sanitycheck.h +++ b/servers/vm/sanitycheck.h @@ -16,10 +16,10 @@ printf("VM:%s:%d: %s failed (last sanity check %s:%d)\n", file, line, #c, sc_lastfile, sc_lastline); \ panic("sanity check failed"); } } while(0) -#define SLABSANITYCHECK(l) if((l) <= vm_sanitychecklevel) { \ +#define SLABSANITYCHECK(l) if(_minix_kerninfo && 0) { \ slab_sanitycheck(__FILE__, __LINE__); } -#define SANITYCHECK(l) if(!nocheck && ((l) <= vm_sanitychecklevel)) { \ +#define SANITYCHECK(l) if(!nocheck && _minix_kerninfo && 0) { \ struct vmproc *vmpr; \ assert(incheck == 0); \ incheck = 1; \ diff --git a/servers/vm/slaballoc.c b/servers/vm/slaballoc.c index b7dd8bc02..c27d07128 100644 --- a/servers/vm/slaballoc.c +++ b/servers/vm/slaballoc.c @@ -28,7 +28,7 @@ #include "util.h" #include "sanitycheck.h" -#define SLABSIZES 60 +#define SLABSIZES 200 #define ITEMSPERPAGE(bytes) (DATABYTES / (bytes)) diff --git a/servers/vm/utility.c b/servers/vm/utility.c index 4c835b85f..87aad1c35 100644 --- a/servers/vm/utility.c +++ b/servers/vm/utility.c @@ -161,7 +161,7 @@ int do_info(message *m) * deadlock. Note that no memory mapping can be undone without the * involvement of VM, so we are safe until we're done. */ - r = handle_memory(vmp, ptr, size, 1 /*wrflag*/); + r = handle_memory(vmp, ptr, size, 1 /*wrflag*/, NULL, NULL, 0); if (r != OK) return r; /* Now that we know the copy out will succeed, perform the actual copy diff --git a/servers/vm/vm.h b/servers/vm/vm.h index b6d91f6fa..97bd5e3a5 100644 --- a/servers/vm/vm.h +++ b/servers/vm/vm.h @@ -6,6 +6,7 @@ /* Compile in asserts and custom sanity checks at all? 
*/ #define SANITYCHECKS 0 +#define CACHE_SANITY 0 #define VMSTATS 0 /* VM behaviour */ diff --git a/servers/vm/vmproc.h b/servers/vm/vmproc.h index fd28f831f..5e7145505 100644 --- a/servers/vm/vmproc.h +++ b/servers/vm/vmproc.h @@ -11,8 +11,6 @@ struct vmproc; -typedef void (*callback_t)(struct vmproc *who, message *m); - struct vmproc { int vm_flags; endpoint_t vm_endpoint; @@ -22,22 +20,8 @@ struct vmproc { /* Regions in virtual address space. */ region_avl vm_regions_avl; vir_bytes vm_region_top; /* highest vaddr last inserted */ - bitchunk_t vm_call_mask[VM_CALL_MASK_SIZE]; - - /* State for requests pending to be done to vfs on behalf of - * this process. - */ - callback_t vm_callback; /* function to call on vfs reply */ - int vm_callback_type; /* expected message type */ - int vm_slot; /* process table slot */ - - union { - struct { - cp_grant_id_t gid; - } open; /* VM_VFS_OPEN */ - } vm_state; /* Callback state. */ #if VMSTATS int vm_bytecopies; #endif diff --git a/test/Makefile b/test/Makefile index ac2d4c6f5..7d4762dc4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -36,10 +36,9 @@ OBJS.test71+= testcache.o OBJS.test72+= testcache.o LDADD.test72+= -lminixfs -# temporarily disabled until 2ndary cache is back -#PROGS += testvm -#OBJS.testvm+= testcache.o -#LDADD.testvm+= -lsys -ltimers -lminlib -static +PROGS += testvm +OBJS.testvm+= testcache.o +LDADD.testvm+= -lsys -ltimers -lminlib -static FILES += testvm.conf @@ -48,7 +47,7 @@ MINIX_TESTS= \ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \ 41 42 43 44 45 46 48 49 50 52 53 54 55 56 58 59 60 \ -61 64 65 66 67 68 69 70 71 72 # 73 (2ndary cache) +61 64 65 66 67 68 69 70 71 72 73 .if ${MACHINE_ARCH} == "i386" MINIX_TESTS+= \ diff --git a/test/test72.c b/test/test72.c index f376497ca..a25d1e6a1 100644 --- a/test/test72.c +++ b/test/test72.c @@ -114,29 +114,34 @@ static void allocate(int b) ssize_t bdev_gather(dev_t dev, u64_t pos, iovec_t *vec, 
int count, int flags) { - int i, block; + int i; ssize_t tot = 0; assert(dev == MYDEV); assert(curblocksize > 0); assert(!(pos % curblocksize)); - block = pos / curblocksize; for(i = 0; i < count; i++) { - int subblocks; + int subpages, block, block_off; char *data = (char *) vec[i].iov_addr; - assert(vec[i].iov_size > 0); - assert(!(vec[i].iov_size % curblocksize)); - subblocks = vec[i].iov_size / curblocksize; - while(subblocks > 0) { - assert(block > 0); + assert(!(pos % curblocksize)); + block = pos / curblocksize; + block_off = pos % curblocksize; + assert(!(vec[i].iov_size % PAGE_SIZE)); + subpages = vec[i].iov_size / PAGE_SIZE; + while(subpages > 0) { + assert(block >= 0); assert(block < MAXBLOCKS); + assert(block_off >= 0); + assert(block_off < curblocksize); if(!writtenblocks[block]) { allocate(block); } - memcpy(data, writtenblocks[block], curblocksize); + memcpy(data, writtenblocks[block] + block_off, + PAGE_SIZE); block++; - subblocks--; - data += curblocksize; - tot += curblocksize; + subpages--; + data += PAGE_SIZE; + tot += PAGE_SIZE; + block_off += PAGE_SIZE; } } @@ -156,7 +161,7 @@ bdev_scatter(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags) int subblocks; char *data = (char *) vec[i].iov_addr; assert(vec[i].iov_size > 0); - assert(!(vec[i].iov_size % curblocksize)); + assert(!(vec[i].iov_size % PAGE_SIZE)); subblocks = vec[i].iov_size / curblocksize; while(subblocks > 0) { assert(block >= 0); @@ -263,6 +268,18 @@ u32_t sqrt_approx(u32_t v) return (u32_t) sqrt(v); } +int vm_set_cacheblock(void *block, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + return ENOSYS; +} + +void *vm_map_cacheblock(u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + return MAP_FAILED; +} + int main(int argc, char *argv[]) { diff --git a/test/testvm.c b/test/testvm.c index dacf31a16..d42d211a9 100644 --- a/test/testvm.c +++ b/test/testvm.c @@ -5,6 +5,7 @@ #include #include 
#include +#include #include #include #include @@ -13,6 +14,9 @@ #include "common.h" #include "testcache.h" +#define MYMAJOR 40 /* doesn't really matter, shouldn't be NO_DEV though */ +#define MYDEV makedev(MYMAJOR, 1) + static char *pipefilename = NULL, *progname; int pipefd = -1; @@ -23,20 +27,30 @@ static char *bdata = NULL; int dowriteblock(int b, int blocksize, u32_t seed, char *block) { int r; - - r=vm_yield_block_get_block(VM_BLOCKID_NONE, b, bdata, blocksize); - - if(r != OK && r != ESRCH) { - printf("dowriteblock: vm_yield_block_get_block get %d\n", r); - exit(1); + char *bdata; + int mustset = 0; + u64_t dev_off = (u64_t) b * blocksize; + + if((bdata = vm_map_cacheblock(MYDEV, dev_off, + VMC_NO_INODE, 0, NULL, blocksize)) == MAP_FAILED) { + if((bdata = minix_mmap(0, blocksize, + PROT_READ|PROT_WRITE, MAP_ANON, -1, 0)) == MAP_FAILED) { + printf("minix_mmap failed\n"); + exit(1); + } + mustset = 1; } memcpy(bdata, block, blocksize); - r=vm_yield_block_get_block(b, VM_BLOCKID_NONE, bdata, blocksize); + if(mustset && (r=vm_set_cacheblock(bdata, MYDEV, dev_off, + VMC_NO_INODE, 0, NULL, blocksize)) != OK) { + printf("dowriteblock: vm_set_cacheblock failed %d\n", r); + exit(1); + } - if(r != OK) { - printf("dowriteblock: vm_yield_block_get_block yield %d\n", r); + if(minix_munmap(bdata, blocksize) < 0) { + printf("dowriteblock: minix_munmap failed %d\n", r); exit(1); } @@ -45,28 +59,25 @@ int dowriteblock(int b, int blocksize, u32_t seed, char *block) int readblock(int b, int blocksize, u32_t seed, char *block) { - int r; + char *bdata; + u64_t dev_off = (u64_t) b * blocksize; - r=vm_yield_block_get_block(VM_BLOCKID_NONE, b, bdata, blocksize); - if(r == ESRCH) { + if((bdata = vm_map_cacheblock(MYDEV, dev_off, + VMC_NO_INODE, 0, NULL, blocksize)) == MAP_FAILED) { return OK_BLOCK_GONE; } - if(r != OK) { - printf("readblock: vm_yield_block_get_block get %d\n", r); - exit(1); - } memcpy(block, bdata, blocksize); - r=vm_yield_block_get_block(b, VM_BLOCKID_NONE, bdata, 
blocksize); - if(r != OK) { - printf("readblock: vm_yield_block_get_block yield %d\n", r); + + if(minix_munmap(bdata, blocksize) < 0) { + printf("dowriteblock: minix_munmap failed\n"); exit(1); } return blocksize; } -void testend(void) { vm_forgetblocks(); } +void testend(void) { } static void writepipe(struct info *i) diff --git a/test/testvm.conf b/test/testvm.conf index d2c6907b7..24aed285e 100644 --- a/test/testvm.conf +++ b/test/testvm.conf @@ -1,7 +1,7 @@ service testvm { ipc ALL; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS