]> Zhao Yanbai Git Server - minix.git/commitdiff
vm: mmap support 44/544/15
authorBen Gras <ben@minix3.org>
Tue, 7 May 2013 12:36:09 +0000 (12:36 +0000)
committerBen Gras <ben@minix3.org>
Fri, 31 May 2013 15:42:01 +0000 (15:42 +0000)
. test74 for mmap functionality
. vm: add a mem_file memory type that specifies an mmap()ped
  memory range, backed by a file
. add fdref, an object that keeps track of FD references within
  VM per process and so knows how to de-duplicate the use of FD's
  by various mmap()ped ranges; there can be many more than there can
  be FD's
. turned off for now, enable with 'filemap=1' as boot option

Change-Id: I640b1126cdaa522a0560301cf6732b7661555672

22 files changed:
distrib/sets/lists/minix/mi
etc/boot.cfg.default
servers/vm/Makefile
servers/vm/arch/i386/pagetable.c
servers/vm/fdref.c [new file with mode: 0644]
servers/vm/fdref.h [new file with mode: 0644]
servers/vm/glo.h
servers/vm/main.c
servers/vm/mem_cache.c
servers/vm/mem_file.c [new file with mode: 0644]
servers/vm/mmap.c
servers/vm/proto.h
servers/vm/region.c
servers/vm/region.h
servers/vm/vfs.c [new file with mode: 0644]
test/Makefile
test/common.c
test/run
test/test71.c
test/test74.c [new file with mode: 0644]
test/testcache.c
test/testcache.h

index 1e502071bc3493bdd480051f6852ad7d01665229..d34e16667f9c770c55af138e3ee0008f23f44cc5 100644 (file)
 ./usr/tests/minix-posix/test71         minix-sys
 ./usr/tests/minix-posix/test72         minix-sys
 ./usr/tests/minix-posix/test73         minix-sys
+./usr/tests/minix-posix/test74         minix-sys
 ./usr/tests/minix-posix/test7          minix-sys
 ./usr/tests/minix-posix/test8          minix-sys
 ./usr/tests/minix-posix/test9          minix-sys
index 2ad2cc40433e48f654f19872603fb5eba1c5d16f..23e805f05a00950a100a9628cb9d17ec5f9667dd 100644 (file)
@@ -4,5 +4,6 @@ default=2
 menu=Start MINIX 3:load_mods /boot/minix_default/mod*;multiboot /boot/minix_default/kernel rootdevname=$rootdevname $args
 menu=Start latest MINIX 3:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname $args
 menu=Start latest MINIX 3 in single user mode:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname bootopts=-s $args
+menu=Start latest MINIX 3 with file mmap:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname filemap=1 $args
 menu=Edit menu option:edit
 menu=Drop to boot prompt:prompt
index ea9a67f37aa5df0a09124003ea7fe22136175015..f24cf428fa1b41b176945784675cd36f935d80cf 100644 (file)
@@ -6,7 +6,7 @@ SRCS=   main.c alloc.c utility.c exit.c fork.c break.c \
        mmap.c slaballoc.c region.c pagefaults.c \
        rs.c queryexit.c pb.c regionavl.c \
        mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c      \
-       mem_cache.c cache.c
+       mem_cache.c cache.c vfs.c mem_file.c fdref.c
 
 .if ${MACHINE_ARCH} == "earm"
 LDFLAGS+= -T ${.CURDIR}/arch/${MACHINE_ARCH}/vm.lds
index 0f6cca9ceb4b10d23bc5218ea28d137f05119475..c9e6d8fc32a34064a4832870fae41f7327d53595 100644 (file)
@@ -1330,10 +1330,6 @@ int pt_bind(pt_t *pt, struct vmproc *who)
                        pdeslot * ARCH_PAGEDIR_SIZE);
 #endif
 
-#if 0
-       printf("VM: slot %d endpoint %d has pde val 0x%lx at kernel address 0x%lx\n",
-               slot, who->vm_endpoint, page_directories[slot], pdes);
-#endif
        /* Tell kernel about new page table root. */
        return sys_vmctl_set_addrspace(who->vm_endpoint, pt->pt_dir_phys, pdes);
 }
diff --git a/servers/vm/fdref.c b/servers/vm/fdref.c
new file mode 100644 (file)
index 0000000..d72c5fa
--- /dev/null
@@ -0,0 +1,177 @@
+
+/* File that implements the 'fdref' data structure. It keeps track
+ * of how many times a particular fd (per process) is referenced by
+ * mmapped objects.
+ *
+ * This is used to
+ *  - have many references to the same file, without needing an FD each
+ *  - deciding when we have to close an FD (last reference disappears)
+ *
+ * Examples:
+ *  - if a file-mmapped region is split, the refcount increases; there are
+ *    now two regions referencing the same FD. We can't simply close the
+ *    FD once either region is unmapped, as the pagefaults for the other
+ *    would stop working. So we increase the refcount to that fd.
+ *  - if a new file-mapped region is requested, we might find out it's the
+ *    same dev/inode the same process already has referenced. we could
+ *    decide to close the new reference and use an existing one, so 
+ *    references to the same file aren't fd-limited.
+ *  - if a file-mapped region is copied, we have to create a new
+ *    fdref object, as the source process might disappear; we have to
+ *    use the new process' fd for it.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <minix/hash.h>
+
+#include "proto.h"
+#include "vm.h"
+#include "fdref.h"
+#include "vmproc.h"
+#include "glo.h"
+
+static struct fdref *fdrefs;
+
+/* Debug-time consistency check of the fdref bookkeeping.
+ *
+ * Reports (printf plus stack trace, never a panic) when
+ *  - two distinct fdref objects carry the same fd or the same dev/inode;
+ *  - an fdref's refcount differs from the number of inited file-mapped
+ *    regions, over all in-use processes, that point at it.
+ * Also prints the number of fdref objects when it exceeds 100 and has
+ * changed since the last time it was printed.
+ */
+void fdref_sanitycheck(void)
+{
+       static int prevopen = 0;
+       struct fdref *cur, *other;
+       region_iter iter;
+       int nopen = 0;
+       int slot;
+
+       /* Pass 1: pairwise uniqueness; count the objects along the way. */
+       for(cur = fdrefs; cur; cur = cur->next, nopen++) {
+               for(other = fdrefs; other; other = other->next) {
+                       if(other == cur)
+                               continue;
+                       if(cur->fd == other->fd) {
+                               printf("equal fd omg\n");
+                               util_stacktrace();
+                       }
+                       if(cur->dev == other->dev && cur->ino == other->ino) {
+                               printf("equal metadata omg\n");
+                               util_stacktrace();
+                       }
+               }
+       }
+
+       /* Pass 2: reset the scratch counters. */
+       cur = fdrefs;
+       while(cur) {
+               cur->counting = 0;
+               cur = cur->next;
+       }
+
+       /* Pass 3: recount the references held by regions of live processes. */
+       for(slot = 0; slot < VMP_NR; slot++) {
+               struct vmproc *proc = &vmproc[slot];
+               struct vir_region *vr;
+
+               if(!(proc->vm_flags & VMF_INUSE))
+                       continue;
+
+               region_start_iter_least(&proc->vm_regions_avl, &iter);
+               for(;;) {
+                       vr = region_get_iter(&iter);
+                       if(!vr)
+                               break;
+                       if(vr->def_memtype == &mem_type_mappedfile &&
+                               vr->param.file.inited)
+                               vr->param.file.fdref->counting++;
+                       region_incr_iter(&iter);
+               }
+       }
+
+       /* Pass 4: the recount must agree with the stored reference counts. */
+       for(cur = fdrefs; cur; cur = cur->next) {
+               if(cur->counting == cur->refcount)
+                       continue;
+               printf("counting %d != refcount %d\n",
+                       cur->counting, cur->refcount);
+               util_stacktrace();
+       }
+
+       if(nopen > 100 && nopen != prevopen) {
+               printf("%d open\n", nopen);
+               prevopen = nopen;
+       }
+}
+
+/* Allocate a fresh fdref for (dev, ino) reachable through fd and push it
+ * onto the front of the fdrefs list. The reference count starts at zero;
+ * references are taken with fdref_ref(). 'owner' is not used here.
+ * Returns NULL when the allocation fails.
+ */
+struct fdref *fdref_new(struct vmproc *owner, ino_t ino, dev_t dev, int fd)
+{
+       struct fdref *node;
+
+       if(!SLABALLOC(node))
+               return NULL;
+
+       node->ino = ino;
+       node->dev = dev;
+       node->fd = fd;
+       node->refcount = 0;
+
+       /* Link in at the head of the list. */
+       node->next = fdrefs;
+       fdrefs = node;
+
+       return node;
+}
+
+/* Point 'region' at 'ref' and account for the additional reference. */
+void fdref_ref(struct fdref *ref, struct vir_region *region)
+{
+       assert(ref);
+       ref->refcount++;
+       region->param.file.fdref = ref;
+}
+
+/* Drop the reference that 'region' holds on its fdref.
+ *
+ * The region's fdref pointer is cleared. When this was the last
+ * reference, the fdref is unlinked from the list and freed, and an
+ * asynchronous close of its fd is queued to VFS on behalf of
+ * region->parent. Panics if that request cannot be queued.
+ */
+void fdref_deref(struct vir_region *region)
+{
+       struct fdref *ref = region->param.file.fdref;
+       int fd;
+
+       assert(ref);
+       assert(ref->refcount > 0);
+
+       fd = ref->fd;
+       region->param.file.fdref = NULL;
+       ref->refcount--;
+       assert(ref->refcount >= 0);
+       if(ref->refcount > 0) return;
+
+       /* Last reference: take the object off the list. */
+       if(fdrefs == ref) fdrefs = ref->next;
+       else {
+               struct fdref *r;
+               /* Stop at the end of the list too, so that a missing
+                * entry trips the assert below instead of running off
+                * the list through a NULL pointer.
+                */
+               for(r = fdrefs; r && r->next != ref; r = r->next)
+                       ;
+               assert(r);
+               assert(r->next == ref);
+               r->next = ref->next;
+       }
+
+       SLABFREE(ref);
+
+       /* The ref object is gone; asynchronously close the fd in VFS.
+        *
+        * We don't need a callback as a close failing, although
+        * unexpected, isn't a problem and can't be handled. VFS
+        * will print a diagnostic.
+        */
+       if(vfs_request(VMVFSREQ_FDCLOSE, fd, region->parent,
+               0, 0, NULL, NULL, NULL, 0) != OK) {
+               panic("fdref_deref: could not send close request");
+       }
+}
+
+/* Find an fdref to use for a mapping of (dev, ino) through 'fd'.
+ *
+ * An existing object for the same file and the same fd is returned
+ * as is. An existing object for the same file but another fd is reused
+ * only if 'mayclose' is set; the now redundant 'fd' is then closed
+ * through VFS (a failure to queue the close is only logged). If nothing
+ * can be reused a new object is created with fdref_new(), which may
+ * return NULL.
+ */
+struct fdref *fdref_dedup_or_new(struct vmproc *owner,
+       ino_t ino, dev_t dev, int fd, int mayclose)
+{
+       struct fdref *cand = fdrefs;
+
+       while(cand) {
+               int samefile = (cand->ino == ino && cand->dev == dev);
+
+               if(samefile && cand->fd == fd)
+                       return cand;
+
+               if(samefile && mayclose) {
+                       if(vfs_request(VMVFSREQ_FDCLOSE, fd, owner,
+                               0, 0, NULL, NULL, NULL, 0) != OK) {
+                               printf("fdref_dedup_or_new: could not close\n");
+                       }
+                       return cand;
+               }
+
+               cand = cand->next;
+       }
+
+       return fdref_new(owner, ino, dev, fd);
+}
+
diff --git a/servers/vm/fdref.h b/servers/vm/fdref.h
new file mode 100644 (file)
index 0000000..603baf4
--- /dev/null
@@ -0,0 +1,29 @@
+
+#ifndef _FDREF_H
+#define _FDREF_H 1
+
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <minix/keymap.h>
+#include <minix/minlib.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include <minix/const.h>
+
+/* One fd kept open for file mappings; regions that map the same file
+ * share it. Objects live on a singly linked list managed by fdref.c.
+ * Only the type is declared here: a header must not define a variable.
+ */
+struct fdref {
+       int             fd;             /* fd passed to VFS requests */
+       int             refcount;       /* regions referencing this object */
+       dev_t           dev;            /* device of the mapped file */
+       ino_t           ino;            /* inode of the mapped file */
+       struct fdref    *next;          /* next object on the list */
+       int             counting;       /* sanity check scratch counter */
+};
+
+#endif
+
index 4a5e77fc3012c9386256d08a3d131c444b5d52c0..f4d4002b5d2d231f167a46288680a4fdc89a7458 100644 (file)
@@ -18,6 +18,8 @@
 
 EXTERN struct vmproc vmproc[VMP_NR];
 
+EXTERN long enable_filemap;    /* nonzero: file mmap enabled ('filemap' boot option) */
+
 EXTERN kinfo_t kernel_boot_info;
 
 #if SANITYCHECKS
index caf148de50d817858810c62ef3a33047b97a9b9b..3f494033aca9e8241ed9b582a03754c1612616b0 100644 (file)
@@ -323,6 +323,9 @@ void init_vm(void)
                panic("couldn't get bootinfo: %d", s);
        }
 
+       /* Turn file mmap on? */
+       env_parse("filemap", "d", 0, &enable_filemap, 0, 1);
+
        /* Sanity check */
        assert(kernel_boot_info.mmap_size > 0);
        assert(kernel_boot_info.mods_with_kernel > 0);
@@ -414,6 +417,10 @@ void init_vm(void)
        CALLMAP(VM_WILLEXIT, do_willexit);
        CALLMAP(VM_NOTIFY_SIG, do_notify_sig);
 
+       /* Calls from VFS. */
+       CALLMAP(VM_VFS_REPLY, do_vfs_reply);
+       CALLMAP(VM_VFS_MMAP, do_vfs_mmap);
+
        /* Calls from RS */
        CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
        CALLMAP(VM_RS_UPDATE, do_rs_update);
index b5fd758c833708301e7d5b95409aa5e1f2d73604..7bc0bc24e7a4a57c01b8ffd35ec29ace89a53189 100644 (file)
@@ -118,7 +118,6 @@ do_mapcache(message *msg)
                        printf("VM: map_pf failed\n");
                        return ENOMEM;
                }
-
                assert(!vr->param.pb_cache);
        }
 
diff --git a/servers/vm/mem_file.c b/servers/vm/mem_file.c
new file mode 100644 (file)
index 0000000..93b5462
--- /dev/null
@@ -0,0 +1,251 @@
+
+/* This file implements the methods of memory-mapped files. */
+
+#include <assert.h>
+
+#include "proto.h"
+#include "vm.h"
+#include "region.h"
+#include "glo.h"
+#include "cache.h"
+
+/* These functions are static so as to not pollute the
+ * global namespace, and are accessed through their function
+ * pointers.
+ */
+
+static void mappedfile_split(struct vmproc *vmp, struct vir_region *vr,
+       struct vir_region *r1, struct vir_region *r2);
+static int mappedfile_unreference(struct phys_region *pr);
+static int mappedfile_pagefault(struct vmproc *vmp, struct vir_region *region, 
+       struct phys_region *ph, int write, vfs_callback_t callback, void *, int);
+static int mappedfile_sanitycheck(struct phys_region *pr, char *file, int line);
+static int mappedfile_writable(struct phys_region *pr);
+static int mappedfile_copy(struct vir_region *vr, struct vir_region *newvr);
+static int mappedfile_lowshrink(struct vir_region *vr, vir_bytes len);
+static void mappedfile_delete(struct vir_region *region);
+
+struct mem_type mem_type_mappedfile = {        /* method table for file-mapped regions */
+       .name = "file-mapped memory",
+       .ev_unreference = mappedfile_unreference,
+       .ev_pagefault = mappedfile_pagefault,
+       .ev_sanitycheck = mappedfile_sanitycheck,
+       .ev_copy = mappedfile_copy,
+       .writable = mappedfile_writable,
+       .ev_split = mappedfile_split,
+       .ev_lowshrink = mappedfile_lowshrink,
+       .ev_delete = mappedfile_delete,
+};
+
+/* The physical block behind 'pr' has no references left: release its
+ * page, unless no memory was ever attached to it (MAP_NONE).
+ */
+static int mappedfile_unreference(struct phys_region *pr)
+{
+       assert(pr->ph->refcount == 0);
+
+       if(pr->ph->phys == MAP_NONE)
+               return OK;
+
+       free_mem(ABS2CLICK(pr->ph->phys), 1);
+       return OK;
+}
+
+/* Give 'ph' a private copy of its page through mem_cow() and turn it
+ * into anonymous memory. If 'clearend' is nonzero, the last 'clearend'
+ * bytes of the page are zeroed afterwards. Returns the mem_cow() error
+ * on failure; panics if the zeroing fails.
+ */
+static int cow_block(struct vmproc *vmp, struct vir_region *region,
+       struct phys_region *ph, u16_t clearend)
+{
+       int r = mem_cow(region, ph, MAP_NONE, MAP_NONE);
+
+       if(r != OK) {
+               printf("mappedfile_pagefault: COW failed\n");
+               return r;
+       }
+
+       /* After COW we are a normal piece of anonymous memory. */
+       ph->memtype = &mem_type_anon;
+
+       if(clearend) {
+               phys_bytes tail;
+
+               assert(clearend < VM_PAGE_SIZE);
+
+               /* Physical address of the final 'clearend' bytes. */
+               tail = ph->ph->phys;
+               tail += VM_PAGE_SIZE-clearend;
+
+               if(sys_memset(NONE, 0, tail, clearend) != OK) {
+                       panic("cow_block: clearend failed\n");
+               }
+       }
+
+       return OK;
+}
+
+/* Page fault handler for file-mapped regions.
+ *
+ * Block without memory (MAP_NONE): the page is looked up in the cache,
+ * by device when the fdref carries VMC_NO_INODE and by inode otherwise.
+ * On a hit the cached page is linked in. It is then copied with
+ * cow_block() when rounding offset+clearend up to a page reaches the
+ * region length (clearing the trailing 'clearend' bytes), or, failing
+ * that, when this is a write fault. On a miss the page is requested
+ * from VFS and SUSPEND is returned; 'cb' is invoked with 'state' when
+ * VFS replies. A miss without a callback yields EFAULT.
+ *
+ * Block with memory: only a write fault is expected; it is resolved by
+ * copying the page. Anything else yields EFAULT.
+ */
+static int mappedfile_pagefault(struct vmproc *vmp, struct vir_region *region,
+       struct phys_region *ph, int write, vfs_callback_t cb,
+       void *state, int statelen)
+{
+       u32_t allocflags;
+       int procfd;
+
+       /* Validate the region before its fdref is dereferenced. */
+       assert(ph->ph->refcount > 0);
+       assert(region->param.file.inited);
+       assert(region->param.file.fdref);
+       assert(region->param.file.fdref->dev != NO_DEV);
+
+       procfd = region->param.file.fdref->fd;
+       allocflags = vrallocflags(region->flags);
+
+       /* Totally new block? Create it. */
+       if(ph->ph->phys == MAP_NONE) {
+               struct cached_page *cp;
+               u64_t referenced_offset =
+                       region->param.file.offset + ph->offset;
+               if(region->param.file.fdref->ino == VMC_NO_INODE) {
+                       cp = find_cached_page_bydev(region->param.file.fdref->dev,
+                               referenced_offset, VMC_NO_INODE, 0, 1);
+               } else {
+                       cp = find_cached_page_byino(region->param.file.fdref->dev,
+                               region->param.file.fdref->ino, referenced_offset, 1);
+               }
+               if(cp) {
+                       int result = OK;
+                       pb_unreferenced(region, ph, 0);
+                       pb_link(ph, cp->page, ph->offset, region);
+
+                       if(roundup(ph->offset+region->param.file.clearend,
+                               VM_PAGE_SIZE) >= region->length) {
+                               result = cow_block(vmp, region, ph,
+                                       region->param.file.clearend);
+                       } else if(write) {
+                               result = cow_block(vmp, region, ph, 0);
+                       }
+
+                       return result;
+               }
+
+               /* Not cached: the page has to come from VFS, which needs
+                * a callback to resume the fault later.
+                */
+               if(!cb) {
+                       printf("VM: mem_file: no callback, returning EFAULT\n");
+                       sys_sysctl_stacktrace(vmp->vm_endpoint);
+                       return EFAULT;
+               }
+
+               if(vfs_request(VMVFSREQ_FDIO, procfd, vmp, referenced_offset,
+                       VM_PAGE_SIZE, cb, NULL, state, statelen) != OK) {
+                       printf("VM: mappedfile_pagefault: vfs_request failed\n");
+                       return ENOMEM;
+               }
+
+               return SUSPEND;
+       }
+
+       if(!write) {
+               printf("mappedfile_pagefault: nonwrite fault?\n");
+               return EFAULT;
+       }
+
+       return cow_block(vmp, region, ph, 0);
+}
+
+/* Sanity check hook: register the block's page with usedpages_add() and
+ * require that to succeed. 'file' and 'line' identify the caller.
+ */
+static int mappedfile_sanitycheck(struct phys_region *pr, char *file, int line)
+{
+       MYASSERT(usedpages_add(pr->ph->phys, VM_PAGE_SIZE) == OK);
+
+       return OK;
+}
+
+/* Writability hook: a file-mapped block always reports 0 (not writable),
+ * whatever 'pr' is.
+ */
+static int mappedfile_writable(struct phys_region *pr)
+{
+       return 0;
+}
+
+/* Copy hook: make 'newvr' map the same file, offset and clearend as
+ * 'vr'. The file is attached on behalf of newvr->parent, without
+ * prefilling pages and without permission to close the fd.
+ * Returns the status of mappedfile_setfile().
+ */
+static int mappedfile_copy(struct vir_region *vr, struct vir_region *newvr)
+{
+       int r;
+
+       assert(vr->param.file.inited);
+
+       /* Hand a failure to the caller rather than dropping it. */
+       r = mappedfile_setfile(newvr->parent, newvr, vr->param.file.fdref->fd,
+               vr->param.file.offset,
+               vr->param.file.fdref->dev, vr->param.file.fdref->ino,
+               vr->param.file.clearend, 0, 0);
+       if(r != OK)
+               return r;
+
+       assert(newvr->param.file.inited);
+
+       return OK;
+}
+
+/* Attach a file to the file-mapped region 'region' of process 'owner'.
+ *
+ *  fd, dev, ino  identify the file; an fdref is found or created for them
+ *  offset        file offset that corresponds to the start of the region
+ *  clearend      stored in the region for later page faults
+ *  prefill       if nonzero, map in pages that are already in the cache
+ *  mayclosefd    passed on to fdref_dedup_or_new()
+ *
+ * Prefilling is best effort: it stops at the first failure and at the
+ * page for which rounding vaddr+clearend up to a page reaches the region
+ * length. Always returns OK.
+ */
+int mappedfile_setfile(struct vmproc *owner,
+       struct vir_region *region, int fd, u64_t offset,
+       dev_t dev, ino_t ino, u16_t clearend, int prefill, int mayclosefd)
+{
+       struct fdref *newref = fdref_dedup_or_new(owner, ino, dev, fd, mayclosefd);
+       vir_bytes vaddr;
+
+       assert(newref);
+       assert(!region->param.file.inited);
+       assert(dev != NO_DEV);
+
+       fdref_ref(newref, region);
+       region->param.file.inited = 1;
+       region->param.file.clearend = clearend;
+       region->param.file.offset = offset;
+
+       if(!prefill)
+               return OK;
+
+       for(vaddr = 0; vaddr < region->length; vaddr+=VM_PAGE_SIZE) {
+               u64_t referenced_offset = offset + vaddr;
+               struct cached_page *cp;
+               struct phys_region *pr;
+
+               /* This page is left to the page fault handler. */
+               if(roundup(vaddr+region->param.file.clearend,
+                       VM_PAGE_SIZE) >= region->length)
+                       break;
+
+               cp = (ino == VMC_NO_INODE) ?
+                       find_cached_page_bydev(dev, referenced_offset,
+                               VMC_NO_INODE, 0, 1) :
+                       find_cached_page_byino(dev, ino,
+                               referenced_offset, 1);
+               if(!cp)
+                       continue;
+
+               pr = pb_reference(cp->page, vaddr, region,
+                       &mem_type_mappedfile);
+               if(!pr) {
+                       printf("mappedfile_setfile: pb_reference failed\n");
+                       break;
+               }
+
+               if(map_ph_writept(region->parent, region, pr) != OK) {
+                       printf("mappedfile_setfile: map_ph_writept failed\n");
+                       break;
+               }
+       }
+
+       return OK;
+}
+
+/* Split hook: region 'vr' is being divided into 'r1' followed by 'r2'.
+ * Both halves inherit vr's file parameters and take their own reference
+ * on vr's fdref. r1 gets a clearend of 0; r2's file offset is advanced
+ * by the length of r1.
+ */
+static void mappedfile_split(struct vmproc *vmp, struct vir_region *vr,
+       struct vir_region *r1, struct vir_region *r2)
+{
+       struct fdref *shared;
+
+       assert(!r1->param.file.inited);
+       assert(!r2->param.file.inited);
+       assert(vr->param.file.inited);
+       assert(r1->length + r2->length == vr->length);
+       assert(vr->def_memtype == &mem_type_mappedfile);
+       assert(r1->def_memtype == &mem_type_mappedfile);
+       assert(r2->def_memtype == &mem_type_mappedfile);
+
+       shared = vr->param.file.fdref;
+
+       /* First half: same start offset, nothing to clear at its end. */
+       r1->param.file = vr->param.file;
+       fdref_ref(shared, r1);
+       r1->param.file.clearend = 0;
+
+       /* Second half: starts r1->length further into the file. */
+       r2->param.file = vr->param.file;
+       fdref_ref(shared, r2);
+       r2->param.file.offset += r1->length;
+
+       assert(r1->param.file.inited);
+       assert(r2->param.file.inited);
+}
+
+/* Low-shrink hook: advance the region's file offset by 'len' bytes.
+ * Always returns OK.
+ */
+static int mappedfile_lowshrink(struct vir_region *vr, vir_bytes len)
+{
+       assert(vr->param.file.inited);
+
+       vr->param.file.offset += len;
+
+       return OK;
+}
+
+/* Delete hook: release the region's fdref reference and mark the
+ * region's file parameters as no longer initialised.
+ */
+static void mappedfile_delete(struct vir_region *region)
+{
+       assert(region->def_memtype == &mem_type_mappedfile);
+       assert(region->param.file.inited);
+       assert(region->param.file.fdref);
+
+       fdref_deref(region);
+
+       region->param.file.inited = 0;
+}
index 4340f94b55c54bf473de7333a1079872064f8bd0..3cc3707c65b6a9159c81af341861f1bf104a9bca 100644 (file)
@@ -81,6 +81,127 @@ static struct vir_region *mmap_region(struct vmproc *vmp, vir_bytes addr,
        return vr;
 }
 
+static int mmap_file(struct vmproc *vmp,
+       int vmfd, u32_t off_lo, u32_t off_hi, int flags,
+       ino_t ino, dev_t dev, u64_t filesize, vir_bytes addr, vir_bytes len,
+       vir_bytes *retaddr, u16_t clearend, int writable, int mayclosefd)
+{
+/* VFS has replied to a VMVFSREQ_FDLOOKUP request. */
+       struct vir_region *vr;
+       u64_t file_offset, page_offset;
+       int result = OK;
+       u32_t vrflags = 0;
+
+       if(writable) vrflags |= VR_WRITABLE;
+
+       if(flags & MAP_THIRDPARTY) {
+               file_offset = off_lo;
+       } else {
+               file_offset = make64(off_lo, off_hi);
+               if(off_hi && !off_lo) {
+                       /* XXX clang compatability hack */
+                       off_hi = file_offset = 0;
+               }
+       }
+
+       /* Do some page alignments. */
+       if((page_offset = (file_offset % VM_PAGE_SIZE))) {
+               file_offset -= page_offset;
+               len += page_offset;
+       }
+
+       len = roundup(len, VM_PAGE_SIZE);
+
+       /* All numbers should be page-aligned now. */
+       assert(!(len % VM_PAGE_SIZE));
+       assert(!(filesize % VM_PAGE_SIZE));
+       assert(!(file_offset % VM_PAGE_SIZE));
+
+#if 0
+       /* XXX ld.so relies on longer-than-file mapping */
+       if((u64_t) len + file_offset > filesize) {
+               printf("VM: truncating mmap dev 0x%x ino %d beyond file size in %d; offset %llu, len %lu, size %llu; ",
+                       dev, ino, vmp->vm_endpoint,
+                       file_offset, len, filesize);
+               len = filesize - file_offset;
+               return EINVAL;
+       }
+#endif
+
+       if(!(vr = mmap_region(vmp, addr, flags, len,
+               vrflags, &mem_type_mappedfile, 0))) {
+               result = ENOMEM;
+       } else {
+               *retaddr = vr->vaddr + page_offset;
+               result = OK;
+
+               mappedfile_setfile(vmp, vr, vmfd,
+                       file_offset, dev, ino, clearend, 1, mayclosefd);
+       }
+
+       return result;
+}
+
+/* VM_VFS_MMAP call: map a file into the process named in the message,
+ * at the fixed address the message gives, as a private mapping.
+ * Returns ENXIO when file mapping is disabled, otherwise the result of
+ * mmap_file(). Panics on an invalid endpoint.
+ */
+int do_vfs_mmap(message *m)
+{
+       struct vmproc *vmp;
+       vir_bytes mapped;
+       u16_t clearend, flags;
+       int slot;
+
+       /* It might be disabled */
+       if(!enable_filemap)
+               return ENXIO;
+
+       /* One message field carries both the clearend and the flags. */
+       flags = (m->m_u.m_vm_vfs.clearend_and_flags & MVM_FLAGSMASK);
+       clearend = (m->m_u.m_vm_vfs.clearend_and_flags & MVM_LENMASK);
+
+       if(vm_isokendpt(m->m_u.m_vm_vfs.who, &slot) != OK)
+               panic("bad ep %d from vfs", m->m_u.m_vm_vfs.who);
+       vmp = &vmproc[slot];
+
+       return mmap_file(vmp, m->m_u.m_vm_vfs.fd, m->m_u.m_vm_vfs.offset, 0,
+               MAP_PRIVATE | MAP_FIXED,
+               m->m_u.m_vm_vfs.ino, m->m_u.m_vm_vfs.dev,
+               (u64_t) LONG_MAX * VM_PAGE_SIZE,
+               m->m_u.m_vm_vfs.vaddr, m->m_u.m_vm_vfs.len, &mapped,
+               clearend, flags, 0);
+}
+
+/* Callback for the VMVFSREQ_FDLOOKUP request issued by do_mmap(): VFS
+ * has answered, so finish (or fail) the file mmap and unblock the
+ * caller with a reply message.
+ *
+ *  vmp        process that called mmap(), or NULL if its endpoint is no
+ *             longer valid
+ *  replymsg   the reply from VFS
+ *  cbarg      unused
+ *  origmsg_v  copy of the original mmap request message
+ */
+static void mmap_file_cont(struct vmproc *vmp, message *replymsg, void *cbarg,
+       void *origmsg_v)
+{
+       message *origmsg = (message *) origmsg_v;
+       message mmap_reply;
+       int result;
+       int writable = 0;
+       vir_bytes v = (vir_bytes) MAP_FAILED;
+
+       if(!vmp) {
+               /* The requester is gone: nothing to map, nobody to reply to.
+                * NOTE(review): after a successful lookup the fd from VFS
+                * is not closed on this path - confirm whether it must be.
+                */
+               printf("VM: mmap_file_cont: requesting process is gone\n");
+               return;
+       }
+
+       if(origmsg->VMM_PROT & PROT_WRITE)
+               writable = 1;
+
+       if(replymsg->VMV_RESULT != OK) {
+               printf("VM: VFS reply failed (%d)\n", replymsg->VMV_RESULT);
+               sys_sysctl_stacktrace(vmp->vm_endpoint);
+               /* The error is in the reply from VFS, not in the request. */
+               result = replymsg->VMV_RESULT;
+       } else {
+               /* Finish mmap */
+               result = mmap_file(vmp, replymsg->VMV_FD, origmsg->VMM_OFFSET_LO,
+                       origmsg->VMM_OFFSET_HI, origmsg->VMM_FLAGS,
+                       replymsg->VMV_INO, replymsg->VMV_DEV,
+                       (u64_t) replymsg->VMV_SIZE_PAGES*PAGE_SIZE,
+                       origmsg->VMM_ADDR,
+                       origmsg->VMM_LEN, &v, 0, writable, 1);
+       }
+
+       /* Unblock requesting process. */
+       memset(&mmap_reply, 0, sizeof(mmap_reply));
+       mmap_reply.m_type = result;
+       mmap_reply.VMM_ADDR = v;
+
+       if(send(vmp->vm_endpoint, &mmap_reply) != OK)
+               panic("VM: mmap_file_cont: send() failed");
+}
+
 /*===========================================================================*
  *                             do_mmap                                      *
  *===========================================================================*/
@@ -111,11 +232,16 @@ int do_mmap(message *m)
 
        vmp = &vmproc[n];
 
+       /* "SUSv3 specifies that mmap() should fail if length is 0" */
+       if(len <= 0) {
+               return EINVAL;
+       }
+
        if(m->VMM_FD == -1 || (m->VMM_FLAGS & MAP_ANON)) {
                /* actual memory in some form */
                mem_type_t *mt = NULL;
 
-               if(m->VMM_FD != -1 || len <= 0) {
+               if(m->VMM_FD != -1) {
                        printf("VM: mmap: fd %d, len 0x%x\n", m->VMM_FD, len);
                        return EINVAL;
                }
@@ -134,7 +260,23 @@ int do_mmap(message *m)
                        return ENOMEM;
                }
        } else {
-               return ENXIO;
+               /* File mapping might be disabled */
+               if(!enable_filemap) return ENXIO;
+
+               /* files get private copies of pages on writes. */
+               if(!(m->VMM_FLAGS & MAP_PRIVATE)) {
+                       printf("VM: mmap file must MAP_PRIVATE\n");
+                       return ENXIO;
+               }
+
+               if(vfs_request(VMVFSREQ_FDLOOKUP, m->VMM_FD, vmp, 0, 0,
+                       mmap_file_cont, NULL, m, sizeof(*m)) != OK) {
+                       printf("VM: vfs_request for mmap failed\n");
+                       return ENXIO;
+               }
+
+               /* request queued; don't reply. */
+               return SUSPEND;
        }
 
        /* Return mapping, as seen from process. */
index 2d5018a50c8adfd9be56e62aa2f3514ea177e447..f162045d2698883b6a29748a61c7b806d8d04b1a 100644 (file)
@@ -228,5 +228,14 @@ int vfs_request(int reqno, int fd, struct vmproc *vmp, u64_t offset,
 int do_vfs_reply(message *m);
 
 /* mem_file.c */
-void mappedfile_setfile(struct vir_region *region, int fd, u64_t offset,
-       dev_t dev, ino_t ino, u16_t clearend, int prefill);
+int mappedfile_setfile(struct vmproc *owner, struct vir_region *region,
+       int fd, u64_t offset,
+       dev_t dev, ino_t ino, u16_t clearend, int prefill, int mayclose);
+
+/* fdref.c */
+struct fdref *fdref_new(struct vmproc *owner, ino_t ino, dev_t dev, int fd);
+struct fdref *fdref_dedup_or_new(struct vmproc *owner, ino_t ino, dev_t dev,
+       int fd, int mayclose);
+void fdref_ref(struct fdref *ref, struct vir_region *region);
+void fdref_deref(struct vir_region *region);
+void fdref_sanitycheck(void);
index d98caf5cb5f8f83e343cd1438f5ce355cbef319d..500056b9383bd0281adb4947895da43a4ffda2ef 100644 (file)
@@ -833,6 +833,8 @@ struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr)
        if(!(newvr = region_new(vr->parent, vr->vaddr, vr->length, vr->flags, vr->def_memtype)))
                return NULL;
 
+       USE(newvr, newvr->parent = vmp;);
+
        if(vr->def_memtype->ev_copy && (r=vr->def_memtype->ev_copy(vr, newvr)) != OK) {
                map_free(newvr);
                printf("VM: memtype-specific copy failed (%d)\n", r);
@@ -980,7 +982,6 @@ struct vir_region *start_src_vr;
                        map_free_proc(dst);
                        return ENOMEM;
                }
-               USE(newvr, newvr->parent = dst;);
                region_insert(&dst->vm_regions_avl, newvr);
                assert(vr->length == newvr->length);
 
index 9d615ef03803f937e30e3eaaf5c7667f3595f158..070c0d467a9798d10ba7975ce7d1e95c8803ce9e 100644 (file)
@@ -19,6 +19,7 @@
 #include "phys_region.h"
 #include "memtype.h"
 #include "vm.h"
+#include "fdref.h"
 
 struct phys_block {
 #if SANITYCHECKS
@@ -53,11 +54,9 @@ typedef struct vir_region {
                } shared;
                struct phys_block *pb_cache;
                struct {
-                       int     procfd; /* cloned fd in proc for mmap */
-                       dev_t   dev;
-                       ino_t   ino;
-                       u64_t   offset;
                        int     inited;
+                       struct fdref    *fdref;
+                       u64_t   offset;
                        u16_t   clearend;
                } file;
        } param;
diff --git a/servers/vm/vfs.c b/servers/vm/vfs.c
new file mode 100644 (file)
index 0000000..274c2af
--- /dev/null
@@ -0,0 +1,144 @@
+
+/* Sending requests to VFS and handling the replies.  */
+
+#define _SYSTEM 1
+
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include <minix/config.h>
+#include <minix/const.h>
+#include <minix/ds.h>
+#include <minix/endpoint.h>
+#include <minix/minlib.h>
+#include <minix/type.h>
+#include <minix/ipc.h>
+#include <minix/sysutil.h>
+#include <minix/syslib.h>
+#include <minix/type.h>
+#include <minix/bitmap.h>
+#include <string.h>
+#include <errno.h>
+#include <env.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/param.h>
+
+#include "proto.h"
+#include "glo.h"
+#include "util.h"
+#include "region.h"
+#include "sanitycheck.h"
+
+#define STATELEN 50
+
+static struct vfs_request_node {       /* one VM->VFS request */
+       message                 reqmsg;         /* message sent to VFS */
+       char                    reqstate[STATELEN];     /* caller state copy, handed to callback */
+       void                    *opaque;        /* cbarg, handed to callback */
+       endpoint_t              who;            /* endpoint the request is for */
+       u32_t                   req_id;         /* checked against the reply's id */
+       vfs_callback_t          callback;       /* called on reply; may be NULL */
+       struct vfs_request_node *next;          /* next queued request */
+} *first_queued, *active;      /* list of waiting requests; request in flight */
+
+/* Take the request at the head of the queue, make it the active one and
+ * send its message to VFS. There must be a queued request and no active
+ * one. Panics if the send fails.
+ */
+static void activate(void)
+{
+       struct vfs_request_node *node = first_queued;
+
+       assert(!active);
+       assert(first_queued);
+
+       first_queued = node->next;
+       active = node;
+
+       if(asynsend3(VFS_PROC_NR, &node->reqmsg, AMF_NOREPLY) != OK)
+               panic("VM: asynsend to VFS failed");
+}
+
+/*===========================================================================*
+ *                              vfs_request                                 *
+ *===========================================================================*/
+int vfs_request(int reqno, int fd, struct vmproc *vmp, u64_t offset, u32_t len,
+       vfs_callback_t reply_callback, void *cbarg, void *state, int statelen)
+{
+/* Perform an asynchronous request to VFS.
+ * We send a message of type VFS_VMCALL to VFS. VFS will respond
+ * with message type VM_VFS_REPLY. We send the request asynchronously
+ * and then handle the reply as if it were a VM_VFS_REPLY request.
+ *
+ * 'reply_callback' (may be NULL) is called with 'cbarg' and a copy of
+ * the 'statelen' bytes at 'state' when the reply arrives. 'statelen'
+ * may not exceed STATELEN. The request is put at the front of the queue
+ * and is sent right away if no other request is in flight.
+ * Returns OK, or ENOMEM if no request node could be allocated.
+ */
+       static u32_t reqid = 0;
+       struct vfs_request_node *node;
+       message *msg;
+
+       reqid++;
+
+       assert(statelen <= STATELEN);
+
+       if(!SLABALLOC(node)) {
+               printf("vfs_request: no memory for request node\n");
+               return ENOMEM;
+       }
+
+       /* The message that goes to VFS. */
+       msg = &node->reqmsg;
+       msg->m_type = VFS_VMCALL;
+       msg->VFS_VMCALL_REQ = reqno;
+       msg->VFS_VMCALL_FD = fd;
+       msg->VFS_VMCALL_REQID = reqid;
+       msg->VFS_VMCALL_ENDPOINT = vmp->vm_endpoint;
+       msg->VFS_VMCALL_OFFSET_LO = ex64lo(offset);
+       msg->VFS_VMCALL_OFFSET_HI = ex64hi(offset);
+       msg->VFS_VMCALL_LENGTH = len;
+
+       /* What we need to remember to handle the reply. */
+       node->req_id = reqid;
+       node->who = vmp->vm_endpoint;
+       node->callback = reply_callback;
+       node->opaque = cbarg;
+       if(state)
+               memcpy(node->reqstate, state, statelen);
+
+       /* Queue it, at the front. */
+       node->next = first_queued;
+       first_queued = node;
+
+       /* Send the request message if none pending. */
+       if(!active)
+               activate();
+
+       return OK;
+}
+
+/*===========================================================================*
+ *                              do_vfs_reply                                 *
+ *===========================================================================*/
+int do_vfs_reply(message *m)
+{
+/* VFS has handled a VM request and VFS has replied. It must be the
+ * active request. Its callback, if any, is run and the request node is
+ * freed; then the next queued request, if any, is sent. Messages that
+ * do not come from VFS are refused with ENOSYS.
+ */
+       struct vfs_request_node *done = active;
+       struct vmproc *vmp;
+       int slot;
+
+       if(m->m_source != VFS_PROC_NR)
+               return ENOSYS;
+
+       assert(active);
+       assert(active->req_id == m->VMV_REQID);
+
+       /* the endpoint may have exited */
+       vmp = (vm_isokendpt(m->VMV_ENDPOINT, &slot) == OK) ?
+               &vmproc[slot] : NULL;
+
+       /* No request is in flight any more; 'done' still holds the node. */
+       active = NULL;
+
+       /* Invoke requested reply-callback within VM. */
+       if(done->callback)
+               done->callback(vmp, m, done->opaque, done->reqstate);
+
+       SLABFREE(done);
+
+       /* Send the next request message if any. */
+       if(first_queued)
+               activate();
+
+       return SUSPEND; /* don't reply to the reply */
+}
+
index 7d4762dc4067c6ccabc1030922f6411c50da9782..87ee03dd6910aa69ac6b82abece63d1ab5d8d4e1 100644 (file)
@@ -34,6 +34,7 @@ OBJS.test57=  test57loop.o
 # Cache testing programs
 OBJS.test71+=  testcache.o
 OBJS.test72+=  testcache.o
+OBJS.test74+=  testcache.o
 LDADD.test72+= -lminixfs
 
 PROGS += testvm
@@ -47,7 +48,7 @@ MINIX_TESTS= \
  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 \
 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \
 41 42 43 44 45 46    48 49 50    52 53 54 55 56    58 59 60 \
-61       64 65 66 67 68 69 70 71 72 73
+61       64 65 66 67 68 69 70 71 72 73 74
 
 .if ${MACHINE_ARCH} == "i386"
 MINIX_TESTS+= \
index ca672584399099278b3f784cdfd933d844329dd0..d0bebb75b5e7a606fe25d70c1bab6367166251fe 100644 (file)
@@ -13,8 +13,7 @@
 #include "common.h"
 
 int common_test_nr = -1, errct = 0, subtest;
-
-int quietflag = 1;
+int quietflag = 1, bigflag = 0;
 
 /* provide a default max_error symbol as Max_error with a value
  * of 5. The test program can override it wit its own max_error
@@ -30,6 +29,11 @@ int test_nr;
   char buf[64];
   int i;
 
+  /* if this variable is set, specify to tests we are running
+   * in 'overnight' mode
+   */
+  bigflag = !!getenv(BIGVARNAME);
+
   common_test_nr = test_nr;
   printf("Test %2d ", test_nr);
   fflush(stdout);              /* since stdout is probably line buffered */
index 41330fdc40d8c73eaa05c49dbf45042c9f879e05..0abb58b84d8ca5261a00e4683288bb192cc8fe63 100755 (executable)
--- a/test/run
+++ b/test/run
@@ -28,6 +28,11 @@ alltests="   1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 \
         sh1.sh sh2.sh interp.sh"
 tests_no=`expr 0`
 
+# test mmap only if enabled in sysenv
+if sysenv filemap >/dev/null
+then   alltests="$alltests 74"
+fi
+
 # If root, make sure the setuid tests have the correct permissions
 # and make the dir bin-owned.
 if [ "$ROOT" ]
index 94b7b4b4209898f258ec495447aeffe5168629e9..954b972c8227fbb53976c6d9ef5aaea3839696d5 100644 (file)
 #include "common.h"
 #include "testcache.h"
 
-/* we want to flexibly split this test over multiple files
- * - for big working sets we might run over the 2GB MFS file limit
- * - we might want to test the FS being able to handle lots of
- *   files / unusual metadata situations
- */
-#define MBPERFILE 100
-#define MB (1024*1024)
-#define MAXFILES ((u64_t) MAXBLOCKS * MAXBLOCKSIZE / MB / MBPERFILE + 1)
-
-static int fds[MAXFILES];
-
-static void
-get_fd_offset(int b, int blocksize, u64_t *file_offset, int *fd)
-{
-       u64_t offset = (u64_t) b * blocksize;
-       int filenumber;
-
-       filenumber = offset / MB / MBPERFILE;
-
-       assert(filenumber >= 0 && filenumber < MAXFILES);
-       assert(fds[filenumber] > 0);
-
-       *fd = fds[filenumber];
-       *file_offset = offset - (filenumber * MBPERFILE * MB);
-}
-
 int
 dowriteblock(int b, int blocksize, u32_t seed, char *data)
 {
@@ -86,19 +60,14 @@ void testend(void) { }
 int
 main(int argc, char *argv[])
 {
-       int f, big = !!getenv(BIGVARNAME), iter = 2;
+       int iter = 2;
 
        start(71);
 
-       cachequiet(!big);
-       if(big) iter = 3;
+       cachequiet(!bigflag);
+       if(bigflag) iter = 3;
 
-       for(f = 0; f < MAXFILES; f++) {
-               char tempfilename[] = "cachetest.XXXXXXXX";
-               fds[f] = mkstemp(tempfilename);
-               if(fds[f] < 0) { perror("mkstemp"); e(20); return 1; }
-               assert(fds[f] > 0);
-       }
+       makefiles(MAXFILES);
 
        /* Try various combinations working set sizes
         * and block sizes in order to specifically 
@@ -112,18 +81,13 @@ main(int argc, char *argv[])
        if(dotest(PAGE_SIZE*3,  100, iter)) e(3);
        if(dotest(PAGE_SIZE,  20000, iter)) e(5);
 
-       if(big) {
+       if(bigflag) {
                u32_t totalmem, freemem, cachedmem;
                if(dotest(PAGE_SIZE,  150000, iter)) e(5);
                getmem(&totalmem, &freemem, &cachedmem);
                if(dotest(PAGE_SIZE,  totalmem*1.5, iter)) e(6);
        }
 
-       for(f = 0; f < MAXFILES; f++) {
-               assert(fds[f] > 0);
-               close(fds[f]);
-       }
-
        quit();
 
        return 0;
diff --git a/test/test74.c b/test/test74.c
new file mode 100644 (file)
index 0000000..8ccdaaa
--- /dev/null
@@ -0,0 +1,113 @@
+/* Test 74 - mmap functionality test.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/ioc_memory.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "common.h"
+#include "testcache.h"
+
+int
+dowriteblock(int b, int blocksize, u32_t seed, char *data)
+{
+/* Write one block of data to the test file and offset get_fd_offset() picks.
+ * Returns blocksize, or -1 if pwrite() wrote fewer bytes than that. */
+       u64_t pos;
+       int filedes;
+
+       get_fd_offset(b, blocksize, &pos, &filedes);
+       if(pwrite(filedes, data, blocksize, pos) >= blocksize)
+               return blocksize;
+
+       perror("pwrite");
+       return -1;
+}
+
+int
+readblock(int b, int blocksize, u32_t seed, char *data)
+{
+/* Check that block b reads the same via pread() and via a private read-only
+ * mapping, made in random order. Returns blocksize on success, else -1. */
+       u64_t offset;
+       int fd;
+       char *mmapdata;
+       int pread_first = random() % 2;
+       int result = blocksize;
+
+       get_fd_offset(b, blocksize, &offset, &fd);
+
+       if(pread_first) {
+               if(pread(fd, data, blocksize, offset) < blocksize) {
+                       perror("pread");
+                       return -1;
+               }
+       }
+
+       if((mmapdata = minix_mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE,
+               fd, offset)) == MAP_FAILED) {
+               perror("mmap");
+               return -1;
+       }
+
+       /* Failures from here on fall through to munmap so nothing stays mapped. */
+       if(!pread_first && pread(fd, data, blocksize, offset) < blocksize) {
+               perror("pread");
+               result = -1;
+       } else if(memcmp(mmapdata, data, blocksize)) {
+               fprintf(stderr, "readblock: mmap, pread mismatch\n");
+               result = -1;
+       }
+
+       if(minix_munmap(mmapdata, blocksize) < 0) {
+               perror("munmap");
+               return -1;
+       }
+
+       return result;
+}
+
+void testend(void) { } /* declared in testcache.h; empty in this test */
+
+int
+main(int argc, char *argv[])
+{
+/* Test 74 driver: create the test files and run the dotest() workloads;
+ * with bigflag set, use one more iteration and two larger block counts. */
+       int iter = 2;
+
+       start(74);
+       makefiles(MAXFILES);
+
+       cachequiet(!bigflag);
+       if(bigflag) iter = 3;
+
+       /* Try various combinations of working set sizes and block
+        * sizes in order to specifically target the primary cache,
+        * then primary+secondary cache, then primary+secondary
+        * cache+secondary cache eviction. Every call site gets its
+        * own error number so a failure report identifies the case.
+        */
+       if(dotest(PAGE_SIZE,    100, iter)) e(1);
+       if(dotest(PAGE_SIZE*2,  100, iter)) e(2);
+       if(dotest(PAGE_SIZE*3,  100, iter)) e(3);
+       if(dotest(PAGE_SIZE,  20000, iter)) e(4);
+
+       if(bigflag) {
+               u32_t totalmem, freemem, cachedmem;
+               if(dotest(PAGE_SIZE,  150000, iter)) e(5);
+               getmem(&totalmem, &freemem, &cachedmem);
+               if(dotest(PAGE_SIZE,  totalmem*1.5, iter)) e(6);
+       }
+
+       quit();
+
+       return 0;
+}
+
index b25141a270a843307c95f92bf6620d9f8b46c671..e589250674f7b0198bc13af8f64346c147e0e67f 100644 (file)
@@ -23,6 +23,8 @@
 
 extern int quietflag;
 
+int fds[MAXFILES];
+
 static void
 genblock(int b, char *blockdata, int blocksize, u32_t seed)
 {
@@ -210,6 +212,37 @@ dotest(int blocksize, int nblocks, int iterations)
        return 0;
 }
 
+void
+get_fd_offset(int b, int blocksize, u64_t *file_offset, int *fd)
+{
+/* Map block b to the test file holding it (MBPERFILE MB per file) and
+ * return that file's fd plus the block's byte offset within the file. */
+        u64_t offset = (u64_t) b * blocksize;
+        int filenumber = offset / MB / MBPERFILE;
+
+        assert(filenumber >= 0 && filenumber < MAXFILES);
+        assert(fds[filenumber] > 0);
+        *fd = fds[filenumber];
+        /* multiply in 64 bits: as plain int this product can exceed INT_MAX */
+        *file_offset = offset - ((u64_t) filenumber * MBPERFILE * MB);
+}
+
+void
+makefiles(int n)
+{
+/* Create n mkstemp() files, keeping their descriptors in fds[0..n-1]. */
+       int i;
+       for(i = 0; i < n; i++) {
+               char name_template[] = "cachetest.XXXXXXXX";
+               if((fds[i] = mkstemp(name_template)) < 0) {
+                       perror("mkstemp");
+                       fprintf(stderr, "mkstemp %d/%d failed\n", i, n);
+                       exit(1);
+               }
+               assert(fds[i] > 0);
+       }
+}
+
 void cachequiet(int quiet)
 {
        quietflag = quiet;
index de735f6ae02c78be1921f6b197a4ecfaa7465ec3..01ad939db57517d885e4701f6679a63be545874c 100644 (file)
@@ -14,5 +14,21 @@ int readblock(int b, int blocksize, u32_t seed, char *block);
 void testend(void);
 int dotest(int blocksize, int nblocks, int iterations);
 void cachequiet(int quiet);
+void get_fd_offset(int b, int blocksize, u64_t *file_offset, int *fd);
+void makefiles(int n);
 
 #define OK_BLOCK_GONE  -999
+
+/* for file-oriented tests:
+ *
+ * we want to flexibly split tests over multiple files
+ * - for big working sets we might run over the 2GB MFS file limit
+ * - we might want to test the FS being able to handle lots of
+ *   files / unusual metadata situations
+ */
+#define MBPERFILE 2000
+#define MB (1024*1024)
+#define MAXFILES ((u64_t) MAXBLOCKS * MAXBLOCKSIZE / MB / MBPERFILE + 1)
+
+extern int fds[MAXFILES], bigflag;
+