VM: fix for handling one-shot page memory
author David van Moolenbroek <david@minix3.org>
Mon, 22 Dec 2014 17:20:40 +0000 (17:20 +0000)
committer David van Moolenbroek <david@minix3.org>
Tue, 10 Feb 2015 13:47:27 +0000 (13:47 +0000)
The original one-shot page patch (git-e321f65) did not account for the
possibility of pagefaults happening while copying memory in the
kernel.  This allowed a simple cp(1) from vbfs to hang the system,
since VM was repeatedly requesting the same page from the file system.

With this fix, VM no longer tries to fetch the same memory-mapped page
from VFS more than once per memory handling request from the kernel.
In addition to fixing the original issue, this change should make
memory handling somewhat more robust and ever-so-slightly faster.

Test74 has been extended with a simple test for this case.

Change-Id: I6e565f3750141e51b52ec98c938f8e1aa40070d0

minix/servers/vm/pagefaults.c
minix/tests/test74.c
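
The heart of the change below is that handle_memory_step() now walks the range
one page at a time and carries a retry flag: the first pass for a page may ask
VFS for its contents (suspending the request), while the retry pass maps the
page in without calling into VFS again, which is what one-shot pages need.
The stand-alone sketch that follows models only that control flow; every
identifier in it (handle_one_page, NPAGES and so on) is made up for
illustration and is not part of the MINIX VM code.

/*
 * Toy model of the per-page retry logic added to handle_memory_step() below.
 * Only the shape of the control flow (ask VFS at most once per page, then map
 * the page on the retry pass) mirrors the actual change.
 */
#include <stdbool.h>
#include <stdio.h>

#define NPAGES 4

/*
 * Stand-in for handling one page.  Returns 1 ("suspend") when a request to
 * VFS would be issued, 0 when the page is simply mapped in.
 */
static int
handle_one_page(int page, bool retry)
{
	if (!retry) {
		/* First pass: request the page contents from VFS and suspend. */
		printf("page %d: asking VFS, suspending\n", page);
		return 1;
	}

	/*
	 * Second pass for the same page: the VFS reply is in (or VFS cannot
	 * help), so map in whatever is available and do not ask VFS again.
	 * This is what keeps one-shot pages from looping forever.
	 */
	printf("page %d: mapping in on retry\n", page);
	return 0;
}

int
main(void)
{
	/* Walk the range one page at a time, as the new inner loop below does. */
	for (int page = 0; page < NPAGES; page++) {
		bool retry = false;

		while (handle_one_page(page, retry) != 0)
			retry = true;	/* only the page that suspended is retried */
	}
	return 0;
}

Compiled with a C99 compiler, this prints one "asking VFS" line and one
"mapping in" line per page, mirroring the at-most-one-VFS-request-per-page
behaviour described in the commit message.
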

index ed1f0a4760784a9535cd0e144ede95e7223a27e5..6e6b845cd58d105b58784a332d6c4c3292106804 100644 (file)
--- a/minix/servers/vm/pagefaults.c
+++ b/minix/servers/vm/pagefaults.c
@@ -51,7 +51,7 @@ struct hm_state {
 
 static void handle_memory_continue(struct vmproc *vmp, message *m,
         void *arg, void *statearg);
-static int handle_memory_step(struct hm_state *hmstate);
+static int handle_memory_step(struct hm_state *hmstate, int retry);
 static void handle_memory_final(struct hm_state *state, int result);
 
 /*===========================================================================*
@@ -183,7 +183,7 @@ static void handle_memory_continue(struct vmproc *vmp, message *m,
                return;
        }
 
-       r = handle_memory_step(state);
+       r = handle_memory_step(state, TRUE /*retry*/);
 
        assert(state->valid == VALID);
 
@@ -271,7 +271,7 @@ int handle_memory_start(struct vmproc *vmp, vir_bytes mem, vir_bytes len,
        state.valid = VALID;
        state.vfs_avail = vfs_avail;
 
-       r = handle_memory_step(&state);
+       r = handle_memory_step(&state, FALSE /*retry*/);
 
        if(r == SUSPEND) {
                assert(caller != NONE);
@@ -328,9 +328,11 @@ void do_memory(void)
        }
 }
 
-static int handle_memory_step(struct hm_state *hmstate)
+static int handle_memory_step(struct hm_state *hmstate, int retry)
 {
        struct vir_region *region;
+       vir_bytes offset, length, sublen;
+       int r;
 
        /* Page-align memory and length. */
        assert(hmstate);
@@ -339,7 +341,6 @@ static int handle_memory_step(struct hm_state *hmstate)
        assert(!(hmstate->len % VM_PAGE_SIZE));
 
        while(hmstate->len > 0) {
-               int r;
                if(!(region = map_lookup(hmstate->vmp, hmstate->mem, NULL))) {
 #if VERBOSE
                        map_printmap(hmstate->vmp);
@@ -351,30 +352,59 @@ static int handle_memory_step(struct hm_state *hmstate)
                        printf("VM: do_memory: write to unwritable map\n");
 #endif
                        return EFAULT;
-               } else {
-                       vir_bytes offset, sublen;
-                       assert(region->vaddr <= hmstate->mem);
-                       assert(!(region->vaddr % VM_PAGE_SIZE));
-                       offset = hmstate->mem - region->vaddr;
-                       sublen = hmstate->len;
-                       if(offset + sublen > region->length)
-                               sublen = region->length - offset;
-       
+               }
+
+               assert(region->vaddr <= hmstate->mem);
+               assert(!(region->vaddr % VM_PAGE_SIZE));
+               offset = hmstate->mem - region->vaddr;
+               length = hmstate->len;
+               if (offset + length > region->length)
+                       length = region->length - offset;
+
+               /*
+                * Handle one page at a time.  While it seems beneficial to
+                * handle multiple pages in one go, the opposite is true:
+                * map_handle_memory will handle one page at a time anyway, and
+                * if we give it the whole range multiple times, it will have
+                * to recheck pages it already handled.  In addition, in order
+                * to handle one-shot pages, we need to know whether we are
+                * retrying a single page, and that is not possible if this is
+                * hidden in map_handle_memory.
+                */
+               while (length > 0) {
+                       sublen = VM_PAGE_SIZE;
+
+                       assert(sublen <= length);
+                       assert(offset + sublen <= region->length);
+
+                       /*
+                        * Upon the second try for this range, do not allow
+                        * calling into VFS again.  This prevents eternal loops
+                        * in case the FS messes up, and allows one-shot pages
+                        * to be mapped in on the second call.
+                        */
                        if((region->def_memtype == &mem_type_mappedfile &&
-                         !hmstate->vfs_avail) || hmstate->caller == NONE) {
-                               r = map_handle_memory(hmstate->vmp, region, offset,
-                                  sublen, hmstate->wrflag, NULL, NULL, 0);
+                           (!hmstate->vfs_avail || retry)) ||
+                           hmstate->caller == NONE) {
+                               r = map_handle_memory(hmstate->vmp, region,
+                                   offset, sublen, hmstate->wrflag, NULL,
+                                   NULL, 0);
                                assert(r != SUSPEND);
                        } else {
-                               r = map_handle_memory(hmstate->vmp, region, offset,
-                                  sublen, hmstate->wrflag, handle_memory_continue,
-                                       hmstate, sizeof(*hmstate));
+                               r = map_handle_memory(hmstate->vmp, region,
+                                   offset, sublen, hmstate->wrflag,
+                                   handle_memory_continue, hmstate,
+                                   sizeof(*hmstate));
                        }
 
                        if(r != OK) return r;
 
                        hmstate->len -= sublen;
                        hmstate->mem += sublen;
+
+                       offset += sublen;
+                       length -= sublen;
+                       retry = FALSE;
                }
        }
 
index fbfb4f6cadb5f8453cc104c5bef35a1ee0f6554b..a0b3b1b4090b9d4c6585a99f5f62b26f27ada468 100644 (file)
--- a/minix/tests/test74.c
+++ b/minix/tests/test74.c
@@ -471,7 +471,7 @@ static void basic_regression(void)
 static void
 nonedev_regression(void)
 {
-       int fd;
+       int fd, fd2;
        char *buf;
        unsigned long uptime1, uptime2, uptime3;
 
@@ -516,6 +516,19 @@ nonedev_regression(void)
 
        if (munmap(buf, 4096) != 0) e(16);
 
+       /* Also test page faults not incurred by the process itself. */
+       if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY, 0644)) < 0) e(17);
+
+       if (unlink("testfile") != 0) e(18);
+
+       buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
+       if (buf == MAP_FAILED) e(19);
+
+       if (write(fd2, buf, 10) != 10) e(20);
+
+       if (munmap(buf, 4096) != 0) e(21);
+
+       close(fd2);
        close(fd);
 }
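
For context on the new test case: the page fault has to happen while the
kernel copies from the mapping on behalf of write(2), rather than when the
process touches the memory itself.  The stand-alone program below reproduces
that fault-in-kernel-copy pattern with ordinary POSIX calls; the file names,
sizes and error handling are arbitrary choices for this illustration and are
not taken from test74.c.

#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char data[4096];
	char *buf;
	int fdin, fdout;

	/* Create a page-sized source file, but never touch its mapping. */
	memset(data, 'x', sizeof(data));
	if ((fdin = open("source", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0)
		err(1, "open source");
	if (write(fdin, data, sizeof(data)) != (ssize_t)sizeof(data))
		err(1, "write source");

	buf = mmap(NULL, sizeof(data), PROT_READ, MAP_SHARED | MAP_FILE,
	    fdin, 0);
	if (buf == MAP_FAILED)
		err(1, "mmap");

	if ((fdout = open("target", O_CREAT | O_TRUNC | O_WRONLY, 0644)) < 0)
		err(1, "open target");

	/*
	 * The process has never read from buf, so the mapped page is not yet
	 * present.  The kernel page-faults on it while copying the buffer for
	 * write(2), and VM must resolve that fault; for one-shot pages (as
	 * with vbfs), the unfixed code kept requesting the same page from the
	 * file system.
	 */
	if (write(fdout, buf, 10) != 10)
		err(1, "write target");

	if (munmap(buf, sizeof(data)) != 0)
		err(1, "munmap");
	close(fdout);
	close(fdin);
	unlink("source");
	unlink("target");
	return 0;
}

On a file system whose pages are handed out one-shot, the write(2) call above
is the step that used to hang the system before this fix.
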