From: David van Moolenbroek
Date: Mon, 22 Dec 2014 17:20:40 +0000 (+0000)
Subject: VM: fix for handling one-shot page memory
X-Git-Url: http://zhaoyanbai.com/repos/Bv9ARM.pdf?a=commitdiff_plain;h=f202792edf8f6ad088098ebce355e08c37bd92d0;p=minix.git

VM: fix for handling one-shot page memory

The original one-shot page patch (git-e321f65) did not account for the
possibility of pagefaults happening while copying memory in the kernel.
This allowed a simple cp(1) from vbfs to hang the system, since VM was
repeatedly requesting the same page from the file system.

With this fix, VM no longer tries to fetch the same memory-mapped page
from VFS more than once per memory handling request from the kernel.
In addition to fixing the original issue, this change should make
handling memory somewhat more robust and ever-so-slightly faster.

Test74 has been extended with a simple test for this case.

Change-Id: I6e565f3750141e51b52ec98c938f8e1aa40070d0
---

diff --git a/minix/servers/vm/pagefaults.c b/minix/servers/vm/pagefaults.c
index ed1f0a476..6e6b845cd 100644
--- a/minix/servers/vm/pagefaults.c
+++ b/minix/servers/vm/pagefaults.c
@@ -51,7 +51,7 @@ struct hm_state {
 
 static void handle_memory_continue(struct vmproc *vmp, message *m,
 	void *arg, void *statearg);
-static int handle_memory_step(struct hm_state *hmstate);
+static int handle_memory_step(struct hm_state *hmstate, int retry);
 static void handle_memory_final(struct hm_state *state, int result);
 
 /*===========================================================================*
@@ -183,7 +183,7 @@ static void handle_memory_continue(struct vmproc *vmp, message *m,
 		return;
 	}
 
-	r = handle_memory_step(state);
+	r = handle_memory_step(state, TRUE /*retry*/);
 
 	assert(state->valid == VALID);
 
@@ -271,7 +271,7 @@ int handle_memory_start(struct vmproc *vmp, vir_bytes mem, vir_bytes len,
 	state.valid = VALID;
 	state.vfs_avail = vfs_avail;
 
-	r = handle_memory_step(&state);
+	r = handle_memory_step(&state, FALSE /*retry*/);
 
 	if(r == SUSPEND) {
 		assert(caller != NONE);
@@ -328,9 +328,11 @@ void do_memory(void)
 	}
 }
 
-static int handle_memory_step(struct hm_state *hmstate)
+static int handle_memory_step(struct hm_state *hmstate, int retry)
 {
 	struct vir_region *region;
+	vir_bytes offset, length, sublen;
+	int r;
 
 	/* Page-align memory and length. */
 	assert(hmstate);
@@ -339,7 +341,6 @@ static int handle_memory_step(struct hm_state *hmstate)
 	assert(!(hmstate->len % VM_PAGE_SIZE));
 
 	while(hmstate->len > 0) {
-		int r;
 		if(!(region = map_lookup(hmstate->vmp, hmstate->mem, NULL))) {
 #if VERBOSE
 			map_printmap(hmstate->vmp);
@@ -351,30 +352,59 @@ static int handle_memory_step(struct hm_state *hmstate)
 			printf("VM: do_memory: write to unwritable map\n");
 #endif
 			return EFAULT;
-		} else {
-			vir_bytes offset, sublen;
-			assert(region->vaddr <= hmstate->mem);
-			assert(!(region->vaddr % VM_PAGE_SIZE));
-			offset = hmstate->mem - region->vaddr;
-			sublen = hmstate->len;
-			if(offset + sublen > region->length)
-				sublen = region->length - offset;
-
+		}
+
+		assert(region->vaddr <= hmstate->mem);
+		assert(!(region->vaddr % VM_PAGE_SIZE));
+		offset = hmstate->mem - region->vaddr;
+		length = hmstate->len;
+		if (offset + length > region->length)
+			length = region->length - offset;
+
+		/*
+		 * Handle one page at a time.  While it seems beneficial to
+		 * handle multiple pages in one go, the opposite is true:
+		 * map_handle_memory will handle one page at a time anyway, and
+		 * if we give it the whole range multiple times, it will have
+		 * to recheck pages it already handled.  In addition, in order
+		 * to handle one-shot pages, we need to know whether we are
+		 * retrying a single page, and that is not possible if this is
+		 * hidden in map_handle_memory.
+		 */
+		while (length > 0) {
+			sublen = VM_PAGE_SIZE;
+
+			assert(sublen <= length);
+			assert(offset + sublen <= region->length);
+
+			/*
+			 * Upon the second try for this range, do not allow
+			 * calling into VFS again.  This prevents eternal loops
+			 * in case the FS messes up, and allows one-shot pages
+			 * to be mapped in on the second call.
+			 */
 			if((region->def_memtype == &mem_type_mappedfile &&
-			 !hmstate->vfs_avail) || hmstate->caller == NONE) {
-				r = map_handle_memory(hmstate->vmp, region, offset,
-					sublen, hmstate->wrflag, NULL, NULL, 0);
+			    (!hmstate->vfs_avail || retry)) ||
+			    hmstate->caller == NONE) {
+				r = map_handle_memory(hmstate->vmp, region,
+				    offset, sublen, hmstate->wrflag, NULL,
+				    NULL, 0);
 				assert(r != SUSPEND);
 			} else {
-				r = map_handle_memory(hmstate->vmp, region, offset,
-					sublen, hmstate->wrflag, handle_memory_continue,
-					hmstate, sizeof(*hmstate));
+				r = map_handle_memory(hmstate->vmp, region,
+				    offset, sublen, hmstate->wrflag,
+				    handle_memory_continue, hmstate,
+				    sizeof(*hmstate));
 			}
 			if(r != OK) return r;
 			hmstate->len -= sublen;
 			hmstate->mem += sublen;
+
+			offset += sublen;
+			length -= sublen;
+			retry = FALSE;
 		}
 	}
diff --git a/minix/tests/test74.c b/minix/tests/test74.c
index fbfb4f6ca..a0b3b1b40 100644
--- a/minix/tests/test74.c
+++ b/minix/tests/test74.c
@@ -471,7 +471,7 @@ static void basic_regression(void)
 static void
 nonedev_regression(void)
 {
-	int fd;
+	int fd, fd2;
 	char *buf;
 	unsigned long uptime1, uptime2, uptime3;
 
@@ -516,6 +516,19 @@ nonedev_regression(void)
 
 	if (munmap(buf, 4096) != 0) e(16);
 
+	/* Also test page faults not incurred by the process itself. */
+	if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY)) < 0) e(17);
+
+	if (unlink("testfile") != 0) e(18);
+
+	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
+	if (buf == MAP_FAILED) e(19);
+
+	if (write(fd2, buf, 10) != 10) e(20);
+
+	if (munmap(buf, 4096) != 0) e(21);
+
+	close(fd2);
 	close(fd);
 }
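
The retry rule that the patch adds to handle_memory_step() can be summarized
outside the VM sources. The following is only an illustrative sketch, not
MINIX code: the page store, the simulated FS round trip and all names
(resident[], fs_asked[], handle_page(), handle_range()) are invented for this
example. It models the discipline the patch enforces: a faulting range is
processed one page at a time, the file system is consulted at most once per
page, and on the retry the handler works with whatever is mapped in, so a page
the FS fails to deliver no longer makes VM re-request it forever.

#include <stdbool.h>
#include <stdio.h>

#define NUM_PAGES 8

enum { HM_OK, HM_SUSPEND, HM_FAULT };

static bool resident[NUM_PAGES];	/* which pages are currently mapped in */
static bool fs_broken[NUM_PAGES];	/* pages the FS fails to deliver */
static bool fs_asked[NUM_PAGES];	/* did we already ask the FS for this page? */

/* Simulated FS reply: map the page in, unless the FS "messes up". */
static void fs_deliver_page(int page)
{
	if (!fs_broken[page])
		resident[page] = true;
}

/*
 * Handle a single page.  On the first attempt we may call into the FS and
 * suspend; on the retry we must make do with whatever is mapped in, so a
 * misbehaving FS can no longer cause an endless suspend/retry loop.
 */
static int handle_page(int page, bool retry)
{
	if (resident[page])
		return HM_OK;
	if (retry || fs_asked[page])
		return HM_FAULT;	/* second try: no further FS calls */
	fs_asked[page] = true;
	fs_deliver_page(page);		/* in reality this is asynchronous */
	return HM_SUSPEND;		/* wait for the FS reply */
}

/*
 * Handle a range of pages one page at a time; after a suspension the same
 * page is retried exactly once, mirroring the FALSE/TRUE retry arguments
 * passed to handle_memory_step() in the patch above.
 */
static int handle_range(int first, int count)
{
	int page, r;

	for (page = first; page < first + count; page++) {
		r = handle_page(page, false);
		if (r == HM_SUSPEND)
			r = handle_page(page, true);
		if (r != HM_OK)
			return r;
	}
	return HM_OK;
}

int main(void)
{
	fs_broken[2] = true;	/* page 2: the FS never delivers it */

	printf("pages 0-1: %s\n", handle_range(0, 2) == HM_OK ? "ok" : "fault");
	printf("pages 2-3: %s\n", handle_range(2, 2) == HM_OK ? "ok" : "fault");
	return 0;
}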
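
For reference, the situation covered by the new test74 case (and by the
original cp(1)-from-vbfs hang) can be reproduced with a few lines of ordinary
user code: the crucial point is that write(2) makes the kernel copy from a
file-backed mapping, so any page fault on that mapping is taken during the
kernel copy rather than by the process touching the memory itself. This is a
hypothetical standalone sketch, not part of the commit; the file names and the
one-page size are arbitrary, and input.dat is assumed to be at least one page
long.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int in, out;
	char *map;

	/* Hypothetical input/output files; input.dat must already exist. */
	if ((in = open("input.dat", O_RDONLY)) < 0 ||
	    (out = open("output.dat", O_CREAT | O_TRUNC | O_WRONLY, 0644)) < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	map = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, in, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* The kernel faults in the mapped page while copying it out. */
	if (write(out, map, 4096) != 4096) {
		perror("write");
		return EXIT_FAILURE;
	}

	munmap(map, 4096);
	close(out);
	close(in);
	return EXIT_SUCCESS;
}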