From: Ben Gras Date: Mon, 21 Sep 2009 14:31:52 +0000 (+0000) Subject: Primary goal for these changes is: X-Git-Tag: v3.1.5~134 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/zpipe.c?a=commitdiff_plain;h=cd8b915ed9966e9d6d8a3fae09a8a05b1e8d273b;p=minix.git Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). 
- In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for determining in which of the two ranges passed to phys_copy the fault occurred - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes' memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c are gone. 
Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1 --- diff --git a/kernel/arch/i386/arch_do_vmctl.c b/kernel/arch/i386/arch_do_vmctl.c index 523165d82..2a8d526a2 100644 --- a/kernel/arch/i386/arch_do_vmctl.c +++ b/kernel/arch/i386/arch_do_vmctl.c @@ -10,7 +10,9 @@ #include "../../system.h" #include -extern u32_t kernel_cr3; +#include "proto.h" + +extern u32_t *vm_pagedirs; /*===========================================================================* * arch_do_vmctl * @@ -30,7 +32,7 @@ struct proc *p; p->p_seg.p_cr3 = m_ptr->SVMCTL_VALUE; p->p_misc_flags |= MF_FULLVM; } else { - p->p_seg.p_cr3 = kernel_cr3; + p->p_seg.p_cr3 = 0; p->p_misc_flags &= ~MF_FULLVM; } RTS_LOCK_UNSET(p, VMINHIBIT); @@ -53,8 +55,33 @@ struct proc *p; m_ptr->SVMCTL_PF_I386_ERR = rp->p_pagefault.pf_flags; return OK; } + case VMCTL_I386_KERNELLIMIT: + { + int r; + /* VM wants kernel to increase its segment. 
*/ + r = prot_set_kern_seg_limit(m_ptr->SVMCTL_VALUE); + return r; + } + case VMCTL_I386_PAGEDIRS: + { + int pde; + vm_pagedirs = (u32_t *) m_ptr->SVMCTL_VALUE; + return OK; + } + case VMCTL_I386_FREEPDE: + { + i386_freepde(m_ptr->SVMCTL_VALUE); + return OK; + } + case VMCTL_FLUSHTLB: + { + level0(reload_cr3); + return OK; + } } + + kprintf("arch_do_vmctl: strange param %d\n", m_ptr->SVMCTL_PARAM); return EINVAL; } diff --git a/kernel/arch/i386/do_sdevio.c b/kernel/arch/i386/do_sdevio.c index 7443c132f..63b152aff 100644 --- a/kernel/arch/i386/do_sdevio.c +++ b/kernel/arch/i386/do_sdevio.c @@ -24,6 +24,8 @@ PUBLIC int do_sdevio(m_ptr) register message *m_ptr; /* pointer to request message */ { + vir_bytes newoffset; + endpoint_t newep; int proc_nr, proc_nr_e = m_ptr->DIO_VEC_ENDPT; int count = m_ptr->DIO_VEC_SIZE; long port = m_ptr->DIO_PORT; @@ -32,6 +34,9 @@ register message *m_ptr; /* pointer to request message */ struct proc *rp; struct priv *privp; struct io_range *iorp; + int rem; + vir_bytes addr; + struct proc *destproc; /* Allow safe copies and accesses to SELF */ if ((m_ptr->DIO_REQUEST & _DIO_SAFEMASK) != _DIO_SAFE && @@ -64,11 +69,23 @@ register message *m_ptr; /* pointer to request message */ /* Check for 'safe' variants. */ if((m_ptr->DIO_REQUEST & _DIO_SAFEMASK) == _DIO_SAFE) { /* Map grant address to physical address. */ - if ((phys_buf = umap_verify_grant(proc_addr(proc_nr), who_e, + if(verify_grant(proc_nr_e, who_e, (vir_bytes) m_ptr->DIO_VEC_ADDR, - (vir_bytes) m_ptr->DIO_OFFSET, count, - req_dir == _DIO_INPUT ? CPF_WRITE : CPF_READ)) == 0) - return(EPERM); + count, + req_dir == _DIO_INPUT ? 
CPF_WRITE : CPF_READ, + (vir_bytes) m_ptr->DIO_OFFSET, + &newoffset, &newep) != OK) { + printf("do_sdevio: verify_grant failed\n"); + return EPERM; + } + if(!isokendpt(newep, &proc_nr)) + return(EINVAL); + destproc = proc_addr(proc_nr); + if ((phys_buf = umap_local(destproc, D, + (vir_bytes) newoffset, count)) == 0) { + printf("do_sdevio: umap_local failed\n"); + return(EFAULT); + } } else { if(proc_nr != who_p) { @@ -77,10 +94,14 @@ register message *m_ptr; /* pointer to request message */ return EPERM; } /* Get and check physical address. */ - if ((phys_buf = umap_virtual(proc_addr(proc_nr), D, + if ((phys_buf = umap_local(proc_addr(proc_nr), D, (vir_bytes) m_ptr->DIO_VEC_ADDR, count)) == 0) return(EFAULT); + destproc = proc_addr(proc_nr); } + /* current process must be target for phys_* to be OK */ + + vm_set_cr3(destproc); switch (io_type) { diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c index 35fb55440..7e54f2745 100755 --- a/kernel/arch/i386/exception.c +++ b/kernel/arch/i386/exception.c @@ -10,59 +10,87 @@ #include #include #include "../../proc.h" +#include "../../proto.h" +#include "../../vm.h" -extern int vm_copy_in_progress; +extern int vm_copy_in_progress, catch_pagefaults; extern struct proc *vm_copy_from, *vm_copy_to; -extern u32_t vm_copy_from_v, vm_copy_to_v; -extern u32_t vm_copy_from_p, vm_copy_to_p, vm_copy_cr3; -u32_t pagefault_cr2, pagefault_count = 0; - -void pagefault(struct proc *pr, int trap_errno) +void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, + u32_t *old_eipptr, u32_t *old_eaxptr, u32_t pagefaultcr2) { int s; vir_bytes ph; u32_t pte; + int procok = 0, pcok = 0, rangeok = 0; + int in_physcopy = 0; + vir_bytes test_eip; - if(pagefault_count != 1) - minix_panic("recursive pagefault", pagefault_count); + vmassert(old_eipptr); + vmassert(old_eaxptr); - /* Don't schedule this process until pagefault is handled. 
*/ - if(RTS_ISSET(pr, PAGEFAULT)) - minix_panic("PAGEFAULT set", pr->p_endpoint); - RTS_LOCK_SET(pr, PAGEFAULT); + vmassert(*old_eipptr == old_eip); + vmassert(old_eipptr != &old_eip); + +#if 0 + printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n", + pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3()); +#endif + + if(pr->p_seg.p_cr3) { + vmassert(pr->p_seg.p_cr3 == read_cr3()); + } + + test_eip = k_reenter ? old_eip : pr->p_reg.pc; + + in_physcopy = (test_eip > (vir_bytes) phys_copy) && + (test_eip < (vir_bytes) phys_copy_fault); + + if((k_reenter || iskernelp(pr)) && + catch_pagefaults && in_physcopy) { +#if 0 + printf("pf caught! addr 0x%lx\n", pagefaultcr2); +#endif + *old_eipptr = (u32_t) phys_copy_fault; + *old_eaxptr = pagefaultcr2; + + return; + } - if(pr->p_endpoint <= INIT_PROC_NR && !(pr->p_misc_flags & MF_FULLVM)) { + /* System processes that don't have their own page table can't + * have page faults. VM does have its own page table but also + * can't have page faults (because VM has to handle them). + */ + if(k_reenter || (pr->p_endpoint <= INIT_PROC_NR && + !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) { /* Page fault we can't / don't want to * handle. */ - kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x\n", + kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, k_reenter %d\n", pr->p_endpoint, pr->p_name, pr->p_reg.pc, - pagefault_cr2, trap_errno); + pagefaultcr2, trap_errno, k_reenter); proc_stacktrace(pr); minix_panic("page fault in system process", pr->p_endpoint); return; } + /* Don't schedule this process until pagefault is handled. */ + vmassert(pr->p_seg.p_cr3 == read_cr3()); + vmassert(!RTS_ISSET(pr, PAGEFAULT)); + RTS_LOCK_SET(pr, PAGEFAULT); + /* Save pagefault details, suspend process, * add process to pagefault chain, * and tell VM there is a pagefault to be * handled. 
*/ - pr->p_pagefault.pf_virtual = pagefault_cr2; + pr->p_pagefault.pf_virtual = pagefaultcr2; pr->p_pagefault.pf_flags = trap_errno; pr->p_nextpagefault = pagefaults; pagefaults = pr; - lock_notify(HARDWARE, VM_PROC_NR); - - pagefault_count = 0; - -#if 0 - kprintf("pagefault for process %d ('%s'), pc = 0x%x\n", - pr->p_endpoint, pr->p_name, pr->p_reg.pc); - proc_stacktrace(pr); -#endif + + mini_notify(proc_addr(HARDWARE), VM_PROC_NR); return; } @@ -70,12 +98,16 @@ void pagefault(struct proc *pr, int trap_errno) /*===========================================================================* * exception * *===========================================================================*/ -PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags) +PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags, + old_eipptr, old_eaxptr, pagefaultcr2) unsigned vec_nr; u32_t trap_errno; u32_t old_eip; U16_t old_cs; u32_t old_eflags; +u32_t *old_eipptr; +u32_t *old_eaxptr; +u32_t pagefaultcr2; { /* An exception or unexpected interrupt has occurred. */ @@ -108,16 +140,14 @@ struct proc *t; register struct ex_s *ep; struct proc *saved_proc; -#if DEBUG_SCHED_CHECK - for (t = BEG_PROC_ADDR; t < END_PROC_ADDR; ++t) { - if(t->p_magic != PMAGIC) - kprintf("entry %d broken\n", t->p_nr); + if(k_reenter > 2) { + /* This can't end well. */ + minix_panic("exception: k_reenter too high", k_reenter); } -#endif /* Save proc_ptr, because it may be changed by debug statements. */ saved_proc = proc_ptr; - + ep = &ex_data[vec_nr]; if (vec_nr == 2) { /* spurious NMI on some machines */ @@ -126,8 +156,9 @@ struct proc *t; } if(vec_nr == PAGE_FAULT_VECTOR) { - pagefault(saved_proc, trap_errno); - return; + pagefault(old_eip, saved_proc, trap_errno, + old_eipptr, old_eaxptr, pagefaultcr2); + return; } /* If an exception occurs while running a process, the k_reenter variable @@ -137,22 +168,19 @@ struct proc *t; if (k_reenter == 0 && ! 
iskernelp(saved_proc)) { { - kprintf( -"exception for process %d, endpoint %d ('%s'), pc = 0x%x:0x%x, sp = 0x%x:0x%x\n", - proc_nr(saved_proc), saved_proc->p_endpoint, - saved_proc->p_name, - saved_proc->p_reg.cs, saved_proc->p_reg.pc, - saved_proc->p_reg.ss, saved_proc->p_reg.sp); kprintf( "vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n", vec_nr, (unsigned long)trap_errno, (unsigned long)old_eip, old_cs, (unsigned long)old_eflags); + printseg("cs: ", 1, saved_proc, old_cs); + printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds); + if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) { + printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss); + } proc_stacktrace(saved_proc); } - kprintf("kernel: cause_sig %d for %d\n", - ep->signum, saved_proc->p_endpoint); cause_sig(proc_nr(saved_proc), ep->signum); return; } @@ -168,7 +196,7 @@ struct proc *t; vec_nr, trap_errno, old_eip, old_cs, old_eflags); /* TODO should we enable this only when compiled for some debug mode? */ if (saved_proc) { - kprintf("process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); + kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); kprintf("pc = %u:0x%x\n", (unsigned) saved_proc->p_reg.cs, (unsigned) saved_proc->p_reg.pc); proc_stacktrace(saved_proc); @@ -184,24 +212,30 @@ struct proc *t; /*===========================================================================* * stacktrace * *===========================================================================*/ -PUBLIC void proc_stacktrace(struct proc *proc) +PUBLIC void proc_stacktrace(struct proc *whichproc) { reg_t bp, v_bp, v_pc, v_hbp; + int iskernel; + + v_bp = whichproc->p_reg.fp; - v_bp = proc->p_reg.fp; + iskernel = iskernelp(whichproc); - kprintf("%8.8s %6d 0x%lx ", - proc->p_name, proc->p_endpoint, proc->p_reg.pc); + kprintf("%-8.8s %6d 0x%lx ", + whichproc->p_name, whichproc->p_endpoint, whichproc->p_reg.pc); while(v_bp) { - if(data_copy(proc->p_endpoint, v_bp, - SYSTEM, 
(vir_bytes) &v_hbp, sizeof(v_hbp)) != OK) { + +#define PRCOPY(pr, pv, v, n) \ + (iskernel ? (memcpy((char *) v, (char *) pv, n), OK) : \ + data_copy(pr->p_endpoint, pv, SYSTEM, (vir_bytes) (v), n)) + + if(PRCOPY(whichproc, v_bp, &v_hbp, sizeof(v_hbp)) != OK) { kprintf("(v_bp 0x%lx ?)", v_bp); break; } - if(data_copy(proc->p_endpoint, v_bp + sizeof(v_pc), - SYSTEM, (vir_bytes) &v_pc, sizeof(v_pc)) != OK) { - kprintf("(v_pc 0x%lx ?)", v_pc); + if(PRCOPY(whichproc, v_bp + sizeof(v_pc), &v_pc, sizeof(v_pc)) != OK) { + kprintf("(v_pc 0x%lx ?)", v_bp + sizeof(v_pc)); break; } kprintf("0x%lx ", (unsigned long) v_pc); diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h index d968b7b15..0bf7a3cbd 100644 --- a/kernel/arch/i386/include/archconst.h +++ b/kernel/arch/i386/include/archconst.h @@ -54,6 +54,7 @@ #define INTR_PRIVILEGE 0 /* kernel and interrupt handlers */ #define TASK_PRIVILEGE 1 /* kernel tasks */ #define USER_PRIVILEGE 3 /* servers and user processes */ +#define RPL_MASK 0x03 /* bits in selector RPL */ /* 286 hardware constants. */ @@ -137,5 +138,6 @@ #define IOPL_MASK 0x003000 #define vir2phys(vir) (kinfo.data_base + (vir_bytes) (vir)) +#define phys2vir(ph) ((vir_bytes) (ph) - kinfo.data_base) #endif /* _I386_ACONST_H */ diff --git a/kernel/arch/i386/include/archtypes.h b/kernel/arch/i386/include/archtypes.h index ac17eeb7f..bdd8f8246 100644 --- a/kernel/arch/i386/include/archtypes.h +++ b/kernel/arch/i386/include/archtypes.h @@ -56,7 +56,7 @@ struct segdesc_s { /* segment descriptor for protected mode */ typedef struct segframe { reg_t p_ldt_sel; /* selector in gdt with ldt base and limit */ reg_t p_cr3; /* page table root */ - struct segdesc_s p_ldt[2+NR_REMOTE_SEGS]; /* CS, DS and remote */ + struct segdesc_s p_ldt[LDT_SIZE]; /* CS, DS and remote */ } segframe_t; /* Page fault event. Stored in process table. Only valid if PAGEFAULT @@ -68,5 +68,7 @@ struct pagefault u32_t pf_flags; /* Pagefault flags on stack. 
*/ }; +#define INMEMORY(p) (!p->p_seg.p_cr3 || ptproc == p) + #endif /* #ifndef _I386_TYPES_H */ diff --git a/kernel/arch/i386/klib386.s b/kernel/arch/i386/klib386.s index 0aff6c0bd..cc6cd748e 100755 --- a/kernel/arch/i386/klib386.s +++ b/kernel/arch/i386/klib386.s @@ -8,7 +8,6 @@ #include #include #include "../../const.h" -#include "vm.h" #include "sconst.h" ! This file contains a number of assembly code utility routines needed by the @@ -28,6 +27,7 @@ .define _intr_unmask ! enable an irq at the 8259 controller .define _intr_mask ! disable an irq .define _phys_copy ! copy data from anywhere to anywhere in memory +.define _phys_copy_fault! phys_copy pagefault .define _phys_memset ! write pattern anywhere in memory .define _mem_rdw ! copy one word from [segment:offset] .define _reset ! reset the system @@ -35,13 +35,12 @@ .define _level0 ! call a function at level 0 .define _read_cpu_flags ! read the cpu flags .define _read_cr0 ! read cr0 -.define _write_cr3 ! write cr3 -.define _last_cr3 +.define _getcr3val .define _write_cr0 ! write a value in cr0 .define _read_cr4 +.define _thecr3 .define _write_cr4 - -.define _kernel_cr3 +.define _catch_pagefaults ! The routines only guarantee to preserve the registers the C compiler ! expects to be preserved (ebx, esi, edi, ebp, esp, segment registers, and @@ -156,55 +155,6 @@ csinit: mov eax, DS_SELECTOR ret -!*===========================================================================* -!* cp_mess * -!*===========================================================================* -! PUBLIC void cp_mess(int src, phys_clicks src_clicks, vir_bytes src_offset, -! phys_clicks dst_clicks, vir_bytes dst_offset); -! This routine makes a fast copy of a message from anywhere in the address -! space to anywhere else. It also copies the source address provided as a -! parameter to the call into the first word of the destination message. -! -! Note that the message size, "Msize" is in DWORDS (not bytes) and must be set -! correctly. 
Changing the definition of message in the type file and not -! changing it here will lead to total disaster. -! -!CM_ARGS = 4 + 4 + 4 + 4 + 4 ! 4 + 4 + 4 + 4 + 4 -!! es ds edi esi eip proc scl sof dcl dof -! -! .align 16 -!_cp_mess: -! cld -! push esi -! push edi -! push ds -! push es -! -! mov eax, FLAT_DS_SELECTOR -! mov ds, ax -! mov es, ax -! -! mov esi, CM_ARGS+4(esp) ! src clicks -! shl esi, CLICK_SHIFT -! add esi, CM_ARGS+4+4(esp) ! src offset -! mov edi, CM_ARGS+4+4+4(esp) ! dst clicks -! shl edi, CLICK_SHIFT -! add edi, CM_ARGS+4+4+4+4(esp) ! dst offset -! -! mov eax, CM_ARGS(esp) ! process number of sender -! stos ! copy number of sender to dest message -! add esi, 4 ! do not copy first word -! mov ecx, Msize - 1 ! remember, first word does not count -! rep -! movs ! copy the message -! -! pop es -! pop ds -! pop edi -! pop esi -! ret ! that is all folks! -! - !*===========================================================================* !* exit * !*===========================================================================* @@ -236,8 +186,6 @@ _phys_insw: push edi push es - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov es, cx mov edx, 8(ebp) ! port to read from @@ -264,8 +212,6 @@ _phys_insb: push edi push es - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov es, cx mov edx, 8(ebp) ! port to read from @@ -293,8 +239,6 @@ _phys_outsw: push esi push ds - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov ds, cx mov edx, 8(ebp) ! port to write to @@ -322,8 +266,6 @@ _phys_outsb: push esi push ds - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov ds, cx mov edx, 8(ebp) ! port to write to @@ -416,7 +358,7 @@ dis_already: !*===========================================================================* !* phys_copy * !*===========================================================================* -! PUBLIC void phys_copy(phys_bytes source, phys_bytes destination, +! PUBLIC phys_bytes phys_copy(phys_bytes source, phys_bytes destination, ! phys_bytes bytecount); ! 
Copy a block of physical memory. @@ -430,8 +372,6 @@ _phys_copy: push edi push es - LOADKERNELCR3 - mov eax, FLAT_DS_SELECTOR mov es, ax @@ -457,6 +397,8 @@ pc_small: rep eseg movsb + mov eax, 0 ! 0 means: no fault +_phys_copy_fault: ! kernel can send us here pop es pop edi pop esi @@ -477,8 +419,6 @@ _phys_memset: push ebx push ds - LOADKERNELCR3 - mov esi, 8(ebp) mov eax, 16(ebp) mov ebx, FLAT_DS_SELECTOR @@ -633,14 +573,13 @@ _write_cr4: pop ebp ret + !*===========================================================================* -!* write_cr3 * +!* getcr3val * !*===========================================================================* -! PUBLIC void write_cr3(unsigned long value); -_write_cr3: - push ebp - mov ebp, esp - LOADCR3WITHEAX(0x22, 8(ebp)) - pop ebp +! PUBLIC unsigned long getcr3val(void); +_getcr3val: + mov eax, cr3 + mov (_thecr3), eax ret diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index a04b0e94c..11f673285 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -1,4 +1,5 @@ + #include "../../kernel.h" #include "../../proc.h" #include "../../vm.h" @@ -15,137 +16,246 @@ #include "proto.h" #include "../../proto.h" +#include "../../proto.h" #include "../../debug.h" -/* VM functions and data. 
*/ -PRIVATE u32_t vm_cr3; -PUBLIC u32_t kernel_cr3; -extern u32_t cswitch; -u32_t last_cr3 = 0; +#include "sha1.h" + +PRIVATE int psok = 0; + +#define PROCPDEPTR(pr, pi) ((u32_t *) ((u8_t *) vm_pagedirs +\ + I386_PAGE_SIZE * pr->p_nr + \ + I386_VM_PT_ENT_SIZE * pi)) + +u8_t *vm_pagedirs = NULL; + +#define NOPDE -1 +#define PDEMASK(n) (1L << (n)) +PUBLIC u32_t dirtypde; +#define WANT_FREEPDES (sizeof(dirtypde)*8-5) +PRIVATE int nfreepdes = 0, freepdes[WANT_FREEPDES], inusepde = NOPDE; #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0) -FORWARD _PROTOTYPE( void phys_put32, (phys_bytes addr, u32_t value) ); -FORWARD _PROTOTYPE( u32_t phys_get32, (phys_bytes addr) ); -FORWARD _PROTOTYPE( void vm_set_cr3, (u32_t value) ); +FORWARD _PROTOTYPE( u32_t phys_get32, (vir_bytes v) ); FORWARD _PROTOTYPE( void set_cr3, (void) ); FORWARD _PROTOTYPE( void vm_enable_paging, (void) ); -#if DEBUG_VMASSERT -#define vmassert(t) { \ - if(!(t)) { minix_panic("vm: assert " #t " failed\n", __LINE__); } } -#else -#define vmassert(t) { } -#endif - + /* *** Internal VM Functions *** */ -PUBLIC void vm_init(void) +PUBLIC void vm_init(struct proc *newptproc) { - int o; - phys_bytes p, pt_size; - phys_bytes vm_dir_base, vm_pt_base, phys_mem; - u32_t entry; - unsigned pages; - struct proc* rp; - struct proc *sys = proc_addr(SYSTEM); - static int init_done = 0; - - if (!vm_size) - minix_panic("i386_vm_init: no space for page tables", NO_NUM); - - if(init_done) - return; + int i; + if(vm_running) + minix_panic("vm_init: vm_running", NO_NUM); + vm_set_cr3(newptproc); + level0(vm_enable_paging); + vm_running = 1; - /* Align page directory */ - o= (vm_base % I386_PAGE_SIZE); - if (o != 0) - o= I386_PAGE_SIZE-o; - vm_dir_base= vm_base+o; - - /* Page tables start after the page directory */ - vm_pt_base= vm_dir_base+I386_PAGE_SIZE; - - pt_size= (vm_base+vm_size)-vm_pt_base; - pt_size -= (pt_size % I386_PAGE_SIZE); - - /* Compute the number of pages based on vm_mem_high */ - pages= 
(vm_mem_high-1)/I386_PAGE_SIZE + 1; - - if (pages * I386_VM_PT_ENT_SIZE > pt_size) - minix_panic("i386_vm_init: page table too small", NO_NUM); - - for (p= 0; p*I386_VM_PT_ENT_SIZE < pt_size; p++) - { - phys_mem= p*I386_PAGE_SIZE; - entry= phys_mem | I386_VM_USER | I386_VM_WRITE | - I386_VM_PRESENT; - if (phys_mem >= vm_mem_high) - entry= 0; -#if VM_KERN_NOPAGEZERO - if (phys_mem == (sys->p_memmap[T].mem_phys << CLICK_SHIFT) || - phys_mem == (sys->p_memmap[D].mem_phys << CLICK_SHIFT)) { - entry = 0; - } -#endif - phys_put32(vm_pt_base + p*I386_VM_PT_ENT_SIZE, entry); - } +} - for (p= 0; p < I386_VM_DIR_ENTRIES; p++) - { - phys_mem= vm_pt_base + p*I386_PAGE_SIZE; - entry= phys_mem | I386_VM_USER | I386_VM_WRITE | - I386_VM_PRESENT; - if (phys_mem >= vm_pt_base + pt_size) - entry= 0; - phys_put32(vm_dir_base + p*I386_VM_PT_ENT_SIZE, entry); - } +#define TYPEDIRECT 0 +#define TYPEPROCMAP 1 +#define TYPEPHYS 2 + +/* This macro sets up a mapping from within the kernel's address + * space to any other area of memory, either straight physical + * memory (PROC == NULL) or a process view of memory, in 4MB chunks. + * It recognizes PROC having kernel address space as a special case. + * + * It sets PTR to the pointer within kernel address space at the start + * of the 4MB chunk, and OFFSET to the offset within that chunk + * that corresponds to LINADDR. + * + * It needs FREEPDE (available and addressable PDE within kernel + * address space), SEG (hardware segment), VIRT (in-datasegment + * address if known). 
+ */ +#define CREATEPDE(PROC, PTR, LINADDR, REMAIN, BYTES, PDE, TYPE) { \ + u32_t *pdeptr = NULL; \ + int proc_pde_index; \ + proc_pde_index = I386_VM_PDE(LINADDR); \ + PDE = NOPDE; \ + if((PROC) && (((PROC) == ptproc) || !HASPT(PROC))) { \ + PTR = LINADDR; \ + TYPE = TYPEDIRECT; \ + } else { \ + int fp; \ + int mustinvl; \ + u32_t pdeval, *pdevalptr, mask; \ + phys_bytes offset; \ + vmassert(psok); \ + if(PROC) { \ + TYPE = TYPEPROCMAP; \ + vmassert(!iskernelp(PROC)); \ + vmassert(HASPT(PROC)); \ + pdeptr = PROCPDEPTR(PROC, proc_pde_index); \ + pdeval = *pdeptr; \ + } else { \ + TYPE = TYPEPHYS; \ + pdeval = (LINADDR & I386_VM_ADDR_MASK_4MB) | \ + I386_VM_BIGPAGE | I386_VM_PRESENT | \ + I386_VM_WRITE | I386_VM_USER; \ + } \ + for(fp = 0; fp < nfreepdes; fp++) { \ + int k = freepdes[fp]; \ + if(inusepde == k) \ + continue; \ + *PROCPDEPTR(ptproc, k) = 0; \ + PDE = k; \ + vmassert(k >= 0); \ + vmassert(k < sizeof(dirtypde)*8); \ + mask = PDEMASK(PDE); \ + if(dirtypde & mask) \ + continue; \ + break; \ + } \ + vmassert(PDE != NOPDE); \ + vmassert(mask); \ + if(dirtypde & mask) { \ + mustinvl = 1; \ + } else { \ + mustinvl = 0; \ + } \ + inusepde = PDE; \ + *PROCPDEPTR(ptproc, PDE) = pdeval; \ + offset = LINADDR & I386_VM_OFFSET_MASK_4MB; \ + PTR = I386_BIG_PAGE_SIZE*PDE + offset; \ + REMAIN = MIN(REMAIN, I386_BIG_PAGE_SIZE - offset); \ + if(mustinvl) { \ + level0(reload_cr3); \ + } \ + } \ +} - /* Set this cr3 in all currently running processes for - * future context switches. - */ - for (rp=BEG_PROC_ADDR; rpp_seg.p_cr3 = vm_dir_base; - } +#define DONEPDE(PDE) { \ + if(PDE != NOPDE) { \ + vmassert(PDE > 0); \ + vmassert(PDE < sizeof(dirtypde)*8); \ + dirtypde |= PDEMASK(PDE); \ + } \ +} - kernel_cr3 = vm_dir_base; +#define WIPEPDE(PDE) { \ + if(PDE != NOPDE) { \ + vmassert(PDE > 0); \ + vmassert(PDE < sizeof(dirtypde)*8); \ + *PROCPDEPTR(ptproc, PDE) = 0; \ + } \ +} - /* Set this cr3 now (not active until paging enabled). 
*/ - vm_set_cr3(vm_dir_base); +/*===========================================================================* + * lin_lin_copy * + *===========================================================================*/ +int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr, + struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes) +{ + u32_t addr; + int o1, o2; + int procslot; + int firstloop = 1; - /* Actually enable paging (activating cr3 load above). */ - level0(vm_enable_paging); + NOREC_ENTER(linlincopy); - /* Don't do this init in the future. */ - init_done = 1; - vm_running = 1; -} + vmassert(vm_running); + vmassert(nfreepdes >= 3); -PRIVATE void phys_put32(addr, value) -phys_bytes addr; -u32_t value; -{ - phys_copy(vir2phys((vir_bytes)&value), addr, sizeof(value)); + vmassert(ptproc); + vmassert(proc_ptr); + vmassert(read_cr3() == ptproc->p_seg.p_cr3); + + procslot = ptproc->p_nr; + + vmassert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES); + + while(bytes > 0) { + phys_bytes srcptr, dstptr; + vir_bytes chunk = bytes; + int srcpde, dstpde; + int srctype, dsttype; + + /* Set up 4MB ranges. */ + inusepde = NOPDE; + CREATEPDE(srcproc, srcptr, srclinaddr, chunk, bytes, srcpde, srctype); + CREATEPDE(dstproc, dstptr, dstlinaddr, chunk, bytes, dstpde, dsttype); + + /* Copy pages. */ + PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr); + + DONEPDE(srcpde); + DONEPDE(dstpde); + + if(addr) { + /* If addr is nonzero, a page fault was caught. */ + + if(addr >= srcptr && addr < (srcptr + chunk)) { + WIPEPDE(srcpde); + WIPEPDE(dstpde); + NOREC_RETURN(linlincopy, EFAULT_SRC); + } + if(addr >= dstptr && addr < (dstptr + chunk)) { + WIPEPDE(srcpde); + WIPEPDE(dstpde); + NOREC_RETURN(linlincopy, EFAULT_DST); + } + + minix_panic("lin_lin_copy fault out of range", NO_NUM); + + /* Not reached. */ + NOREC_RETURN(linlincopy, EFAULT); + } + + WIPEPDE(srcpde); + WIPEPDE(dstpde); + + /* Update counter and addresses for next iteration, if any. 
*/ + bytes -= chunk; + srclinaddr += chunk; + dstlinaddr += chunk; + + firstloop = 0; + } + + NOREC_RETURN(linlincopy, OK); } + PRIVATE u32_t phys_get32(addr) phys_bytes addr; { - u32_t value; + u32_t v; + int r; + + if(!vm_running) { + phys_copy(addr, vir2phys(&v), sizeof(v)); + return v; + } - phys_copy(addr, vir2phys((vir_bytes)&value), sizeof(value)); + if((r=lin_lin_copy(NULL, addr, + proc_addr(SYSTEM), vir2phys(&v), sizeof(v))) != OK) { + minix_panic("lin_lin_copy for phys_get32 failed", r); + } - return value; + return v; } -PRIVATE void vm_set_cr3(value) -u32_t value; +PRIVATE u32_t vm_cr3; /* temp arg to level0() func */ + +PUBLIC void vm_set_cr3(struct proc *newptproc) { - vm_cr3= value; - level0(set_cr3); + int u = 0; + if(!intr_disabled()) { lock; u = 1; } + vm_cr3= newptproc->p_seg.p_cr3; + if(vm_cr3) { + vmassert(intr_disabled()); + level0(set_cr3); + vmassert(intr_disabled()); + ptproc = newptproc; + vmassert(intr_disabled()); + } + if(u) { unlock; } } PRIVATE void set_cr3() @@ -153,10 +263,42 @@ PRIVATE void set_cr3() write_cr3(vm_cr3); } +char *cr0_str(u32_t e) +{ + static char str[80]; + strcpy(str, ""); +#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~v; } } while(0) + FLAG(I386_CR0_PE); + FLAG(I386_CR0_MP); + FLAG(I386_CR0_EM); + FLAG(I386_CR0_TS); + FLAG(I386_CR0_ET); + FLAG(I386_CR0_PG); + FLAG(I386_CR0_WP); + if(e) { strcat(str, " (++)"); } + return str; +} + +char *cr4_str(u32_t e) +{ + static char str[80]; + strcpy(str, ""); + FLAG(I386_CR4_VME); + FLAG(I386_CR4_PVI); + FLAG(I386_CR4_TSD); + FLAG(I386_CR4_DE); + FLAG(I386_CR4_PSE); + FLAG(I386_CR4_PAE); + FLAG(I386_CR4_MCE); + FLAG(I386_CR4_PGE); + if(e) { strcat(str, " (++)"); } + return str; +} + PRIVATE void vm_enable_paging(void) { u32_t cr0, cr4; - int psok, pgeok; + int pgeok; psok = _cpufeature(_CPUF_I386_PSE); pgeok = _cpufeature(_CPUF_I386_PGE); @@ -166,19 +308,26 @@ PRIVATE void vm_enable_paging(void) /* First clear PG and PGE flag, as PGE must be enabled after PG. 
*/ write_cr0(cr0 & ~I386_CR0_PG); - write_cr4(cr4 & ~I386_CR4_PGE); + write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE)); cr0= read_cr0(); cr4= read_cr4(); + /* Our first page table contains 4MB entries. */ + if(psok) + cr4 |= I386_CR4_PSE; + + write_cr4(cr4); + /* First enable paging, then enable global page flag. */ - write_cr0(cr0 | I386_CR0_PG); + cr0 |= I386_CR0_PG; + write_cr0(cr0 ); + cr0 |= I386_CR0_WP; + write_cr0(cr0); /* May we enable these features? */ if(pgeok) cr4 |= I386_CR4_PGE; - if(psok) - cr4 |= I386_CR4_PSE; write_cr4(cr4); } @@ -315,6 +464,7 @@ vir_bytes bytes; /* # of bytes to be copied */ return phys; } + /*===========================================================================* * vm_lookup * *===========================================================================*/ @@ -323,6 +473,7 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, u32_t *root, *pt; int pde, pte; u32_t pde_v, pte_v; + NOREC_ENTER(vmlookup); vmassert(proc); vmassert(physical); @@ -330,7 +481,7 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, if(!HASPT(proc)) { *physical = virtual; - return OK; + NOREC_RETURN(vmlookup, OK); } /* Retrieve page directory entry. */ @@ -339,39 +490,35 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, pde = I386_VM_PDE(virtual); vmassert(pde >= 0 && pde < I386_VM_DIR_ENTRIES); pde_v = phys_get32((u32_t) (root + pde)); + if(!(pde_v & I386_VM_PRESENT)) { -#if 0 - kprintf("vm_lookup: %d:%s:0x%lx: cr3 0x%lx: pde %d not present\n", - proc->p_endpoint, proc->p_name, virtual, root, pde); - kprintf("kernel stack: "); - util_stacktrace(); -#endif - return EFAULT; + NOREC_RETURN(vmlookup, EFAULT); } - /* Retrieve page table entry. 
*/ - pt = (u32_t *) I386_VM_PFA(pde_v); - vmassert(!((u32_t) pt % I386_PAGE_SIZE)); - pte = I386_VM_PTE(virtual); - vmassert(pte >= 0 && pte < I386_VM_PT_ENTRIES); - pte_v = phys_get32((u32_t) (pt + pte)); - if(!(pte_v & I386_VM_PRESENT)) { -#if 0 - kprintf("vm_lookup: %d:%s:0x%lx: cr3 %lx: pde %d: pte %d not present\n", - proc->p_endpoint, proc->p_name, virtual, root, pde, pte); - kprintf("kernel stack: "); - util_stacktrace(); -#endif - return EFAULT; - } + /* We don't expect to ever see this. */ + if(pde_v & I386_VM_BIGPAGE) { + *physical = pde_v & I386_VM_ADDR_MASK_4MB; + if(ptent) *ptent = pde_v; + *physical += virtual & I386_VM_OFFSET_MASK_4MB; + } else { + /* Retrieve page table entry. */ + pt = (u32_t *) I386_VM_PFA(pde_v); + vmassert(!((u32_t) pt % I386_PAGE_SIZE)); + pte = I386_VM_PTE(virtual); + vmassert(pte >= 0 && pte < I386_VM_PT_ENTRIES); + pte_v = phys_get32((u32_t) (pt + pte)); + if(!(pte_v & I386_VM_PRESENT)) { + NOREC_RETURN(vmlookup, EFAULT); + } - if(ptent) *ptent = pte_v; + if(ptent) *ptent = pte_v; - /* Actual address now known; retrieve it and add page offset. */ - *physical = I386_VM_PFA(pte_v); - *physical += virtual % I386_PAGE_SIZE; + /* Actual address now known; retrieve it and add page offset. 
*/ + *physical = I386_VM_PFA(pte_v); + *physical += virtual % I386_PAGE_SIZE; + } - return OK; + NOREC_RETURN(vmlookup, OK); } /* From virtual address v in process p, @@ -390,54 +537,6 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, return r; \ } } } -/*===========================================================================* - * vm_copy * - *===========================================================================*/ -int vm_copy(vir_bytes src, struct proc *srcproc, - vir_bytes dst, struct proc *dstproc, phys_bytes bytes) -{ -#define WRAPS(v) (ULONG_MAX - (v) <= bytes) - - if(WRAPS(src) || WRAPS(dst)) - minix_panic("vm_copy: linear address wraps", NO_NUM); - - while(bytes > 0) { - u32_t n, flags; - phys_bytes p_src, p_dst; -#define PAGEREMAIN(v) (I386_PAGE_SIZE - ((v) % I386_PAGE_SIZE)) - - /* We can copy this number of bytes without - * crossing a page boundary, but don't copy more - * than asked. - */ - n = MIN(PAGEREMAIN(src), PAGEREMAIN(dst)); - n = MIN(n, bytes); - vmassert(n > 0); - vmassert(n <= I386_PAGE_SIZE); - - /* Convert both virtual addresses to physical and do - * copy. - */ - LOOKUP(p_src, srcproc, src, NULL); - LOOKUP(p_dst, dstproc, dst, &flags); - if(!(flags & I386_VM_WRITE)) { - kprintf("vm_copy: copying to nonwritable page\n"); - kprintf("kernel stack: "); - util_stacktrace(); - return EFAULT; - } - phys_copy(p_src, p_dst, n); - - /* Book number of bytes copied. 
*/ - vmassert(bytes >= n); - bytes -= n; - src += n; - dst += n; - } - - return OK; -} - /*===========================================================================* * vm_contiguous * *===========================================================================*/ @@ -493,155 +592,99 @@ PUBLIC int vm_contiguous(struct proc *targetproc, u32_t vir_buf, size_t bytes) boundaries++; } - if(verbose_vm) - kprintf("vm_contiguous: yes (%d boundaries tested)\n", - boundaries); - return 1; } -int vm_checkrange_verbose = 0; - /*===========================================================================* - * vm_checkrange * + * vm_suspend * *===========================================================================*/ -PUBLIC int vm_checkrange(struct proc *caller, struct proc *target, - vir_bytes vir, vir_bytes bytes, int wrfl, int checkonly) +PUBLIC int vm_suspend(struct proc *caller, struct proc *target, + vir_bytes linaddr, vir_bytes len, int wrflag, int type) { - u32_t flags, po, v; - int r; + /* This range is not OK for this process. Set parameters + * of the request and notify VM about the pending request. + */ + vmassert(!RTS_ISSET(caller, VMREQUEST)); + vmassert(!RTS_ISSET(target, VMREQUEST)); - if(!HASPT(target)) - return OK; + RTS_LOCK_SET(caller, VMREQUEST); - /* If caller has had a reply to this request, return it. 
*/ - if(RTS_ISSET(caller, VMREQUEST)) { - if(caller->p_vmrequest.who == target->p_endpoint) { - if(caller->p_vmrequest.vmresult == VMSUSPEND) - minix_panic("check sees VMSUSPEND?", NO_NUM); - RTS_LOCK_UNSET(caller, VMREQUEST); -#if 0 - kprintf("SYSTEM: vm_checkrange: returning vmresult %d\n", - caller->p_vmrequest.vmresult); -#endif - return caller->p_vmrequest.vmresult; - } else { -#if 0 - kprintf("SYSTEM: vm_checkrange: caller has a request for %d, " - "but our target is %d\n", - caller->p_vmrequest.who, target->p_endpoint); +#if DEBUG_VMASSERT + caller->p_vmrequest.stacktrace[0] = '\0'; + util_stacktrace_strcat(caller->p_vmrequest.stacktrace); #endif - } - } - - po = vir % I386_PAGE_SIZE; - if(po > 0) { - vir -= po; - bytes += po; - } - vmassert(target); - vmassert(bytes > 0); - - for(v = vir; v < vir + bytes; v+= I386_PAGE_SIZE) { - u32_t phys; + caller->p_vmrequest.writeflag = 1; + caller->p_vmrequest.start = linaddr; + caller->p_vmrequest.length = len; + caller->p_vmrequest.who = target->p_endpoint; + caller->p_vmrequest.type = type; + + /* Connect caller on vmrequest wait queue. */ + if(!(caller->p_vmrequest.nextrequestor = vmrequest)) + mini_notify(proc_addr(SYSTEM), VM_PROC_NR); + vmrequest = caller; +} - /* If page exists and it's writable if desired, we're OK - * for this page. 
- */ - if(vm_lookup(target, v, &phys, &flags) == OK && - !(wrfl && !(flags & I386_VM_WRITE))) { - if(vm_checkrange_verbose) { -#if 0 - kprintf("SYSTEM: checkrange:%s:%d: 0x%lx: write 0x%lx, flags 0x%lx, phys 0x%lx, OK\n", - target->p_name, target->p_endpoint, v, wrfl, flags, phys); -#endif - } - continue; - } +/*===========================================================================* + * delivermsg * + *===========================================================================*/ +int delivermsg(struct proc *rp) +{ + phys_bytes addr; + int r; + NOREC_ENTER(deliver); - if(vm_checkrange_verbose) { - kprintf("SYSTEM: checkrange:%s:%d: 0x%lx: write 0x%lx, flags 0x%lx, phys 0x%lx, NOT OK\n", - target->p_name, target->p_endpoint, v, wrfl, flags, phys); - } + vmassert(rp->p_misc_flags & MF_DELIVERMSG); + vmassert(rp->p_delivermsg.m_source != NONE); - if(checkonly) { - return VMSUSPEND; - } + vmassert(rp->p_delivermsg_lin); +#if DEBUG_VMASSERT + if(rp->p_delivermsg_lin != + umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message))) { + printf("vir: 0x%lx lin was: 0x%lx umap now: 0x%lx\n", + rp->p_delivermsg_vir, rp->p_delivermsg_lin, + umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message))); + minix_panic("that's wrong", NO_NUM); + } - /* This range is not OK for this process. Set parameters - * of the request and notify VM about the pending request. - */ - if(RTS_ISSET(caller, VMREQUEST)) - minix_panic("VMREQUEST already set", caller->p_endpoint); - RTS_LOCK_SET(caller, VMREQUEST); - - /* Set parameters in caller. */ - caller->p_vmrequest.writeflag = wrfl; - caller->p_vmrequest.start = vir; - caller->p_vmrequest.length = bytes; - caller->p_vmrequest.who = target->p_endpoint; - - /* Set caller in target. */ - target->p_vmrequest.requestor = caller; - - /* Connect caller on vmrequest wait queue. 
*/ - caller->p_vmrequest.nextrequestor = vmrequest; - vmrequest = caller; - if(!caller->p_vmrequest.nextrequestor) { - int n = 0; - struct proc *vmr; - for(vmr = vmrequest; vmr; vmr = vmr->p_vmrequest.nextrequestor) - n++; - soft_notify(VM_PROC_NR); -#if 0 - kprintf("(%d) ", n); - kprintf("%d/%d ", - caller->p_endpoint, target->p_endpoint); - util_stacktrace(); #endif - } -#if 0 - kprintf("SYSTEM: vm_checkrange: range bad for " - "target %s:0x%lx-0x%lx, caller %s\n", - target->p_name, vir, vir+bytes, caller->p_name); - - kprintf("vm_checkrange kernel trace: "); - util_stacktrace(); - kprintf("target trace: "); - proc_stacktrace(target); -#endif + vm_set_cr3(rp); - if(target->p_endpoint == VM_PROC_NR) { - kprintf("caller trace: "); - proc_stacktrace(caller); - kprintf("target trace: "); - proc_stacktrace(target); - minix_panic("VM ranges should be OK", NO_NUM); - } + PHYS_COPY_CATCH(vir2phys(&rp->p_delivermsg), + rp->p_delivermsg_lin, sizeof(message), addr); - return VMSUSPEND; + if(addr) { + vm_suspend(rp, rp, rp->p_delivermsg_lin, sizeof(message), 1, + VMSTYPE_DELIVERMSG); + r = VMSUSPEND; + } else { +#if DEBUG_VMASSERT + rp->p_delivermsg.m_source = NONE; + rp->p_delivermsg_lin = 0; +#endif + rp->p_misc_flags &= ~MF_DELIVERMSG; + r = OK; } - return OK; + NOREC_RETURN(deliver, r); } char *flagstr(u32_t e, int dir) { static char str[80]; strcpy(str, ""); -#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); } } while(0) FLAG(I386_VM_PRESENT); FLAG(I386_VM_WRITE); FLAG(I386_VM_USER); FLAG(I386_VM_PWT); FLAG(I386_VM_PCD); + FLAG(I386_VM_GLOBAL); if(dir) FLAG(I386_VM_BIGPAGE); /* Page directory entry only */ else FLAG(I386_VM_DIRTY); /* Page table entry only */ - return str; } @@ -658,8 +701,9 @@ void vm_pt_print(u32_t *pagetable, u32_t v) if(!(pte_v & I386_VM_PRESENT)) continue; pfa = I386_VM_PFA(pte_v); - kprintf("%4d:%08lx:%08lx ", - pte, v + I386_PAGE_SIZE*pte, pfa); + kprintf("%4d:%08lx:%08lx %2s ", + pte, v + I386_PAGE_SIZE*pte, pfa, + (pte_v & I386_VM_WRITE) 
? "rw":"RO"); col++; if(col == 3) { kprintf("\n"); col = 0; } } @@ -668,31 +712,85 @@ void vm_pt_print(u32_t *pagetable, u32_t v) return; } -/*===========================================================================* - * vm_print * - *===========================================================================*/ void vm_print(u32_t *root) { int pde; vmassert(!((u32_t) root % I386_PAGE_SIZE)); - for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) { + printf("page table 0x%lx:\n", root); + + for(pde = 10; pde < I386_VM_DIR_ENTRIES; pde++) { u32_t pde_v; u32_t *pte_a; pde_v = phys_get32((u32_t) (root + pde)); if(!(pde_v & I386_VM_PRESENT)) continue; - pte_a = (u32_t *) I386_VM_PFA(pde_v); - kprintf("%4d: pt %08lx %s\n", - pde, pte_a, flagstr(pde_v, 1)); - vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE); + if(pde_v & I386_VM_BIGPAGE) { + kprintf("%4d: 0x%lx, flags %s\n", + pde, I386_VM_PFA(pde_v), flagstr(pde_v, 1)); + } else { + pte_a = (u32_t *) I386_VM_PFA(pde_v); + kprintf("%4d: pt %08lx %s\n", + pde, pte_a, flagstr(pde_v, 1)); + vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE); + kprintf("\n"); + } } return; } +u32_t thecr3; + +u32_t read_cr3(void) +{ + level0(getcr3val); + return thecr3; +} + + +/*===========================================================================* + * lin_memset * + *===========================================================================*/ +int vm_phys_memset(phys_bytes ph, u8_t c, phys_bytes bytes) +{ + char *v; + u32_t p; + NOREC_ENTER(physmemset); + + p = c | (c << 8) | (c << 16) | (c << 24); + + if(!vm_running) { + phys_memset(ph, p, bytes); + NOREC_RETURN(physmemset, OK); + } + + vmassert(nfreepdes >= 3); + + /* With VM, we have to map in the physical memory. + * We can do this 4MB at a time. 
+ */ + while(bytes > 0) { + int pde, t; + vir_bytes chunk = bytes; + phys_bytes ptr; + inusepde = NOPDE; + CREATEPDE(((struct proc *) NULL), ptr, ph, chunk, bytes, pde, t); + /* We can memset as many bytes as we have remaining, + * or as many as remain in the 4MB chunk we mapped in. + */ + phys_memset(ptr, p, chunk); + DONEPDE(pde); + bytes -= chunk; + ph += chunk; + } + + + NOREC_RETURN(physmemset, OK); +} + /*===========================================================================* * virtual_copy_f * *===========================================================================*/ @@ -710,6 +808,7 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ int seg_index; int i, r; struct proc *procs[2]; + NOREC_ENTER(virtualcopy); /* Check copy count. */ if (bytes <= 0) return(EDOM); @@ -735,7 +834,9 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ switch(type) { case LOCAL_SEG: case LOCAL_VM_SEG: - if(!p) return EDEADSRCDST; + if(!p) { + NOREC_RETURN(virtualcopy, EDEADSRCDST); + } seg_index = vir_addr[i]->segment & SEGMENT_INDEX; if(type == LOCAL_SEG) phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset, @@ -751,7 +852,9 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ } break; case REMOTE_SEG: - if(!p) return EDEADSRCDST; + if(!p) { + NOREC_RETURN(virtualcopy, EDEADSRCDST); + } seg_index = vir_addr[i]->segment & SEGMENT_INDEX; phys_addr[i] = umap_remote(p, seg_index, vir_addr[i]->offset, bytes); break; @@ -763,43 +866,96 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ case PHYS_SEG: phys_addr[i] = vir_addr[i]->offset; break; - case GRANT_SEG: - phys_addr[i] = umap_grant(p, vir_addr[i]->offset, bytes); - break; default: kprintf("virtual_copy: strange type 0x%x\n", type); - return(EINVAL); + NOREC_RETURN(virtualcopy, EINVAL); } /* Check if mapping succeeded. 
*/ if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG) { kprintf("virtual_copy EFAULT\n"); - return(EFAULT); + NOREC_RETURN(virtualcopy, EFAULT); } } - if(vmcheck && procs[_SRC_]) - CHECKRANGE_OR_SUSPEND(procs[_SRC_], phys_addr[_SRC_], bytes, 0); - if(vmcheck && procs[_DST_]) - CHECKRANGE_OR_SUSPEND(procs[_DST_], phys_addr[_DST_], bytes, 1); + if(vm_running) { + int r; + struct proc *caller; -#define NOPT(p) (!(p) || !HASPT(p)) - /* Now copy bytes between physical addresseses. */ - if(NOPT(procs[_SRC_]) && NOPT(procs[_DST_])) { - /* Without vm, address ranges actually are physical. */ - phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes); - r = OK; - } else { - /* With vm, addresses need further interpretation. */ - r = vm_copy(phys_addr[_SRC_], procs[_SRC_], - phys_addr[_DST_], procs[_DST_], (phys_bytes) bytes); - if(r != OK) { - kprintf("vm_copy: %lx to %lx failed\n", - phys_addr[_SRC_],phys_addr[_DST_]); + caller = proc_addr(who_p); + + if(RTS_ISSET(caller, VMREQUEST)) { + struct proc *target; + int pn; + vmassert(caller->p_vmrequest.vmresult != VMSUSPEND); + RTS_LOCK_UNSET(caller, VMREQUEST); + if(caller->p_vmrequest.vmresult != OK) { + printf("virtual_copy: returning VM error %d\n", + caller->p_vmrequest.vmresult); + NOREC_RETURN(virtualcopy, caller->p_vmrequest.vmresult); + } + } + + if((r=lin_lin_copy(procs[_SRC_], phys_addr[_SRC_], + procs[_DST_], phys_addr[_DST_], bytes)) != OK) { + struct proc *target; + int wr; + phys_bytes lin; + if(r != EFAULT_SRC && r != EFAULT_DST) + minix_panic("lin_lin_copy failed", r); + if(!vmcheck) { + NOREC_RETURN(virtualcopy, r); + } + + vmassert(procs[_SRC_] && procs[_DST_]); + + if(r == EFAULT_SRC) { + lin = phys_addr[_SRC_]; + target = procs[_SRC_]; + wr = 0; + } else if(r == EFAULT_DST) { + lin = phys_addr[_DST_]; + target = procs[_DST_]; + wr = 1; + } else { + minix_panic("r strange", r); + } + +#if 0 + printf("virtual_copy: suspending caller %d / %s, target %d / %s\n", + caller->p_endpoint, 
caller->p_name, + target->p_endpoint, target->p_name); +#endif + + vmassert(k_reenter == -1); + vmassert(proc_ptr->p_endpoint == SYSTEM); + vm_suspend(caller, target, lin, bytes, wr, VMSTYPE_KERNELCALL); + + NOREC_RETURN(virtualcopy, VMSUSPEND); } + + NOREC_RETURN(virtualcopy, OK); } - return(r); + vmassert(!vm_running); + + /* can't copy to/from process with PT without VM */ +#define NOPT(p) (!(p) || !HASPT(p)) + if(!NOPT(procs[_SRC_])) { + kprintf("ignoring page table src: %s / %d at 0x%lx\n", + procs[_SRC_]->p_name, procs[_SRC_]->p_endpoint, procs[_SRC_]->p_seg.p_cr3); +} + if(!NOPT(procs[_DST_])) { + kprintf("ignoring page table dst: %s / %d at 0x%lx\n", + procs[_DST_]->p_name, procs[_DST_]->p_endpoint, + procs[_DST_]->p_seg.p_cr3); + } + + /* Now copy bytes between physical addresseses. */ + if(phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes)) + NOREC_RETURN(virtualcopy, EFAULT); + + NOREC_RETURN(virtualcopy, OK); } /*===========================================================================* @@ -821,6 +977,25 @@ PUBLIC int data_copy( return virtual_copy(&src, &dst, bytes); } +/*===========================================================================* + * data_copy_vmcheck * + *===========================================================================*/ +PUBLIC int data_copy_vmcheck( + endpoint_t from_proc, vir_bytes from_addr, + endpoint_t to_proc, vir_bytes to_addr, + size_t bytes) +{ + struct vir_addr src, dst; + + src.segment = dst.segment = D; + src.offset = from_addr; + dst.offset = to_addr; + src.proc_nr_e = from_proc; + dst.proc_nr_e = to_proc; + + return virtual_copy_vmcheck(&src, &dst, bytes); +} + /*===========================================================================* * arch_pre_exec * *===========================================================================*/ @@ -852,4 +1027,10 @@ PUBLIC int arch_umap(struct proc *pr, vir_bytes offset, vir_bytes count, return EINVAL; } - +/* VM reports page directory slot we're 
allowed to use freely. */ +void i386_freepde(int pde) +{ + if(nfreepdes >= WANT_FREEPDES) + return; + freepdes[nfreepdes++] = pde; +} diff --git a/kernel/arch/i386/mpx386.s b/kernel/arch/i386/mpx386.s index 066df8573..3c2248c48 100755 --- a/kernel/arch/i386/mpx386.s +++ b/kernel/arch/i386/mpx386.s @@ -60,7 +60,6 @@ begbss: #include #include #include "../../const.h" -#include "vm.h" #include "sconst.h" /* Selected 386 tss offsets. */ @@ -74,9 +73,8 @@ begbss: .define _restart .define save -.define _kernel_cr3 -.define _pagefault_cr2 -.define _pagefault_count +.define _reload_cr3 +.define _write_cr3 ! write cr3 .define errexception .define exception1 @@ -101,6 +99,8 @@ begbss: .define _params_size .define _params_offset .define _mon_ds +.define _schedcheck +.define _dirtypde .define _hwint00 ! handlers for hardware interrupts .define _hwint01 @@ -218,12 +218,6 @@ csinit: ltr ax push 0 ! set flags to known good state popf ! esp, clear nested task and int enable -#if VM_KERN_NOPAGEZERO - jmp laststep - -.align I386_PAGE_SIZE -laststep: -#endif jmp _main ! main() @@ -239,7 +233,6 @@ laststep: #define hwint_master(irq) \ call save /* save interrupted process state */;\ push (_irq_handlers+4*irq) /* irq_handlers[irq] */;\ - LOADCR3WITHEAX(irq, (_kernel_cr3)) /* switch to kernel page table */;\ call _intr_handle /* intr_handle(irq_handlers[irq]) */;\ pop ecx ;\ cmp (_irq_actids+4*irq), 0 /* interrupt still active? */;\ @@ -291,7 +284,6 @@ _hwint07: ! Interrupt routine for irq 7 (printer) #define hwint_slave(irq) \ call save /* save interrupted process state */;\ push (_irq_handlers+4*irq) /* irq_handlers[irq] */;\ - LOADCR3WITHEAX(irq, (_kernel_cr3)) /* switch to kernel page table */;\ call _intr_handle /* intr_handle(irq_handlers[irq]) */;\ pop ecx ;\ cmp (_irq_actids+4*irq), 0 /* interrupt still active? */;\ @@ -398,11 +390,9 @@ _p_s_call: push eax ! source / destination push ecx ! call number (ipc primitive to use) -! 
LOADCR3WITHEAX(0x20, (_kernel_cr3)) - call _sys_call ! sys_call(call_nr, src_dst, m_ptr, bit_map) ! caller is now explicitly in proc_ptr - mov AXREG(esi), eax ! sys_call MUST PRESERVE si + mov AXREG(esi), eax ! Fall into code to restart proc/task running. @@ -413,14 +403,21 @@ _restart: ! Restart the current process or the next process if it is set. - cmp (_next_ptr), 0 ! see if another process is scheduled - jz 0f - mov eax, (_next_ptr) - mov (_proc_ptr), eax ! schedule new process - mov (_next_ptr), 0 -0: mov esp, (_proc_ptr) ! will assume P_STACKBASE == 0 + cli + call _schedcheck ! ask C function who we're running + mov esp, (_proc_ptr) ! will assume P_STACKBASE == 0 lldt P_LDT_SEL(esp) ! enable process' segment descriptors - LOADCR3WITHEAX(0x21, P_CR3(esp)) ! switch to process page table + cmp P_CR3(esp), 0 ! process does not have its own PT + jz 0f + mov eax, P_CR3(esp) + cmp eax, (loadedcr3) + jz 0f + mov cr3, eax + mov (loadedcr3), eax + mov eax, (_proc_ptr) + mov (_ptproc), eax + mov (_dirtypde), 0 +0: lea eax, P_STACKTOP(esp) ! arrange for next interrupt mov (_tss+TSS3_S_SP0), eax ! to save state in process table restart1: @@ -496,8 +493,7 @@ _page_fault: push PAGE_FAULT_VECTOR push eax mov eax, cr2 -sseg mov (_pagefault_cr2), eax -sseg inc (_pagefault_count) +sseg mov (pagefaultcr2), eax pop eax jmp errexception @@ -526,19 +522,26 @@ errexception: sseg pop (ex_number) sseg pop (trap_errno) exception1: ! Common for all exceptions. + sseg mov (old_eax_ptr), esp ! where will eax be saved? + sseg sub (old_eax_ptr), PCREG-AXREG ! here + push eax ! eax is scratch register mov eax, 0+4(esp) ! old eip sseg mov (old_eip), eax + mov eax, esp + add eax, 4 + sseg mov (old_eip_ptr), eax movzx eax, 4+4(esp) ! old cs sseg mov (old_cs), eax mov eax, 8+4(esp) ! 
old eflags sseg mov (old_eflags), eax - LOADCR3WITHEAX(0x24, (_kernel_cr3)) - pop eax call save + push (pagefaultcr2) + push (old_eax_ptr) + push (old_eip_ptr) push (old_eflags) push (old_cs) push (old_eip) @@ -546,34 +549,53 @@ exception1: ! Common for all exceptions. push (ex_number) call _exception ! (ex_number, trap_errno, old_eip, ! old_cs, old_eflags) - add esp, 5*4 + add esp, 8*4 ret + !*===========================================================================* -!* level0_call * +!* write_cr3 * !*===========================================================================* -_level0_call: - call save - jmp (_level0_func) +! PUBLIC void write_cr3(unsigned long value); +_write_cr3: + push ebp + mov ebp, esp + mov eax, 8(ebp) + cmp eax, (loadedcr3) + jz 0f + mov cr3, eax + mov (loadedcr3), eax + mov (_dirtypde), 0 +0: + pop ebp + ret !*===========================================================================* -!* load_kernel_cr3 * +!* reload_cr3 * !*===========================================================================* -.align 16 -_load_kernel_cr3: - mov eax, (_kernel_cr3) - mov cr3, eax +! PUBLIC void reload_cr3(void); +_reload_cr3: + push ebp + mov ebp, esp + mov (_dirtypde), 0 + mov eax, cr3 + mov cr3, eax + pop ebp ret +!*===========================================================================* +!* level0_call * +!*===========================================================================* +_level0_call: + call save + jmp (_level0_func) + !*===========================================================================* !* data * !*===========================================================================* .sect .rom ! Before the string table please .data2 0x526F ! this must be the first data entry (magic #) -#if VM_KERN_NOPAGEZERO -.align I386_PAGE_SIZE -#endif .sect .bss k_stack: @@ -581,7 +603,11 @@ k_stack: k_stktop: ! 
top of kernel stack .comm ex_number, 4 .comm trap_errno, 4 + .comm old_eip_ptr, 4 + .comm old_eax_ptr, 4 .comm old_eip, 4 .comm old_cs, 4 .comm old_eflags, 4 + .comm pagefaultcr2, 4 + .comm loadedcr3, 4 diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index 398f8d19c..b2ae9eaff 100755 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -167,6 +167,11 @@ PUBLIC void prot_init(void) unsigned ldt_index; register struct proc *rp; + /* Click-round kernel. */ + if(kinfo.data_base % CLICK_SIZE) + minix_panic("kinfo.data_base not aligned", NO_NUM); + kinfo.data_size = ((kinfo.data_size+CLICK_SIZE-1)/CLICK_SIZE) * CLICK_SIZE; + /* Build gdt and idt pointers in GDT where the BIOS expects them. */ dtp= (struct desctableptr_s *) &gdt[GDT_INDEX]; * (u16_t *) dtp->limit = (sizeof gdt) - 1; @@ -334,3 +339,118 @@ PUBLIC void alloc_segments(register struct proc *rp) rp->p_reg.ds = (DS_LDT_INDEX*DESC_SIZE) | TI | privilege; } +/*===========================================================================* + * printseg * + *===========================================================================*/ +PUBLIC void printseg(char *banner, int iscs, struct proc *pr, u32_t selector) +{ + u32_t base, limit, index, dpl; + struct segdesc_s *desc; + + if(banner) { kprintf("%s", banner); } + + index = selector >> 3; + + kprintf("RPL %d, ind %d of ", + (selector & RPL_MASK), index); + + if(selector & TI) { + kprintf("LDT"); + if(index < 0 || index >= LDT_SIZE) { + kprintf("invalid index in ldt\n"); + return; + } + desc = &pr->p_seg.p_ldt[index]; + } else { + kprintf("GDT"); + if(index < 0 || index >= GDT_SIZE) { + kprintf("invalid index in gdt\n"); + return; + } + desc = &gdt[index]; + } + + limit = desc->limit_low | + (((u32_t) desc->granularity & LIMIT_HIGH) << GRANULARITY_SHIFT); + + if(desc->granularity & GRANULAR) { + limit = (limit << PAGE_GRAN_SHIFT) + 0xfff; + } + + base = desc->base_low | + ((u32_t) desc->base_middle << BASE_MIDDLE_SHIFT) | + 
((u32_t) desc->base_high << BASE_HIGH_SHIFT); + + kprintf(" -> base 0x%08lx size 0x%08lx ", base, limit+1); + + if(iscs) { + if(!(desc->granularity & BIG)) + kprintf("16bit "); + } else { + if(!(desc->granularity & BIG)) + kprintf("not big "); + } + + if(desc->granularity & 0x20) { /* reserved */ + minix_panic("granularity reserved field set", NO_NUM); + } + + if(!(desc->access & PRESENT)) + kprintf("notpresent "); + + if(!(desc->access & SEGMENT)) + kprintf("system "); + + if(desc->access & EXECUTABLE) { + kprintf(" exec "); + if(desc->access & CONFORMING) kprintf("conforming "); + if(!(desc->access & READABLE)) kprintf("non-readable "); + } else { + kprintf("nonexec "); + if(desc->access & EXPAND_DOWN) kprintf("non-expand-down "); + if(!(desc->access & WRITEABLE)) kprintf("non-writable "); + } + + if(!(desc->access & ACCESSED)) { + kprintf("nonacc "); + } + + dpl = ((u32_t) desc->access & DPL) >> DPL_SHIFT; + + kprintf("DPL %d\n", dpl); + + return; +} + +/*===========================================================================* + * prot_set_kern_seg_limit * + *===========================================================================*/ +PUBLIC int prot_set_kern_seg_limit(vir_bytes limit) +{ + struct proc *rp; + vir_bytes prev; + int orig_click; + int incr_clicks; + + if(limit <= kinfo.data_base) { + kprintf("prot_set_kern_seg_limit: limit bogus\n"); + return EINVAL; + } + + /* Do actual increase. */ + orig_click = kinfo.data_size / CLICK_SIZE; + kinfo.data_size = limit - kinfo.data_base; + incr_clicks = kinfo.data_size / CLICK_SIZE - orig_click; + + prot_init(); + + /* Increase kernel processes too. 
*/ + for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) { + if (RTS_ISSET(rp, SLOT_FREE) || !iskernelp(rp)) + continue; + rp->p_memmap[S].mem_len += incr_clicks; + alloc_segments(rp); + } + + return OK; +} diff --git a/kernel/arch/i386/proto.h b/kernel/arch/i386/proto.h index 8b18f0c91..5ab71ea57 100644 --- a/kernel/arch/i386/proto.h +++ b/kernel/arch/i386/proto.h @@ -49,11 +49,17 @@ _PROTOTYPE( void vir_insb, (u16_t port, struct proc *proc, u32_t vir, size_t cou _PROTOTYPE( void vir_outsb, (u16_t port, struct proc *proc, u32_t vir, size_t count)); _PROTOTYPE( void vir_insw, (u16_t port, struct proc *proc, u32_t vir, size_t count)); _PROTOTYPE( void vir_outsw, (u16_t port, struct proc *proc, u32_t vir, size_t count)); +_PROTOTYPE( void i386_updatepde, (int pde, u32_t val)); +_PROTOTYPE( void i386_freepde, (int pde)); +_PROTOTYPE( void getcr3val, (void)); +_PROTOTYPE( void switchedcr3, (void)); +_PROTOTYPE( void vm_set_cr3, (struct proc *)); /* exception.c */ _PROTOTYPE( void exception, (unsigned vec_nr, u32_t trap_errno, - u32_t old_eip, U16_t old_cs, u32_t old_eflags) ); + u32_t old_eip, U16_t old_cs, u32_t old_eflags, + u32_t *old_eip_ptr, u32_t *old_eax_ptr, u32_t pagefaultcr2) ); /* klib386.s */ _PROTOTYPE( void level0, (void (*func)(void)) ); @@ -70,7 +76,12 @@ _PROTOTYPE( void phys_insb, (U16_t port, phys_bytes buf, size_t count) ); _PROTOTYPE( void phys_insw, (U16_t port, phys_bytes buf, size_t count) ); _PROTOTYPE( void phys_outsb, (U16_t port, phys_bytes buf, size_t count) ); _PROTOTYPE( void phys_outsw, (U16_t port, phys_bytes buf, size_t count) ); -_PROTOTYPE( void i386_invlpg, (U32_t addr) ); +_PROTOTYPE( void i386_invlpg_level0, (void) ); +_PROTOTYPE( int _memcpy_k, (void *dst, void *src, size_t n) ); +_PROTOTYPE( int _memcpy_k_fault, (void) ); +_PROTOTYPE( u32_t read_cr3, (void) ); +_PROTOTYPE( void reload_cr3, (void) ); +_PROTOTYPE( void phys_memset, (phys_bytes ph, u32_t c, phys_bytes bytes) ); /* protect.c */ _PROTOTYPE( void prot_init, (void) ); 
@@ -79,6 +90,8 @@ _PROTOTYPE( void init_codeseg, (struct segdesc_s *segdp, phys_bytes base, _PROTOTYPE( void init_dataseg, (struct segdesc_s *segdp, phys_bytes base, vir_bytes size, int privilege) ); _PROTOTYPE( void enable_iop, (struct proc *pp) ); +_PROTOTYPE( int prot_set_kern_seg_limit, (vir_bytes limit) ); +_PROTOTYPE( void printseg, (char *banner, int iscs, struct proc *pr, u32_t selector) ); /* prototype of an interrupt vector table entry */ struct gate_table_s { diff --git a/kernel/arch/i386/sha1.h b/kernel/arch/i386/sha1.h new file mode 100644 index 000000000..dbfdff356 --- /dev/null +++ b/kernel/arch/i386/sha1.h @@ -0,0 +1,551 @@ +/* sha1.c : Implementation of the Secure Hash Algorithm */ + +/* SHA: NIST's Secure Hash Algorithm */ + +/* This version written November 2000 by David Ireland of + DI Management Services Pty Limited + + Adapted from code in the Python Cryptography Toolkit, + version 1.0.0 by A.M. Kuchling 1995. +*/ + +/* AM Kuchling's posting:- + Based on SHA code originally posted to sci.crypt by Peter Gutmann + in message <30ajo5$oe8@ccu2.auckland.ac.nz>. + Modified to test for endianness on creation of SHA objects by AMK. + Also, the original specification of SHA was found to have a weakness + by NSA/NIST. This code implements the fixed version of SHA. +*/ + +/* Here's the first paragraph of Peter Gutmann's posting: + +The following is my SHA (FIPS 180) code updated to allow use of the "fixed" +SHA, thanks to Jim Gillogly and an anonymous contributor for the information on +what's changed in the new version. The fix is a simple change which involves +adding a single rotate in the initial expansion function. It is unknown +whether this is an optimal solution to the problem which was discovered in the +SHA or whether it's simply a bandaid which fixes the problem with a minimum of +effort (for example the reengineering of a great many Capstone chips). +*/ + +/* h files included here to make this just one file ... 
*/ + +/* global.h */ + +#ifndef _GLOBAL_H_ +#define _GLOBAL_H_ 1 + +/* POINTER defines a generic pointer type */ +typedef unsigned char *POINTER; + +/* UINT4 defines a four byte word */ +typedef unsigned long int UINT4; + +/* SHA1BYTE defines a unsigned character */ +typedef unsigned char SHA1BYTE; + +#endif /* end _GLOBAL_H_ */ + +/* sha.h */ + +#ifndef _SHA_H_ +#define _SHA_H_ 1 + +/* #include "global.h" */ + +/* The structure for storing SHS info */ + +typedef struct +{ + UINT4 digest[ 5 ]; /* Message digest */ + UINT4 countLo, countHi; /* 64-bit bit count */ + UINT4 data[ 16 ]; /* SHS data buffer */ + int Endianness; +} SHA_CTX; + +/* Message digest functions */ + +void SHAInit(SHA_CTX *); +void SHAUpdate(SHA_CTX *, SHA1BYTE *buffer, int count); +void SHAFinal(SHA1BYTE *output, SHA_CTX *); + +#endif /* end _SHA_H_ */ + +/* endian.h */ + +#ifndef _ENDIAN_H_ +#define _ENDIAN_H_ 1 + +void endianTest(int *endianness); + +#endif /* end _ENDIAN_H_ */ + + +/* sha.c */ + +#include <stdio.h> +#include <string.h> + +static void SHAtoByte(SHA1BYTE *output, UINT4 *input, unsigned int len); + +/* The SHS block size and message digest sizes, in bytes */ + +#define SHS_DATASIZE 64 +#define SHS_DIGESTSIZE 20 + + +/* The SHS f()-functions. 
The f1 and f3 functions can be optimized to + save one boolean operation each - thanks to Rich Schroeppel, + rcs@cs.arizona.edu for discovering this */ + +/*#define f1(x,y,z) ( ( x & y ) | ( ~x & z ) ) // Rounds 0-19 */ +#define f1(x,y,z) ( z ^ ( x & ( y ^ z ) ) ) /* Rounds 0-19 */ +#define f2(x,y,z) ( x ^ y ^ z ) /* Rounds 20-39 */ +/*#define f3(x,y,z) ( ( x & y ) | ( x & z ) | ( y & z ) ) // Rounds 40-59 */ +#define f3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) ) /* Rounds 40-59 */ +#define f4(x,y,z) ( x ^ y ^ z ) /* Rounds 60-79 */ + +/* The SHS Mysterious Constants */ + +#define K1 0x5A827999L /* Rounds 0-19 */ +#define K2 0x6ED9EBA1L /* Rounds 20-39 */ +#define K3 0x8F1BBCDCL /* Rounds 40-59 */ +#define K4 0xCA62C1D6L /* Rounds 60-79 */ + +/* SHS initial values */ + +#define h0init 0x67452301L +#define h1init 0xEFCDAB89L +#define h2init 0x98BADCFEL +#define h3init 0x10325476L +#define h4init 0xC3D2E1F0L + +/* Note that it may be necessary to add parentheses to these macros if they + are to be called with expressions as arguments */ +/* 32-bit rotate left - kludged with shifts */ + +#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) ) + +/* The initial expanding function. The hash function is defined over an + 80-UINT2 expanded input array W, where the first 16 are copies of the input + data, and the remaining 64 are defined by + + W[ i ] = W[ i - 16 ] ^ W[ i - 14 ] ^ W[ i - 8 ] ^ W[ i - 3 ] + + This implementation generates these values on the fly in a circular + buffer - thanks to Colin Plumb, colin@nyx10.cs.du.edu for this + optimization. + + The updated SHS changes the expanding function by adding a rotate of 1 + bit. Thanks to Jim Gillogly, jim@rand.org, and an anonymous contributor + for this information */ + +#define expand(W,i) ( W[ i & 15 ] = ROTL( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \ + W[ (i - 8) & 15 ] ^ W[ (i - 3) & 15 ] ) ) ) + + +/* The prototype SHS sub-round. 
The fundamental sub-round is: + + a' = e + ROTL( 5, a ) + f( b, c, d ) + k + data; + b' = a; + c' = ROTL( 30, b ); + d' = c; + e' = d; + + but this is implemented by unrolling the loop 5 times and renaming the + variables ( e, a, b, c, d ) = ( a', b', c', d', e' ) each iteration. + This code is then replicated 20 times for each of the 4 functions, using + the next 20 values from the W[] array each time */ + +#define subRound(a, b, c, d, e, f, k, data) \ + ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) ) + +/* Initialize the SHS values */ + +void SHAInit(SHA_CTX *shsInfo) +{ + endianTest(&shsInfo->Endianness); + /* Set the h-vars to their initial values */ + shsInfo->digest[ 0 ] = h0init; + shsInfo->digest[ 1 ] = h1init; + shsInfo->digest[ 2 ] = h2init; + shsInfo->digest[ 3 ] = h3init; + shsInfo->digest[ 4 ] = h4init; + + /* Initialise bit count */ + shsInfo->countLo = shsInfo->countHi = 0; +} + +/* Perform the SHS transformation. Note that this code, like MD5, seems to + break some optimizing compilers due to the complexity of the expressions + and the size of the basic block. It may be necessary to split it into + sections, e.g. based on the four subrounds + + Note that this corrupts the shsInfo->data area */ + +static void SHSTransform( UINT4 *digest, UINT4 *data ) + { + UINT4 A, B, C, Dv, E; /* Local vars */ + UINT4 eData[ 16 ]; /* Expanded data */ + + /* Set up first buffer and local data buffer */ + A = digest[ 0 ]; + B = digest[ 1 ]; + C = digest[ 2 ]; + Dv = digest[ 3 ]; + E = digest[ 4 ]; + memcpy( (POINTER)eData, (POINTER)data, SHS_DATASIZE ); + + /* Heavy mangling, in 4 sub-rounds of 20 interations each. 
*/ + subRound( A, B, C, Dv, E, f1, K1, eData[ 0 ] ); + subRound( E, A, B, C, Dv, f1, K1, eData[ 1 ] ); + subRound( Dv, E, A, B, C, f1, K1, eData[ 2 ] ); + subRound( C, Dv, E, A, B, f1, K1, eData[ 3 ] ); + subRound( B, C, Dv, E, A, f1, K1, eData[ 4 ] ); + subRound( A, B, C, Dv, E, f1, K1, eData[ 5 ] ); + subRound( E, A, B, C, Dv, f1, K1, eData[ 6 ] ); + subRound( Dv, E, A, B, C, f1, K1, eData[ 7 ] ); + subRound( C, Dv, E, A, B, f1, K1, eData[ 8 ] ); + subRound( B, C, Dv, E, A, f1, K1, eData[ 9 ] ); + subRound( A, B, C, Dv, E, f1, K1, eData[ 10 ] ); + subRound( E, A, B, C, Dv, f1, K1, eData[ 11 ] ); + subRound( Dv, E, A, B, C, f1, K1, eData[ 12 ] ); + subRound( C, Dv, E, A, B, f1, K1, eData[ 13 ] ); + subRound( B, C, Dv, E, A, f1, K1, eData[ 14 ] ); + subRound( A, B, C, Dv, E, f1, K1, eData[ 15 ] ); + subRound( E, A, B, C, Dv, f1, K1, expand( eData, 16 ) ); + subRound( Dv, E, A, B, C, f1, K1, expand( eData, 17 ) ); + subRound( C, Dv, E, A, B, f1, K1, expand( eData, 18 ) ); + subRound( B, C, Dv, E, A, f1, K1, expand( eData, 19 ) ); + + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 20 ) ); + subRound( E, A, B, C, Dv, f2, K2, expand( eData, 21 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 22 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 23 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 24 ) ); + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 25 ) ); + subRound( E, A, B, C, Dv, f2, K2, expand( eData, 26 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 27 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 28 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 29 ) ); + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 30 ) ); + subRound( E, A, B, C, Dv, f2, K2, expand( eData, 31 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 32 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 33 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 34 ) ); + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 35 ) ); + 
subRound( E, A, B, C, Dv, f2, K2, expand( eData, 36 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 37 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 38 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 39 ) ); + + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 40 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 41 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 42 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 43 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 44 ) ); + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 45 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 46 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 47 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 48 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 49 ) ); + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 50 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 51 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 52 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 53 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 54 ) ); + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 55 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 56 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 57 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 58 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 59 ) ); + + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 60 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 61 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 62 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 63 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 64 ) ); + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 65 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 66 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 67 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 68 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 
69 ) ); + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 70 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 71 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 72 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 73 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 74 ) ); + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 75 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 76 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 77 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 78 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 79 ) ); + + /* Build message digest */ + digest[ 0 ] += A; + digest[ 1 ] += B; + digest[ 2 ] += C; + digest[ 3 ] += Dv; + digest[ 4 ] += E; + } + +/* When run on a little-endian CPU we need to perform byte reversal on an + array of long words. */ + +static void longReverse(UINT4 *buffer, int byteCount, int Endianness ) +{ + UINT4 value; + + if (Endianness) return; + byteCount /= sizeof( UINT4 ); + while( byteCount-- ) + { + value = *buffer; + value = ( ( value & 0xFF00FF00L ) >> 8 ) | \ + ( ( value & 0x00FF00FFL ) << 8 ); + *buffer++ = ( value << 16 ) | ( value >> 16 ); + } +} + +/* Update SHS for a block of data */ + +void SHAUpdate(SHA_CTX *shsInfo, SHA1BYTE *buffer, int count) +{ + UINT4 tmp; + int dataCount; + + /* Update bitcount */ + tmp = shsInfo->countLo; + if ( ( shsInfo->countLo = tmp + ( ( UINT4 ) count << 3 ) ) < tmp ) + shsInfo->countHi++; /* Carry from low to high */ + shsInfo->countHi += count >> 29; + + /* Get count of bytes already in data */ + dataCount = ( int ) ( tmp >> 3 ) & 0x3F; + + /* Handle any leading odd-sized chunks */ + if( dataCount ) + { + SHA1BYTE *p = ( SHA1BYTE * ) shsInfo->data + dataCount; + + dataCount = SHS_DATASIZE - dataCount; + if( count < dataCount ) + { + memcpy( p, buffer, count ); + return; + } + memcpy( p, buffer, dataCount ); + longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness); + SHSTransform( shsInfo->digest, shsInfo->data ); 
+ buffer += dataCount; + count -= dataCount; + } + + /* Process data in SHS_DATASIZE chunks */ + while( count >= SHS_DATASIZE ) + { + memcpy( (POINTER)shsInfo->data, (POINTER)buffer, SHS_DATASIZE ); + longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness ); + SHSTransform( shsInfo->digest, shsInfo->data ); + buffer += SHS_DATASIZE; + count -= SHS_DATASIZE; + } + + /* Handle any remaining bytes of data. */ + memcpy( (POINTER)shsInfo->data, (POINTER)buffer, count ); + } + +/* Final wrapup - pad to SHS_DATASIZE-byte boundary with the bit pattern + 1 0* (64-bit count of bits processed, MSB-first), then store the SHS_DIGESTSIZE-byte digest in output and wipe the context */ + +void SHAFinal(SHA1BYTE *output, SHA_CTX *shsInfo) +{ + int count; + SHA1BYTE *dataPtr; + + /* Compute number of bytes mod 64 */ + count = ( int ) shsInfo->countLo; + count = ( count >> 3 ) & 0x3F; + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + dataPtr = ( SHA1BYTE * ) shsInfo->data + count; + *dataPtr++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = SHS_DATASIZE - 1 - count; + + /* Pad out to 56 mod 64 */ + if( count < 8 ) + { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset( dataPtr, 0, count ); + longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness ); + SHSTransform( shsInfo->digest, shsInfo->data ); + + /* Now fill the next block with 56 bytes */ + memset( (POINTER)shsInfo->data, 0, SHS_DATASIZE - 8 ); + } + else + /* Pad block to 56 bytes */ + memset( dataPtr, 0, count - 8 ); + + /* Append length in bits and transform */ + shsInfo->data[ 14 ] = shsInfo->countHi; + shsInfo->data[ 15 ] = shsInfo->countLo; + + longReverse( shsInfo->data, SHS_DATASIZE - 8, shsInfo->Endianness ); + SHSTransform( shsInfo->digest, shsInfo->data ); + + /* Output to an array of bytes */ + SHAtoByte(output, shsInfo->digest, SHS_DIGESTSIZE); + + /* Zeroise sensitive stuff: wipe the whole context, not just sizeof(pointer) bytes */ + memset((POINTER)shsInfo, 0, sizeof(*shsInfo)); +} + +static void SHAtoByte(SHA1BYTE *output, UINT4 
*input, unsigned int len) +{ /* Output SHA digest in byte array */ + unsigned int i, j; + + for(i = 0, j = 0; j < len; i++, j += 4) + { + output[j+3] = (SHA1BYTE)( input[i] & 0xff); + output[j+2] = (SHA1BYTE)((input[i] >> 8 ) & 0xff); + output[j+1] = (SHA1BYTE)((input[i] >> 16) & 0xff); + output[j ] = (SHA1BYTE)((input[i] >> 24) & 0xff); + } +} + + +unsigned char digest[SHS_DIGESTSIZE]; +unsigned char testmessage[3] = {'a', 'b', 'c' }; +unsigned char *mess56 = (unsigned char *) + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; + +/* Correct solutions from FIPS PUB 180-1 */ +char *dig1 = "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D"; +char *dig2 = "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1"; +char *dig3 = "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"; + +/* Output should look like:- + a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D <= correct + 84983e44 1c3bd26e baae4aa1 f95129e5 e54670f1 + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 <= correct + 34aa973c d4c4daa4 f61eeb2b dbad2731 6534016f + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F <= correct +*/ + +void sha1test(void) +{ + SHA_CTX sha; + int i; + SHA1BYTE big[1000]; + + SHAInit(&sha); + SHAUpdate(&sha, testmessage, 3); + SHAFinal(digest, &sha); + + for (i = 0; i < SHS_DIGESTSIZE; i++) + { + if ((i % 4) == 0) printf(" "); + printf("%02x", digest[i]); + } + printf("\n"); + printf(" %s <= correct\n", dig1); + + SHAInit(&sha); + SHAUpdate(&sha, mess56, 56); + SHAFinal(digest, &sha); + + for (i = 0; i < SHS_DIGESTSIZE; i++) + { + if ((i % 4) == 0) printf(" "); + printf("%02x", digest[i]); + } + printf("\n"); + printf(" %s <= correct\n", dig2); + + /* Fill up big array */ + for (i = 0; i < 1000; i++) + big[i] = 'a'; + + SHAInit(&sha); + /* Digest 1 million x 'a' */ + for (i = 0; i < 1000; i++) + SHAUpdate(&sha, big, 1000); + SHAFinal(digest, &sha); + + for (i = 0; i < SHS_DIGESTSIZE; i++) + { + if ((i % 4) == 0) printf(" "); + printf("%02x", digest[i]); + 
} + printf("\n"); + printf(" %s <= correct\n", dig3); +} + +/* endian.c */ + +void endianTest(int *endian_ness) +{ + if((*(unsigned short *) ("#S") >> 8) == '#') + { + /* printf("Big endian = no change\n"); */ + *endian_ness = !(0); + } + else + { + /* printf("Little endian = swap\n"); */ + *endian_ness = 0; + } +} + +static char * +sha1print(char *digest) +{ + int i; + for(i = 0; i < SHS_DIGESTSIZE; i++) { + printf("%02x", (unsigned char) digest[i]); + } + printf("\n"); return digest; /* declared char *: hand the buffer back instead of falling off the end */ +} + +static int +phys_sha1(unsigned long ptr, unsigned long bytes, unsigned char *digest) +{ + unsigned long addr = 0; + SHA_CTX sha; + + SHAInit(&sha); + + while(bytes > 0) { + unsigned long chunk; + static unsigned char buf[1024]; + chunk = bytes > sizeof(buf) ? sizeof(buf) : bytes; + PHYS_COPY_CATCH(ptr, vir2phys(buf), chunk, addr); + if(addr) { + return EFAULT; + } + SHAUpdate(&sha, buf, chunk); + ptr += chunk; + bytes -= chunk; + } + + SHAFinal(digest, &sha); + return OK; +} + +static void +sha1(unsigned char *ptr, unsigned long bytes, unsigned char *digest) +{ + SHA_CTX sha; + + SHAInit(&sha); + SHAUpdate(&sha, ptr, bytes); + SHAFinal(digest, &sha); + + return; +} + diff --git a/kernel/arch/i386/system.c b/kernel/arch/i386/system.c index 805e4d451..80a7fb9a3 100644 --- a/kernel/arch/i386/system.c +++ b/kernel/arch/i386/system.c @@ -14,11 +14,11 @@ #include "proto.h" #include "../../proc.h" +#include "../../debug.h" #define CR0_EM 0x0004 /* set to enable trap on any FP instruction */ FORWARD _PROTOTYPE( void ser_debug, (int c)); -FORWARD _PROTOTYPE( void ser_dump_stats, (void)); PUBLIC void arch_shutdown(int how) { @@ -137,82 +137,143 @@ PUBLIC void do_ser_debug() ser_debug(c); } +PRIVATE void ser_dump_queues(void) +{ + int q; + for(q = 0; q < NR_SCHED_QUEUES; q++) { + struct proc *p; + if(rdy_head[q]) + printf("%2d: ", q); + for(p = rdy_head[q]; p; p = p->p_nextready) { + printf("%s / %d ", p->p_name, p->p_endpoint); + } + printf("\n"); + } + +} + +PRIVATE void ser_dump_segs(void) +{ + 
struct proc *pp; + for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++) + { + if (pp->p_rts_flags & SLOT_FREE) + continue; + kprintf("%d: %s ep %d\n", proc_nr(pp), pp->p_name, pp->p_endpoint); + printseg("cs: ", 1, pp, pp->p_reg.cs); + printseg("ds: ", 0, pp, pp->p_reg.ds); + if(pp->p_reg.ss != pp->p_reg.ds) { + printseg("ss: ", 0, pp, pp->p_reg.ss); + } + } +} + PRIVATE void ser_debug(int c) { + int u = 0; + do_serial_debug++; - kprintf("ser_debug: %d\n", c); + /* Disable interrupts so that we get a consistent state. */ + if(!intr_disabled()) { lock; u = 1; }; + switch(c) { case '1': ser_dump_proc(); break; case '2': - ser_dump_stats(); + ser_dump_queues(); break; + case '3': + ser_dump_segs(); + break; +#if DEBUG_TRACE +#define TOGGLECASE(ch, flag) \ + case ch: { \ + if(verboseflags & flag) { \ + verboseflags &= ~flag; \ + printf("%s disabled\n", #flag); \ + } else { \ + verboseflags |= flag; \ + printf("%s enabled\n", #flag); \ + } \ + break; \ + } + TOGGLECASE('8', VF_SCHEDULING) + TOGGLECASE('9', VF_PICKPROC) +#endif } do_serial_debug--; + if(u) { unlock; } } -PUBLIC void ser_dump_proc() +PRIVATE void printslot(struct proc *pp, int level) { - struct proc *pp; - int u = 0; + struct proc *depproc = NULL; + int dep = NONE; +#define COL { int i; for(i = 0; i < level; i++) printf("> "); } - /* Disable interrupts so that we get a consistent state. 
*/ - if(!intr_disabled()) { lock; u = 1; }; + if(level >= NR_PROCS) { + kprintf("loop??\n"); + return; + } - for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++) - { - if (pp->p_rts_flags & SLOT_FREE) - continue; - kprintf( - "%d: 0x%02x %s e %d src %d dst %d prio %d/%d time %d/%d EIP 0x%x\n", - proc_nr(pp), - pp->p_rts_flags, pp->p_name, - pp->p_endpoint, pp->p_getfrom_e, pp->p_sendto_e, - pp->p_priority, pp->p_max_priority, - pp->p_user_time, pp->p_sys_time, - pp->p_reg.pc); - proc_stacktrace(pp); + COL + + kprintf("%d: %s %d prio %d/%d time %d/%d cr3 0x%lx rts %s misc %s", + proc_nr(pp), pp->p_name, pp->p_endpoint, + pp->p_priority, pp->p_max_priority, pp->p_user_time, + pp->p_sys_time, pp->p_seg.p_cr3, + rtsflagstr(pp->p_rts_flags), miscflagstr(pp->p_misc_flags)); + + if(pp->p_rts_flags & SENDING) { + dep = pp->p_sendto_e; + kprintf(" to: "); + } else if(pp->p_rts_flags & RECEIVING) { + dep = pp->p_getfrom_e; + kprintf(" from: "); } - if(u) { unlock; } + if(dep != NONE) { + if(dep == ANY) { + kprintf(" ANY\n"); + } else { + int procno; + if(!isokendpt(dep, &procno)) { + kprintf(" ??? 
%d\n", dep); + } else { + depproc = proc_addr(procno); + if(depproc->p_rts_flags & SLOT_FREE) { + kprintf(" empty slot %d???\n", procno); + depproc = NULL; + } else { + kprintf(" %s\n", depproc->p_name); + } + } + } + } else { + kprintf("\n"); + } + + COL + proc_stacktrace(pp); + + + if(depproc) + printslot(depproc, level+1); } -PRIVATE void ser_dump_stats() + +PUBLIC void ser_dump_proc() { - kprintf("ipc_stats:\n"); - kprintf("deadproc: %d\n", ipc_stats.deadproc); - kprintf("bad_endpoint: %d\n", ipc_stats.bad_endpoint); - kprintf("dst_not_allowed: %d\n", ipc_stats.dst_not_allowed); - kprintf("bad_call: %d\n", ipc_stats.bad_call); - kprintf("call_not_allowed: %d\n", ipc_stats.call_not_allowed); - kprintf("bad_buffer: %d\n", ipc_stats.bad_buffer); - kprintf("deadlock: %d\n", ipc_stats.deadlock); - kprintf("not_ready: %d\n", ipc_stats.not_ready); - kprintf("src_died: %d\n", ipc_stats.src_died); - kprintf("dst_died: %d\n", ipc_stats.dst_died); - kprintf("no_priv: %d\n", ipc_stats.no_priv); - kprintf("bad_size: %d\n", ipc_stats.bad_size); - kprintf("bad_senda: %d\n", ipc_stats.bad_senda); - if (ex64hi(ipc_stats.total)) - { - kprintf("total: %x:%08x\n", ex64hi(ipc_stats.total), - ex64lo(ipc_stats.total)); - } - else - kprintf("total: %u\n", ex64lo(ipc_stats.total)); + struct proc *pp; - kprintf("sys_stats:\n"); - kprintf("bad_req: %d\n", sys_stats.bad_req); - kprintf("not_allowed: %d\n", sys_stats.not_allowed); - if (ex64hi(sys_stats.total)) + for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++) { - kprintf("total: %x:%08x\n", ex64hi(sys_stats.total), - ex64lo(sys_stats.total)); + if (pp->p_rts_flags & SLOT_FREE) + continue; + printslot(pp, 0); } - else - kprintf("total: %u\n", ex64lo(sys_stats.total)); } #if SPROFILE diff --git a/kernel/arch/i386/vm.h b/kernel/arch/i386/vm.h deleted file mode 100644 index 1707ac990..000000000 --- a/kernel/arch/i386/vm.h +++ /dev/null @@ -1,27 +0,0 @@ - -.define _load_kernel_cr3 -.define _last_cr3 - -#define LOADKERNELCR3 ;\ - inc 
(_cr3switch) ;\ - mov eax, (_kernel_cr3) ;\ - cmp (_last_cr3), eax ;\ - jz 9f ;\ - push _load_kernel_cr3 ;\ - call _level0 ;\ - pop eax ;\ - mov eax, (_kernel_cr3) ;\ - mov (_last_cr3), eax ;\ - inc (_cr3reload) ;\ -9: - -#define LOADCR3WITHEAX(type, newcr3) ;\ -sseg inc (_cr3switch) ;\ -sseg mov eax, newcr3 ;\ -sseg cmp (_last_cr3), eax ;\ - jz 8f ;\ - mov cr3, eax ;\ -sseg inc (_cr3reload) ;\ -sseg mov (_last_cr3), eax ;\ -8: - diff --git a/kernel/clock.c b/kernel/clock.c index d5eb1ddfc..0be366e4b 100755 --- a/kernel/clock.c +++ b/kernel/clock.c @@ -230,25 +230,23 @@ irq_hook_t *hook; * If any of the timers expire, do_clocktick() will send out signals. */ expired = 0; - if ((proc_ptr->p_misc_flags & VIRT_TIMER) && + if ((proc_ptr->p_misc_flags & MF_VIRT_TIMER) && (proc_ptr->p_virt_left -= ticks) <= 0) expired = 1; - if ((proc_ptr->p_misc_flags & PROF_TIMER) && + if ((proc_ptr->p_misc_flags & MF_PROF_TIMER) && (proc_ptr->p_prof_left -= ticks) <= 0) expired = 1; if (! (priv(proc_ptr)->s_flags & BILLABLE) && - (bill_ptr->p_misc_flags & PROF_TIMER) && + (bill_ptr->p_misc_flags & MF_PROF_TIMER) && (bill_ptr->p_prof_left -= ticks) <= 0) expired = 1; -#if 0 /* Update load average. */ load_update(); -#endif /* Check if do_clocktick() must be called. Done for alarms and scheduling. * Some processes, such as the kernel tasks, cannot be preempted. 
*/ if ((next_timeout <= realtime) || (proc_ptr->p_ticks_left <= 0) || expired) { prev_ptr = proc_ptr; /* store running process */ - lock_notify(HARDWARE, CLOCK); /* send notification */ + mini_notify(proc_addr(HARDWARE), CLOCK); /* send notification */ } if (do_serial_debug) diff --git a/kernel/debug.c b/kernel/debug.c index 12cd8aa64..7324d4857 100644 --- a/kernel/debug.c +++ b/kernel/debug.c @@ -25,6 +25,8 @@ check_runqueues_f(char *file, int line) minix_panic("check_runqueues called with interrupts enabled", NO_NUM); } + FIXME("check_runqueues being done"); + #define MYPANIC(msg) { \ kprintf("check_runqueues:%s:%d: %s\n", file, line, msg); \ minix_panic("check_runqueues failed", NO_NUM); \ @@ -94,7 +96,9 @@ check_runqueues_f(char *file, int line) for (xp = BEG_PROC_ADDR; xp < END_PROC_ADDR; ++xp) { if(xp->p_magic != PMAGIC) MYPANIC("p_magic wrong in proc table"); - if (! isemptyp(xp) && xp->p_ready && ! xp->p_found) { + if (isemptyp(xp)) + continue; + if(xp->p_ready && ! xp->p_found) { kprintf("sched error: ready proc %d not on queue\n", xp->p_nr); MYPANIC("ready proc not on scheduling queue"); if (l++ > MAX_LOOP) { MYPANIC("loop in debug.c?"); } @@ -103,3 +107,43 @@ check_runqueues_f(char *file, int line) } #endif /* DEBUG_SCHED_CHECK */ + +PUBLIC char * +rtsflagstr(int flags) +{ + static char str[100]; + str[0] = '\0'; + +#define FLAG(n) if(flags & n) { strcat(str, #n " "); } + + FLAG(SLOT_FREE); + FLAG(NO_PRIORITY); + FLAG(SENDING); + FLAG(RECEIVING); + FLAG(SIGNALED); + FLAG(SIG_PENDING); + FLAG(P_STOP); + FLAG(NO_PRIV); + FLAG(NO_ENDPOINT); + FLAG(VMINHIBIT); + FLAG(PAGEFAULT); + FLAG(VMREQUEST); + FLAG(VMREQTARGET); + + return str; +} + +PUBLIC char * +miscflagstr(int flags) +{ + static char str[100]; + str[0] = '\0'; + + FLAG(MF_REPLY_PEND); + FLAG(MF_ASYNMSG); + FLAG(MF_FULLVM); + FLAG(MF_DELIVERMSG); + + return str; +} + diff --git a/kernel/debug.h b/kernel/debug.h index 283b00be2..e25605e0c 100644 --- a/kernel/debug.h +++ b/kernel/debug.h @@ -8,6 +8,7 
@@ */ #include +#include #include "config.h" /* Enable prints such as @@ -24,7 +25,46 @@ #define DEBUG_TIME_LOCKS 1 /* Runtime sanity checking. */ -#define DEBUG_VMASSERT 1 +#define DEBUG_VMASSERT 0 #define DEBUG_SCHED_CHECK 0 +#define DEBUG_STACK_CHECK 0 +#define DEBUG_TRACE 0 + +#if DEBUG_TRACE + +#define VF_SCHEDULING (1L << 1) +#define VF_PICKPROC (1L << 2) + +#define TRACE(code, statement) if(verboseflags & code) { printf("%s:%d: ", __FILE__, __LINE__); statement } + +#else +#define TRACE(code, statement) +#endif + +#define NOREC_ENTER(varname) \ + static int varname = 0; \ + int mustunlock = 0; \ + if(!intr_disabled()) { lock; mustunlock = 1; } \ + if(varname) { \ + minix_panic(#varname " recursive enter", __LINE__); \ + } \ + varname = 1; + +#define NOREC_RETURN(varname, v) do { \ + if(!varname) \ + minix_panic(#varname " flag off", __LINE__); \ + if(!intr_disabled()) \ + minix_panic(#varname " interrupts on", __LINE__); \ + varname = 0; \ + if(mustunlock) { unlock; } \ + return v; \ + } while(0) + +#if DEBUG_VMASSERT +#define vmassert(t) { \ + if(!(t)) { minix_panic("vm: assert " #t " failed\n", __LINE__); } } +#else +#define vmassert(t) { } +#endif #endif /* DEBUG_H */ diff --git a/kernel/glo.h b/kernel/glo.h index e3ed5735b..208818353 100755 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -16,6 +16,7 @@ #include #include #include "config.h" +#include "debug.h" /* Variables relating to shutting down MINIX. */ EXTERN char kernel_exception; /* TRUE after system exceptions */ @@ -29,14 +30,13 @@ EXTERN struct k_randomness krandom; /* gather kernel random information */ EXTERN struct loadinfo kloadinfo; /* status of load average */ /* Process scheduling information and the kernel reentry count. 
*/ -EXTERN struct proc *prev_ptr; /* previously running process */ EXTERN struct proc *proc_ptr; /* pointer to currently running process */ EXTERN struct proc *next_ptr; /* next process to run after restart() */ +EXTERN struct proc *prev_ptr; EXTERN struct proc *bill_ptr; /* process to bill for clock ticks */ EXTERN struct proc *vmrestart; /* first process on vmrestart queue */ EXTERN struct proc *vmrequest; /* first process on vmrequest queue */ EXTERN struct proc *pagefaults; /* first process on pagefault queue */ -EXTERN struct proc *softnotify; /* first process on softnotify queue */ EXTERN char k_reenter; /* kernel reentry count (entry count less 1) */ EXTERN unsigned lost_ticks; /* clock ticks counted outside clock task */ @@ -47,32 +47,6 @@ EXTERN int irq_actids[NR_IRQ_VECTORS]; /* IRQ ID bits active */ EXTERN int irq_use; /* map of all in-use irq's */ EXTERN u32_t system_hz; /* HZ value */ -EXTERN struct ipc_stats -{ - unsigned long deadproc; - unsigned long bad_endpoint; - unsigned long dst_not_allowed; - unsigned long bad_call; - unsigned long call_not_allowed; - unsigned long bad_buffer; - unsigned long deadlock; - unsigned long not_ready; - unsigned long src_died; - unsigned long dst_died; - unsigned long no_priv; - unsigned long bad_size; - unsigned long bad_senda; - u64_t total; -} ipc_stats; -extern endpoint_t ipc_stats_target; - -EXTERN struct system_stats -{ - unsigned long bad_req; - unsigned long not_allowed; - u64_t total; -} sys_stats; - /* Miscellaneous. 
*/ EXTERN reg_t mon_ss, mon_sp; /* boot monitor stack */ EXTERN int mon_return; /* true if we can return to monitor */ @@ -85,18 +59,14 @@ EXTERN char params_buffer[512]; /* boot monitor parameters */ EXTERN int minix_panicing; EXTERN int locklevel; -EXTERN unsigned long cr3switch; -EXTERN unsigned long cr3reload; +#if DEBUG_TRACE +EXTERN int verboseflags; +#endif /* VM */ -EXTERN phys_bytes vm_base; -EXTERN phys_bytes vm_size; -EXTERN phys_bytes vm_mem_high; EXTERN int vm_running; -EXTERN int must_notify_vm; - -/* Verbose flags (debugging). */ -EXTERN int verbose_vm; +EXTERN int catch_pagefaults; +EXTERN struct proc *ptproc; /* Timing */ EXTERN util_timingdata_t timingdata[TIMING_CATEGORIES]; diff --git a/kernel/main.c b/kernel/main.c index f07997cdd..b847b2bef 100755 --- a/kernel/main.c +++ b/kernel/main.c @@ -17,6 +17,7 @@ #include #include #include "proc.h" +#include "debug.h" /* Prototype declarations for PRIVATE functions. */ FORWARD _PROTOTYPE( void announce, (void)); @@ -161,6 +162,9 @@ PUBLIC void main() rp->p_reg.sp -= sizeof(reg_t); } + /* scheduling functions depend on proc_ptr pointing somewhere. */ + if(!proc_ptr) proc_ptr = rp; + /* If this process has its own page table, VM will set the * PT up and manage it. VM will signal the kernel when it has * done this; until then, don't let it run. @@ -186,8 +190,21 @@ PUBLIC void main() /* MINIX is now ready. All boot image processes are on the ready queue. * Return to the assembly code to start running the current process. */ - bill_ptr = proc_addr(IDLE); /* it has to point somewhere */ + bill_ptr = proc_addr(IDLE); /* it has to point somewhere */ announce(); /* print MINIX startup banner */ +/* Warnings for sanity checks that take time. These warnings are printed + * so it's a clear warning no full release should be done with them + * enabled. 
+ */ +#if DEBUG_SCHED_CHECK + FIXME("DEBUG_SCHED_CHECK enabled"); +#endif +#if DEBUG_VMASSERT + FIXME("DEBUG_VMASSERT enabled"); +#endif +#if DEBUG_PROC_CHECK + FIXME("PROC check enabled"); +#endif restart(); } @@ -204,6 +221,8 @@ PRIVATE void announce(void) "Copyright 2009, Vrije Universiteit, Amsterdam, The Netherlands\n", OS_RELEASE, OS_VERSION); kprintf("MINIX is open source software, see http://www.minix3.org\n"); + + FIXME("pm, vfs, etc own page table"); } /*===========================================================================* diff --git a/kernel/proc.c b/kernel/proc.c index bb3d8543f..7b9250173 100755 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -6,10 +6,7 @@ * * As well as several entry points used from the interrupt and task level: * - * lock_notify: notify a process of a system event * lock_send: send a message to a process - * lock_enqueue: put a process on one of the scheduling queues - * lock_dequeue: remove a process from the scheduling queues * * Changes: * Aug 19, 2005 rewrote scheduling code (Jorrit N. 
Herder) @@ -57,7 +54,6 @@ FORWARD _PROTOTYPE( int mini_send, (struct proc *caller_ptr, int dst_e, message *m_ptr, int flags)); FORWARD _PROTOTYPE( int mini_receive, (struct proc *caller_ptr, int src, message *m_ptr, int flags)); -FORWARD _PROTOTYPE( int mini_notify, (struct proc *caller_ptr, int dst)); FORWARD _PROTOTYPE( int mini_senda, (struct proc *caller_ptr, asynmsg_t *table, size_t size)); FORWARD _PROTOTYPE( int deadlock, (int function, @@ -67,8 +63,10 @@ FORWARD _PROTOTYPE( int try_one, (struct proc *src_ptr, struct proc *dst_ptr)); FORWARD _PROTOTYPE( void sched, (struct proc *rp, int *queue, int *front)); FORWARD _PROTOTYPE( void pick_proc, (void)); -#define BuildMess(m_ptr, src, dst_ptr) \ - (m_ptr)->m_source = proc_addr(src)->p_endpoint; \ +#define PICK_ANY 1 +#define PICK_HIGHERONLY 2 + +#define BuildNotifyMessage(m_ptr, src, dst_ptr) \ (m_ptr)->m_type = NOTIFY_FROM(src); \ (m_ptr)->NOTIFY_TIMESTAMP = get_uptime(); \ switch (src) { \ @@ -82,49 +80,88 @@ FORWARD _PROTOTYPE( void pick_proc, (void)); break; \ } -#define CopyMess(s,sp,sm,dp,dm) do { \ - vir_bytes dstlin; \ - endpoint_t e = proc_addr(s)->p_endpoint; \ - struct vir_addr src, dst; \ - int r; \ - if((dstlin = umap_local((dp), D, (vir_bytes) dm, sizeof(message))) == 0){\ - minix_panic("CopyMess: umap_local failed", __LINE__); \ - } \ - \ - if(vm_running && \ - (r=vm_checkrange((dp), (dp), dstlin, sizeof(message), 1, 0)) != OK) { \ - if(r != VMSUSPEND) \ - minix_panic("CopyMess: vm_checkrange error", __LINE__); \ - (dp)->p_vmrequest.saved.msgcopy.dst = (dp); \ - (dp)->p_vmrequest.saved.msgcopy.dst_v = (vir_bytes) dm; \ - if(data_copy((sp)->p_endpoint, \ - (vir_bytes) (sm), SYSTEM, \ - (vir_bytes) &(dp)->p_vmrequest.saved.msgcopy.msgbuf, \ - sizeof(message)) != OK) { \ - minix_panic("CopyMess: data_copy failed", __LINE__);\ - } \ - (dp)->p_vmrequest.saved.msgcopy.msgbuf.m_source = e; \ - (dp)->p_vmrequest.type = VMSTYPE_MSGCOPY; \ - } else { \ - src.proc_nr_e = (sp)->p_endpoint; \ - dst.proc_nr_e 
= (dp)->p_endpoint; \ - src.segment = dst.segment = D; \ - src.offset = (vir_bytes) (sm); \ - dst.offset = (vir_bytes) (dm); \ - if(virtual_copy(&src, &dst, sizeof(message)) != OK) { \ - kprintf("copymess: copy %d:%lx to %d:%lx failed\n",\ - (sp)->p_endpoint, (sm), (dp)->p_endpoint, dm);\ - minix_panic("CopyMess: virtual_copy (1) failed", __LINE__); \ - } \ - src.proc_nr_e = SYSTEM; \ - src.offset = (vir_bytes) &e; \ - if(virtual_copy(&src, &dst, sizeof(e)) != OK) { \ - kprintf("copymess: copy %d:%lx to %d:%lx\n", \ - (sp)->p_endpoint, (sm), (dp)->p_endpoint, dm);\ - minix_panic("CopyMess: virtual_copy (2) failed", __LINE__); \ - } \ - } \ -} while(0) +/*===========================================================================* + * QueueMess * + *===========================================================================*/ +PRIVATE int QueueMess(endpoint_t ep, vir_bytes msg_lin, struct proc *dst) +{ + int k; + phys_bytes addr; + NOREC_ENTER(queuemess); + /* Queue a message from the src process (in memory) to the dst + * process (using dst process table entry). Do actual copy to + * kernel here; it's an error if the copy fails into kernel. 
+ */ + vmassert(!(dst->p_misc_flags & MF_DELIVERMSG)); + vmassert(dst->p_delivermsg_lin); + vmassert(isokendpt(ep, &k)); + +#if 0 + if(INMEMORY(dst)) { + PHYS_COPY_CATCH(msg_lin, dst->p_delivermsg_lin, + sizeof(message), addr); + if(!addr) { + PHYS_COPY_CATCH(vir2phys(&ep), dst->p_delivermsg_lin, + sizeof(ep), addr); + if(!addr) { + NOREC_RETURN(queuemess, OK); + } + } + } +#else + FIXME("in-memory process copy"); +#endif + + PHYS_COPY_CATCH(msg_lin, vir2phys(&dst->p_delivermsg), sizeof(message), addr); + if(addr) { + NOREC_RETURN(queuemess, EFAULT); + } + + dst->p_delivermsg.m_source = ep; + dst->p_misc_flags |= MF_DELIVERMSG; + + NOREC_RETURN(queuemess, OK); +} + +/*===========================================================================* + * schedcheck * + *===========================================================================*/ +PUBLIC void schedcheck(void) +{ + /* This function is called an instant before proc_ptr is + * to be scheduled again. + */ + NOREC_ENTER(schedch); + vmassert(intr_disabled()); + if(next_ptr) { + proc_ptr = next_ptr; + next_ptr = NULL; + } + vmassert(proc_ptr); + vmassert(!proc_ptr->p_rts_flags); + while(proc_ptr->p_misc_flags & MF_DELIVERMSG) { + vmassert(!next_ptr); + vmassert(!proc_ptr->p_rts_flags); + TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n", + proc_ptr->p_name, proc_ptr->p_endpoint);); + if(delivermsg(proc_ptr) == VMSUSPEND) { + vmassert(next_ptr); + TRACE(VF_SCHEDULING, printf("suspending %s / %d\n", + proc_ptr->p_name, proc_ptr->p_endpoint);); + vmassert(proc_ptr->p_rts_flags); + vmassert(next_ptr != proc_ptr); + proc_ptr = next_ptr; + vmassert(!proc_ptr->p_rts_flags); + next_ptr = NULL; + } + } + TRACE(VF_SCHEDULING, printf("starting %s / %d\n", + proc_ptr->p_name, proc_ptr->p_endpoint);); +#if DEBUG_TRACE + proc_ptr->p_schedules++; +#endif + NOREC_RETURN(schedch, ); +} /*===========================================================================* * sys_call * @@ -146,8 +183,13 @@ long bit_map; /* 
notification event set or flags */ int src_dst_p; /* Process slot number */ size_t msg_size; - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.total= add64u(ipc_stats.total, 1); +#if DEBUG_SCHED_CHECK + if(caller_ptr->p_misc_flags & MF_DELIVERMSG) { + kprintf("sys_call: MF_DELIVERMSG on for %s / %d\n", + caller_ptr->p_name, caller_ptr->p_endpoint); + minix_panic("MF_DELIVERMSG on", NO_NUM); + } +#endif #if 0 if(src_dst_e != 4 && src_dst_e != 5 && @@ -163,12 +205,10 @@ long bit_map; /* notification event set or flags */ } #endif -#if 1 +#if DEBUG_SCHED_CHECK if (RTS_ISSET(caller_ptr, SLOT_FREE)) { kprintf("called by the dead?!?\n"); - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.deadproc++; return EINVAL; } #endif @@ -188,12 +228,10 @@ long bit_map; /* notification event set or flags */ { if (call_nr != RECEIVE) { -#if DEBUG_ENABLE_IPC_WARNINGS +#if 0 kprintf("sys_call: trap %d by %d with bad endpoint %d\n", call_nr, proc_nr(caller_ptr), src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_endpoint++; return EINVAL; } src_dst_p = src_dst_e; @@ -202,12 +240,10 @@ long bit_map; /* notification event set or flags */ { /* Require a valid source and/or destination process. 
*/ if(!isokendpt(src_dst_e, &src_dst_p)) { -#if DEBUG_ENABLE_IPC_WARNINGS +#if 0 kprintf("sys_call: trap %d by %d with bad endpoint %d\n", call_nr, proc_nr(caller_ptr), src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_endpoint++; return EDEADSRCDST; } @@ -221,10 +257,8 @@ long bit_map; /* notification event set or flags */ #if DEBUG_ENABLE_IPC_WARNINGS kprintf( "sys_call: ipc mask denied trap %d from %d to %d\n", - call_nr, proc_nr(caller_ptr), src_dst_p); + call_nr, caller_ptr->p_endpoint, src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_not_allowed++; return(ECALLDENIED); /* call denied by ipc mask */ } } @@ -237,8 +271,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", call_nr, proc_nr(caller_ptr), src_dst_p); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_call++; return(ETRAPDENIED); /* trap denied by mask or kernel */ } @@ -251,8 +283,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", call_nr, proc_nr(caller_ptr), src_dst_p); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.call_not_allowed++; return(ETRAPDENIED); /* trap denied by mask or kernel */ } @@ -261,8 +291,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", call_nr, proc_nr(caller_ptr), src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.call_not_allowed++; return(ETRAPDENIED); /* trap denied by mask or kernel */ } @@ -283,61 +311,6 @@ long bit_map; /* notification event set or flags */ msg_size = sizeof(*m_ptr); } - /* If the call involves a message buffer, i.e., for SEND, SENDREC, - * or RECEIVE, check the message pointer. This check allows a message to be - * anywhere in data or stack or gap. 
It will have to be made more elaborate - * for machines which don't have the gap mapped. - * - * We use msg_size decided above. - */ - if (call_nr == SEND || call_nr == SENDREC || - call_nr == RECEIVE || call_nr == SENDA || call_nr == SENDNB) { - int r; - phys_bytes lin; - - /* Map to linear address. */ - if(msg_size > 0 && - (lin = umap_local(caller_ptr, D, (vir_bytes) m_ptr, msg_size)) == 0) { - kprintf("umap_local failed for %s / %d on 0x%lx size %d\n", - caller_ptr->p_name, caller_ptr->p_endpoint, - m_ptr, msg_size); - return EFAULT; - } - - /* Check if message pages in calling process are mapped. - * We don't have to check the recipient if this is a send, - * because this code will do that before its receive() starts. - * - * It is important the range is verified as _writable_, because - * the kernel will want to write to the SENDA buffer in the future, - * and those pages may not be shared between processes. - */ - - if(vm_running && msg_size > 0 && - (r=vm_checkrange(caller_ptr, caller_ptr, lin, msg_size, 1, 0)) != OK) { - if(r != VMSUSPEND) { - kprintf("SYSTEM:sys_call:vm_checkrange: err %d\n", r); - return r; - } - - /* We can't go ahead with this call. Caller is suspended - * and we have to save the state in its process struct. - */ - caller_ptr->p_vmrequest.saved.sys_call.call_nr = call_nr; - caller_ptr->p_vmrequest.saved.sys_call.m_ptr = m_ptr; - caller_ptr->p_vmrequest.saved.sys_call.src_dst_e = src_dst_e; - caller_ptr->p_vmrequest.saved.sys_call.bit_map = bit_map; - caller_ptr->p_vmrequest.type = VMSTYPE_SYS_CALL; - - kprintf("SYSTEM: %s:%d: suspending call 0x%lx on ipc buffer 0x%lx length 0x%lx\n", - caller_ptr->p_name, caller_ptr->p_endpoint, call_nr, m_ptr, msg_size); - - /* vm_checkrange() will have suspended caller with VMREQUEST. */ - return OK; - } - - } - /* Check for a possible deadlock for blocking SEND(REC) and RECEIVE. 
*/ if (call_nr == SEND || call_nr == SENDREC || call_nr == RECEIVE) { if (group_size = deadlock(call_nr, caller_ptr, src_dst_p)) { @@ -345,8 +318,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d from %d to %d deadlocked, group size %d\n", call_nr, proc_nr(caller_ptr), src_dst_p, group_size); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.deadlock++; return(ELOCKED); } } @@ -362,7 +333,7 @@ long bit_map; /* notification event set or flags */ switch(call_nr) { case SENDREC: /* A flag is set so that notifications cannot interrupt SENDREC. */ - caller_ptr->p_misc_flags |= REPLY_PENDING; + caller_ptr->p_misc_flags |= MF_REPLY_PEND; /* fall through */ case SEND: result = mini_send(caller_ptr, src_dst_e, m_ptr, 0); @@ -371,11 +342,11 @@ long bit_map; /* notification event set or flags */ /* fall through for SENDREC */ case RECEIVE: if (call_nr == RECEIVE) - caller_ptr->p_misc_flags &= ~REPLY_PENDING; + caller_ptr->p_misc_flags &= ~MF_REPLY_PEND; result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0); break; case NOTIFY: - result = mini_notify(caller_ptr, src_dst_p); + result = mini_notify(caller_ptr, src_dst_e); break; case SENDNB: result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING); @@ -460,22 +431,6 @@ int src_dst; /* src or dst process */ return(0); /* not a deadlock */ } -/*===========================================================================* - * sys_call_restart * - *===========================================================================*/ -PUBLIC void sys_call_restart(caller) -struct proc *caller; -{ - int r; - kprintf("restarting sys_call code 0x%lx, " - "m_ptr 0x%lx, srcdst %d, bitmap 0x%lx, but not really\n", - caller->p_vmrequest.saved.sys_call.call_nr, - caller->p_vmrequest.saved.sys_call.m_ptr, - caller->p_vmrequest.saved.sys_call.src_dst_e, - caller->p_vmrequest.saved.sys_call.bit_map); - caller->p_reg.retreg = r; -} - 
/*===========================================================================* * mini_send * *===========================================================================*/ @@ -492,14 +447,19 @@ int flags; register struct proc *dst_ptr; register struct proc **xpp; int dst_p; + phys_bytes linaddr; + vir_bytes addr; + int r; + if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) m_ptr, + sizeof(message)))) { + return EFAULT; + } dst_p = _ENDPOINT_P(dst_e); dst_ptr = proc_addr(dst_p); if (RTS_ISSET(dst_ptr, NO_ENDPOINT)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_died++; return EDSTDIED; } @@ -508,18 +468,20 @@ int flags; */ if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) { /* Destination is indeed waiting for this message. */ - CopyMess(caller_ptr->p_nr, caller_ptr, m_ptr, dst_ptr, - dst_ptr->p_messbuf); + vmassert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG)); + if((r=QueueMess(caller_ptr->p_endpoint, linaddr, dst_ptr)) != OK) + return r; RTS_UNSET(dst_ptr, RECEIVING); } else { if(flags & NON_BLOCKING) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.not_ready++; return(ENOTREADY); } /* Destination is not waiting. Block and dequeue caller. */ - caller_ptr->p_messbuf = m_ptr; + PHYS_COPY_CATCH(linaddr, vir2phys(&caller_ptr->p_sendmsg), + sizeof(message), addr); + + if(addr) { return EFAULT; } RTS_SET(caller_ptr, SENDING); caller_ptr->p_sendto_e = dst_e; @@ -552,6 +514,18 @@ int flags; sys_map_t *map; bitchunk_t *chunk; int i, r, src_id, src_proc_nr, src_p; + phys_bytes linaddr; + + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); + + if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) m_ptr, + sizeof(message)))) { + return EFAULT; + } + + /* This is where we want our message. 
*/ + caller_ptr->p_delivermsg_lin = linaddr; + caller_ptr->p_delivermsg_vir = (vir_bytes) m_ptr; if(src_e == ANY) src_p = ANY; else @@ -559,8 +533,6 @@ int flags; okendpt(src_e, &src_p); if (RTS_ISSET(proc_addr(src_p), NO_ENDPOINT)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.src_died++; return ESRCDIED; } } @@ -573,10 +545,11 @@ int flags; if (!RTS_ISSET(caller_ptr, SENDING)) { /* Check if there are pending notifications, except for SENDREC. */ - if (! (caller_ptr->p_misc_flags & REPLY_PENDING)) { + if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) { map = &priv(caller_ptr)->s_notify_pending; for (chunk=&map->chunk[0]; chunk<&map->chunk[NR_SYS_CHUNKS]; chunk++) { + endpoint_t hisep; /* Find a pending notification from the requested source. */ if (! *chunk) continue; /* no bits in chunk */ @@ -593,8 +566,13 @@ int flags; *chunk &= ~(1 << i); /* no longer pending */ /* Found a suitable source, deliver the notification message. */ - BuildMess(&m, src_proc_nr, caller_ptr); /* assemble message */ - CopyMess(src_proc_nr, proc_addr(HARDWARE), &m, caller_ptr, m_ptr); + BuildNotifyMessage(&m, src_proc_nr, caller_ptr); /* assemble message */ + hisep = proc_addr(src_proc_nr)->p_endpoint; + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); + vmassert(src_e == ANY || hisep == src_e); + if((r=QueueMess(hisep, vir2phys(&m), caller_ptr)) != OK) { + minix_panic("mini_receive: local QueueMess failed", NO_NUM); + } return(OK); /* report success */ } } @@ -603,20 +581,20 @@ int flags; xpp = &caller_ptr->p_caller_q; while (*xpp != NIL_PROC) { if (src_e == ANY || src_p == proc_nr(*xpp)) { -#if 1 +#if DEBUG_SCHED_CHECK if (RTS_ISSET(*xpp, SLOT_FREE) || RTS_ISSET(*xpp, NO_ENDPOINT)) { kprintf("%d: receive from %d; found dead %d (%s)?\n", caller_ptr->p_endpoint, src_e, (*xpp)->p_endpoint, (*xpp)->p_name); - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.deadproc++; return EINVAL; } #endif /* Found acceptable message. Copy it and update status. 
*/ - CopyMess((*xpp)->p_nr, *xpp, (*xpp)->p_messbuf, caller_ptr, m_ptr); + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); + QueueMess((*xpp)->p_endpoint, + vir2phys(&(*xpp)->p_sendmsg), caller_ptr); RTS_UNSET(*xpp, SENDING); *xpp = (*xpp)->p_q_link; /* remove from queue */ return(OK); /* report success */ @@ -635,7 +613,6 @@ int flags; } else { - caller_ptr->p_messbuf = m_ptr; r= try_async(caller_ptr); } if (r == OK) @@ -648,12 +625,9 @@ int flags; */ if ( ! (flags & NON_BLOCKING)) { caller_ptr->p_getfrom_e = src_e; - caller_ptr->p_messbuf = m_ptr; RTS_SET(caller_ptr, RECEIVING); return(OK); } else { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.not_ready++; return(ENOTREADY); } } @@ -661,26 +635,41 @@ int flags; /*===========================================================================* * mini_notify * *===========================================================================*/ -PRIVATE int mini_notify(caller_ptr, dst) +PUBLIC int mini_notify(caller_ptr, dst_e) register struct proc *caller_ptr; /* sender of the notification */ -int dst; /* which process to notify */ +endpoint_t dst_e; /* which process to notify */ { - register struct proc *dst_ptr = proc_addr(dst); + register struct proc *dst_ptr; int src_id; /* source id for late delivery */ message m; /* the notification message */ + int r; + int proc_nr; + int dst_p; + + vmassert(intr_disabled()); + + if (!isokendpt(dst_e, &dst_p)) { + util_stacktrace(); + kprintf("mini_notify: bogus endpoint %d\n", dst_e); + return EDEADSRCDST; + } + + dst_ptr = proc_addr(dst_p); /* Check to see if target is blocked waiting for this message. A process * can be both sending and receiving during a SENDREC system call. */ if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) && - ! (dst_ptr->p_misc_flags & REPLY_PENDING)) { + ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) { /* Destination is indeed waiting for a message. Assemble a notification * message and deliver it. 
Copy from pseudo-source HARDWARE, since the * message is in the kernel's address space. */ - BuildMess(&m, proc_nr(caller_ptr), dst_ptr); - CopyMess(proc_nr(caller_ptr), proc_addr(HARDWARE), &m, - dst_ptr, dst_ptr->p_messbuf); + BuildNotifyMessage(&m, proc_nr(caller_ptr), dst_ptr); + vmassert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG)); + if((r=QueueMess(caller_ptr->p_endpoint, vir2phys(&m), dst_ptr)) != OK) { + minix_panic("mini_notify: local QueueMess failed", NO_NUM); + } RTS_UNSET(dst_ptr, RECEIVING); return(OK); } @@ -725,21 +714,20 @@ struct proc *caller_ptr; asynmsg_t *table; size_t size; { - int i, dst_p, done, do_notify; + int i, dst_p, done, do_notify, r; unsigned flags; struct proc *dst_ptr; struct priv *privp; message *m_ptr; asynmsg_t tabent; vir_bytes table_v = (vir_bytes) table; + vir_bytes linaddr; privp= priv(caller_ptr); if (!(privp->s_flags & SYS_PROC)) { kprintf( "mini_senda: warning caller has no privilege structure\n"); - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.no_priv++; return EPERM; } @@ -753,6 +741,13 @@ size_t size; return OK; } + if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) table, + size * sizeof(*table)))) { + printf("mini_senda: umap_local failed; 0x%lx len 0x%lx\n", + table, size * sizeof(*table)); + return EFAULT; + } + /* Limit size to something reasonable. An arbitrary choice is 16 * times the number of process table entries. 
* @@ -761,8 +756,6 @@ size_t size; */ if (size > 16*(NR_TASKS + NR_PROCS)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_size++; return EDOM; } @@ -784,8 +777,6 @@ size_t size; if (flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY) || !(flags & AMF_VALID)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_senda++; return EINVAL; } @@ -799,9 +790,6 @@ size_t size; if (!isokendpt(tabent.dst, &dst_p)) { /* Bad destination, report the error */ - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_endpoint++; - tabent.result= EDEADSRCDST; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; @@ -815,9 +803,6 @@ size_t size; if (!may_send_to(caller_ptr, dst_p)) { /* Send denied by IPC mask */ - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_not_allowed++; - tabent.result= ECALLDENIED; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; @@ -838,9 +823,6 @@ size_t size; /* NO_ENDPOINT should be removed */ if (dst_ptr->p_rts_flags & NO_ENDPOINT) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_died++; - tabent.result= EDSTDIED; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; @@ -864,12 +846,13 @@ size_t size; m_ptr= &table[i].msg; /* Note: pointer in the * caller's address space. */ - CopyMess(caller_ptr->p_nr, caller_ptr, m_ptr, dst_ptr, - dst_ptr->p_messbuf); + /* Copy message from sender. 
*/ + tabent.result= QueueMess(caller_ptr->p_endpoint, + linaddr + (vir_bytes) &table[i].msg - + (vir_bytes) table, dst_ptr); + if(tabent.result == OK) + RTS_UNSET(dst_ptr, RECEIVING); - RTS_UNSET(dst_ptr, RECEIVING); - - tabent.result= OK; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; A_INSERT(i, flags); @@ -892,13 +875,6 @@ size_t size; { privp->s_asyntab= (vir_bytes)table; privp->s_asynsize= size; -#if 0 - if(caller_ptr->p_endpoint > INIT_PROC_NR) { - kprintf("kernel: %s (%d) asynsend table at 0x%lx, %d\n", - caller_ptr->p_name, caller_ptr->p_endpoint, - table, size); - } -#endif } return OK; } @@ -913,7 +889,7 @@ struct proc *caller_ptr; int r; struct priv *privp; struct proc *src_ptr; - + /* Try all privilege structures */ for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) { @@ -921,13 +897,10 @@ struct proc *caller_ptr; continue; if (privp->s_asynsize == 0) continue; -#if 0 - kprintf("try_async: found asyntable for proc %d\n", - privp->s_proc_nr); -#endif src_ptr= proc_addr(privp->s_proc_nr); if (!may_send_to(src_ptr, proc_nr(caller_ptr))) continue; + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); r= try_one(src_ptr, caller_ptr); if (r == OK) return r; @@ -957,6 +930,7 @@ struct proc *dst_ptr; asynmsg_t tabent; vir_bytes table_v; struct proc *caller_ptr; + int r; privp= priv(src_ptr); size= privp->s_asynsize; @@ -986,8 +960,6 @@ struct proc *dst_ptr; { kprintf("try_one: bad bits in table\n"); privp->s_asynsize= 0; - if (src_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_senda++; return EINVAL; } @@ -1015,10 +987,11 @@ struct proc *dst_ptr; m_ptr= &table_ptr[i].msg; /* Note: pointer in the * caller's address space. 
*/ - CopyMess(src_ptr->p_nr, src_ptr, m_ptr, dst_ptr, - dst_ptr->p_messbuf); + A_RETRIEVE(i, msg); + r = QueueMess(src_ptr->p_endpoint, vir2phys(&tabent.msg), + dst_ptr); - tabent.result= OK; + tabent.result= r; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; A_INSERT(i, flags); @@ -1034,7 +1007,7 @@ struct proc *dst_ptr; return EAGAIN; } -/*===========================================================================* + /*===========================================================================* * lock_notify * *===========================================================================*/ PUBLIC int lock_notify(src_e, dst_e) @@ -1047,60 +1020,23 @@ int dst_e; /* (endpoint) who is to be notified */ * the first kernel entry (hardware interrupt, trap, or exception). Locking * is done by temporarily disabling interrupts. */ - int result, src, dst; + int result, src_p; - if(!isokendpt(src_e, &src) || !isokendpt(dst_e, &dst)) - return EDEADSRCDST; + vmassert(!intr_disabled()); - /* Exception or interrupt occurred, thus already locked. */ - if (k_reenter >= 0) { - result = mini_notify(proc_addr(src), dst); + if (!isokendpt(src_e, &src_p)) { + kprintf("lock_notify: bogus src: %d\n", src_e); + return EDEADSRCDST; } - /* Call from task level, locking is required. */ - else { lock; - result = mini_notify(proc_addr(src), dst); + vmassert(intr_disabled()); + result = mini_notify(proc_addr(src_p), dst_e); + vmassert(intr_disabled()); unlock; - } - return(result); -} - -/*===========================================================================* - * soft_notify * - *===========================================================================*/ -PUBLIC int soft_notify(dst_e) -int dst_e; /* (endpoint) who is to be notified */ -{ - int dst, u = 0; - struct proc *dstp, *sys = proc_addr(SYSTEM); + vmassert(!intr_disabled()); -/* Delayed interface to notify() from SYSTEM that is safe/easy to call - * from more places than notify(). 
- */ - if(!intr_disabled()) { lock; u = 1; } - - { - if(!isokendpt(dst_e, &dst)) - minix_panic("soft_notify to dead ep", dst_e); - - dstp = proc_addr(dst); - - if(!dstp->p_softnotified) { - dstp->next_soft_notify = softnotify; - softnotify = dstp; - dstp->p_softnotified = 1; - - if (RTS_ISSET(sys, RECEIVING)) { - sys->p_messbuf->m_source = SYSTEM; - RTS_UNSET(sys, RECEIVING); - } - } - } - - if(u) { unlock; } - - return OK; + return(result); } /*===========================================================================* @@ -1117,15 +1053,19 @@ register struct proc *rp; /* this process is now runnable */ int q; /* scheduling queue to use */ int front; /* add to front or back */ + NOREC_ENTER(enqueuefunc); + #if DEBUG_SCHED_CHECK if(!intr_disabled()) { minix_panic("enqueue with interrupts enabled", NO_NUM); } - CHECK_RUNQUEUES; if (rp->p_ready) minix_panic("enqueue already ready process", NO_NUM); #endif /* Determine where to insert to process. */ sched(rp, &q, &front); + vmassert(q >= 0); + vmassert(q < IDLE_Q || rp->p_endpoint == IDLE); + /* Now add the process to the queue. */ if (rdy_head[q] == NIL_PROC) { /* add to empty queue */ rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */ @@ -1141,19 +1081,25 @@ register struct proc *rp; /* this process is now runnable */ rp->p_nextready = NIL_PROC; /* mark new end */ } +#if DEBUG_SCHED_CHECK + rp->p_ready = 1; + CHECK_RUNQUEUES; +#endif + /* Now select the next process to run, if there isn't a current * process yet or current process isn't ready any more, or * it's PREEMPTIBLE. 
*/ - if(!proc_ptr || proc_ptr->p_rts_flags || - (priv(proc_ptr)->s_flags & PREEMPTIBLE)) { + vmassert(proc_ptr); + if((proc_ptr->p_priority > rp->p_priority) && + (priv(proc_ptr)->s_flags & PREEMPTIBLE)) pick_proc(); - } #if DEBUG_SCHED_CHECK - rp->p_ready = 1; CHECK_RUNQUEUES; #endif + + NOREC_RETURN(enqueuefunc, ); } /*===========================================================================* @@ -1170,14 +1116,17 @@ register struct proc *rp; /* this process is no longer runnable */ register struct proc **xpp; /* iterate over queue */ register struct proc *prev_xp; + NOREC_ENTER(dequeuefunc); + +#if DEBUG_STACK_CHECK /* Side-effect for kernel: check if the task's stack still is ok? */ if (iskernelp(rp)) { if (*priv(rp)->s_stack_guard != STACK_GUARD) minix_panic("stack overrun by task", proc_nr(rp)); } +#endif #if DEBUG_SCHED_CHECK - CHECK_RUNQUEUES; if(!intr_disabled()) { minix_panic("dequeue with interrupts enabled", NO_NUM); } if (! rp->p_ready) minix_panic("dequeue() already unready process", NO_NUM); #endif @@ -1193,17 +1142,23 @@ register struct proc *rp; /* this process is no longer runnable */ *xpp = (*xpp)->p_nextready; /* replace with next chain */ if (rp == rdy_tail[q]) /* queue tail removed */ rdy_tail[q] = prev_xp; /* set new tail */ + +#if DEBUG_SCHED_CHECK + rp->p_ready = 0; + CHECK_RUNQUEUES; +#endif if (rp == proc_ptr || rp == next_ptr) /* active process removed */ - pick_proc(); /* pick new process to run */ + pick_proc(); /* pick new process to run */ break; } prev_xp = *xpp; /* save previous in chain */ } #if DEBUG_SCHED_CHECK - rp->p_ready = 0; CHECK_RUNQUEUES; #endif + + NOREC_RETURN(dequeuefunc, ); } /*===========================================================================* @@ -1249,25 +1204,29 @@ PRIVATE void pick_proc() * clock task can tell who to bill for system time. 
*/ register struct proc *rp; /* process to run */ - int q; /* iterate over queues */ + int q; /* iterate over queues */ + + NOREC_ENTER(pick); /* Check each of the scheduling queues for ready processes. The number of * queues is defined in proc.h, and priorities are set in the task table. * The lowest queue contains IDLE, which is always ready. */ for (q=0; q < NR_SCHED_QUEUES; q++) { - if ( (rp = rdy_head[q]) != NIL_PROC) { - next_ptr = rp; /* run process 'rp' next */ -#if 0 - if(rp->p_endpoint != 4 && rp->p_endpoint != 5 && rp->p_endpoint != IDLE && rp->p_endpoint != SYSTEM) - kprintf("[run %s]", rp->p_name); -#endif - if (priv(rp)->s_flags & BILLABLE) - bill_ptr = rp; /* bill for system time */ - return; - } + int found = 0; + if(!(rp = rdy_head[q])) { + TRACE(VF_PICKPROC, printf("queue %d empty\n", q);); + continue; + } + TRACE(VF_PICKPROC, printf("found %s / %d on queue %d\n", + rp->p_name, rp->p_endpoint, q);); + next_ptr = rp; /* run process 'rp' next */ + vmassert(proc_ptr != next_ptr); + vmassert(!next_ptr->p_rts_flags); + if (priv(rp)->s_flags & BILLABLE) + bill_ptr = rp; /* bill for system time */ + NOREC_RETURN(pick, ); } - minix_panic("no ready process", NO_NUM); } /*===========================================================================* @@ -1286,9 +1245,11 @@ timer_t *tp; /* watchdog timer pointer */ clock_t next_period; /* time of next period */ int ticks_added = 0; /* total time added */ + vmassert(!intr_disabled()); + + lock; for (rp=BEG_PROC_ADDR; rp<END_PROC_ADDR; rp++) { if (! isemptyp(rp)) { /* check slot use */ - lock; if (rp->p_priority > rp->p_max_priority) { /* update priority?
*/ if (rp->p_rts_flags == 0) dequeue(rp); /* take off queue */ ticks_added += rp->p_quantum_size; /* do accounting */ @@ -1299,12 +1260,9 @@ timer_t *tp; /* watchdog timer pointer */ ticks_added += rp->p_quantum_size - rp->p_ticks_left; rp->p_ticks_left = rp->p_quantum_size; /* give new quantum */ } - unlock; } } -#if DEBUG - kprintf("ticks_added: %d\n", ticks_added); -#endif + unlock; /* Now schedule a new watchdog timer to balance the queues again. The * period depends on the total amount of quantum ticks added. @@ -1328,37 +1286,6 @@ message *m_ptr; /* pointer to message buffer */ return(result); } -/*===========================================================================* - * lock_enqueue * - *===========================================================================*/ -PUBLIC void lock_enqueue(rp) -struct proc *rp; /* this process is now runnable */ -{ -/* Safe gateway to enqueue() for tasks. */ - lock; - enqueue(rp); - unlock; -} - -/*===========================================================================* - * lock_dequeue * - *===========================================================================*/ -PUBLIC void lock_dequeue(rp) -struct proc *rp; /* this process is no longer runnable */ -{ -/* Safe gateway to dequeue() for tasks. */ - if (k_reenter >= 0) { - /* We're in an exception or interrupt, so don't lock (and ... - * don't unlock). 
- */ - dequeue(rp); - } else { - lock; - dequeue(rp); - unlock; - } -} - /*===========================================================================* * endpoint_lookup * *===========================================================================*/ @@ -1401,24 +1328,18 @@ int *p, fatalflag; *p = _ENDPOINT_P(e); if(!isokprocn(*p)) { #if DEBUG_ENABLE_IPC_WARNINGS -#if 0 kprintf("kernel:%s:%d: bad endpoint %d: proc %d out of range\n", file, line, e, *p); -#endif #endif } else if(isemptyn(*p)) { -#if DEBUG_ENABLE_IPC_WARNINGS #if 0 kprintf("kernel:%s:%d: bad endpoint %d: proc %d empty\n", file, line, e, *p); -#endif #endif } else if(proc_addr(*p)->p_endpoint != e) { #if DEBUG_ENABLE_IPC_WARNINGS -#if 0 kprintf("kernel:%s:%d: bad endpoint %d: proc %d has ept %d (generation %d vs. %d)\n", file, line, e, *p, proc_addr(*p)->p_endpoint, _ENDPOINT_G(e), _ENDPOINT_G(proc_addr(*p)->p_endpoint)); -#endif #endif } else ok = 1; if(!ok && fatalflag) { diff --git a/kernel/proc.h b/kernel/proc.h index aa3752f00..ac07514b0 100755 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -10,6 +10,7 @@ * struct proc, be sure to change sconst.h to match. */ #include +#include #include "const.h" #include "priv.h" @@ -39,7 +40,6 @@ struct proc { struct proc *p_nextready; /* pointer to next ready process */ struct proc *p_caller_q; /* head of list of procs wishing to send */ struct proc *p_q_link; /* link to next proc wishing to send */ - message *p_messbuf; /* pointer to passed message buffer */ int p_getfrom_e; /* from whom does process want to receive? */ int p_sendto_e; /* to whom does process want to send? 
*/ @@ -49,6 +49,11 @@ struct proc { endpoint_t p_endpoint; /* endpoint number, generation-aware */ + message p_sendmsg; /* Message from this process if SENDING */ + message p_delivermsg; /* Message for this process if MF_DELIVERMSG */ + vir_bytes p_delivermsg_vir; /* Virtual addr this proc wants message at */ + vir_bytes p_delivermsg_lin; /* Linear addr this proc wants message at */ + /* If handler functions detect a process wants to do something with * memory that isn't present, VM has to fix it. Until it has asked * what needs to be done and fixed it, save necessary state here. @@ -60,28 +65,12 @@ struct proc { struct proc *nextrestart; /* next in vmrestart chain */ struct proc *nextrequestor; /* next in vmrequest chain */ #define VMSTYPE_SYS_NONE 0 -#define VMSTYPE_SYS_MESSAGE 1 -#define VMSTYPE_SYS_CALL 2 -#define VMSTYPE_MSGCOPY 3 +#define VMSTYPE_KERNELCALL 1 +#define VMSTYPE_DELIVERMSG 2 int type; /* suspended operation */ union { /* VMSTYPE_SYS_MESSAGE */ message reqmsg; /* suspended request message */ - - /* VMSTYPE_SYS_CALL */ - struct { - int call_nr; - message *m_ptr; - int src_dst_e; - long bit_map; - } sys_call; - - /* VMSTYPE_MSGCOPY */ - struct { - struct proc *dst; - vir_bytes dst_v; - message msgbuf; - } msgcopy; } saved; /* Parameters of request to VM */ @@ -92,10 +81,9 @@ struct proc { /* VM result when available */ int vmresult; - /* Target gets this set. (But caller and target can be - * the same, so we can't put this in the 'saved' union.) - */ - struct proc *requestor; +#if DEBUG_VMASSERT + char stacktrace[200]; +#endif /* If the suspended operation is a sys_call, its details are * stored here. @@ -110,21 +98,26 @@ struct proc { #define PMAGIC 0xC0FFEE1 int p_magic; /* check validity of proc pointers */ #endif + +#if DEBUG_TRACE + int p_schedules; +#endif }; /* Bits for the runtime flags. A process is runnable iff p_rts_flags == 0. 
*/ -#define SLOT_FREE 0x01 /* process slot is free */ -#define NO_PRIORITY 0x02 /* process has been stopped */ -#define SENDING 0x04 /* process blocked trying to send */ -#define RECEIVING 0x08 /* process blocked trying to receive */ -#define SIGNALED 0x10 /* set when new kernel signal arrives */ -#define SIG_PENDING 0x20 /* unready while signal being processed */ -#define P_STOP 0x40 /* set when process is being traced */ -#define NO_PRIV 0x80 /* keep forked system process from running */ -#define NO_ENDPOINT 0x100 /* process cannot send or receive messages */ -#define VMINHIBIT 0x200 /* not scheduled until pagetable set by VM */ -#define PAGEFAULT 0x400 /* process has unhandled pagefault */ -#define VMREQUEST 0x800 /* originator of vm memory request */ +#define SLOT_FREE 0x01 /* process slot is free */ +#define NO_PRIORITY 0x02 /* process has been stopped */ +#define SENDING 0x04 /* process blocked trying to send */ +#define RECEIVING 0x08 /* process blocked trying to receive */ +#define SIGNALED 0x10 /* set when new kernel signal arrives */ +#define SIG_PENDING 0x20 /* unready while signal being processed */ +#define P_STOP 0x40 /* set when process is being traced */ +#define NO_PRIV 0x80 /* keep forked system process from running */ +#define NO_ENDPOINT 0x100 /* process cannot send or receive messages */ +#define VMINHIBIT 0x200 /* not scheduled until pagetable set by VM */ +#define PAGEFAULT 0x400 /* process has unhandled pagefault */ +#define VMREQUEST 0x800 /* originator of vm memory request */ +#define VMREQTARGET 0x1000 /* target of vm memory request */ /* These runtime flags can be tested and manipulated by these macros. */ @@ -134,49 +127,62 @@ struct proc { /* Set flag and dequeue if the process was runnable. */ #define RTS_SET(rp, f) \ do { \ + vmassert(intr_disabled()); \ if(!(rp)->p_rts_flags) { dequeue(rp); } \ (rp)->p_rts_flags |= (f); \ + vmassert(intr_disabled()); \ } while(0) /* Clear flag and enqueue if the process was not runnable but is now. 
*/ #define RTS_UNSET(rp, f) \ do { \ int rts; \ - rts = (rp)->p_rts_flags; \ + vmassert(intr_disabled()); \ + rts = (rp)->p_rts_flags; \ (rp)->p_rts_flags &= ~(f); \ if(rts && !(rp)->p_rts_flags) { enqueue(rp); } \ + vmassert(intr_disabled()); \ } while(0) /* Set flag and dequeue if the process was runnable. */ #define RTS_LOCK_SET(rp, f) \ do { \ - if(!(rp)->p_rts_flags) { lock_dequeue(rp); } \ + int u = 0; \ + if(!intr_disabled()) { u = 1; lock; } \ + if(!(rp)->p_rts_flags) { dequeue(rp); } \ (rp)->p_rts_flags |= (f); \ + if(u) { unlock; } \ } while(0) /* Clear flag and enqueue if the process was not runnable but is now. */ #define RTS_LOCK_UNSET(rp, f) \ do { \ int rts; \ - rts = (rp)->p_rts_flags; \ + int u = 0; \ + if(!intr_disabled()) { u = 1; lock; } \ + rts = (rp)->p_rts_flags; \ (rp)->p_rts_flags &= ~(f); \ - if(rts && !(rp)->p_rts_flags) { lock_enqueue(rp); } \ + if(rts && !(rp)->p_rts_flags) { enqueue(rp); } \ + if(u) { unlock; } \ } while(0) /* Set flags to this value. */ #define RTS_LOCK_SETFLAGS(rp, f) \ do { \ - if(!(rp)->p_rts_flags && (f)) { lock_dequeue(rp); } \ - (rp)->p_rts_flags = (f); \ + int u = 0; \ + if(!intr_disabled()) { u = 1; lock; } \ + if(!(rp)->p_rts_flags && (f)) { dequeue(rp); } \ + (rp)->p_rts_flags = (f); \ + if(u) { unlock; } \ } while(0) /* Misc flags */ -#define REPLY_PENDING 0x01 /* reply to IPC_REQUEST is pending */ -#define VIRT_TIMER 0x02 /* process-virtual timer is running */ -#define PROF_TIMER 0x04 /* process-virtual profile timer is running */ -#define MF_VM 0x08 /* process uses VM */ +#define MF_REPLY_PEND 0x01 /* reply to IPC_REQUEST is pending */ +#define MF_VIRT_TIMER 0x02 /* process-virtual timer is running */ +#define MF_PROF_TIMER 0x04 /* process-virtual profile timer is running */ #define MF_ASYNMSG 0x10 /* Asynchrous message pending */ #define MF_FULLVM 0x20 +#define MF_DELIVERMSG 0x40 /* Copy message for him before running */ /* Scheduling priorities for p_priority. 
Values must start at zero (highest * priority) and increment. Priorities of the processes in the boot image diff --git a/kernel/proto.h b/kernel/proto.h index 7536765c5..929950585 100755 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -33,13 +33,12 @@ _PROTOTYPE( int sys_call, (int call_nr, int src_dst, message *m_ptr, long bit_map) ); _PROTOTYPE( void sys_call_restart, (struct proc *caller) ); _PROTOTYPE( int lock_notify, (int src, int dst) ); -_PROTOTYPE( int soft_notify, (int dst) ); +_PROTOTYPE( int mini_notify, (struct proc *src, endpoint_t dst) ); _PROTOTYPE( int lock_send, (int dst, message *m_ptr) ); -_PROTOTYPE( void lock_enqueue, (struct proc *rp) ); -_PROTOTYPE( void lock_dequeue, (struct proc *rp) ); _PROTOTYPE( void enqueue, (struct proc *rp) ); _PROTOTYPE( void dequeue, (struct proc *rp) ); _PROTOTYPE( void balance_queues, (struct timer *tp) ); +_PROTOTYPE( void schedcheck, (void) ); _PROTOTYPE( struct proc *endpoint_lookup, (endpoint_t ep) ); #if DEBUG_ENABLE_IPC_WARNINGS _PROTOTYPE( int isokendpt_f, (char *file, int line, endpoint_t e, int *p, int f)); @@ -91,6 +90,8 @@ _PROTOTYPE( void cons_seth, (int pos, int n) ); #define CHECK_RUNQUEUES check_runqueues_f(__FILE__, __LINE__) _PROTOTYPE( void check_runqueues_f, (char *file, int line) ); #endif +_PROTOTYPE( char *rtsflagstr, (int flags) ); +_PROTOTYPE( char *miscflagstr, (int flags) ); /* system/do_safecopy.c */ _PROTOTYPE( int verify_grant, (endpoint_t, endpoint_t, cp_grant_id_t, vir_bytes, @@ -106,18 +107,21 @@ _PROTOTYPE( void stop_profile_clock, (void) ); #endif /* functions defined in architecture-dependent files. 
*/ -_PROTOTYPE( void phys_copy, (phys_bytes source, phys_bytes dest, +_PROTOTYPE( phys_bytes phys_copy, (phys_bytes source, phys_bytes dest, phys_bytes count) ); +_PROTOTYPE( void phys_copy_fault, (void)); #define virtual_copy(src, dst, bytes) virtual_copy_f(src, dst, bytes, 0) #define virtual_copy_vmcheck(src, dst, bytes) virtual_copy_f(src, dst, bytes, 1) _PROTOTYPE( int virtual_copy_f, (struct vir_addr *src, struct vir_addr *dst, vir_bytes bytes, int vmcheck) ); _PROTOTYPE( int data_copy, (endpoint_t from, vir_bytes from_addr, endpoint_t to, vir_bytes to_addr, size_t bytes)); +_PROTOTYPE( int data_copy_vmcheck, (endpoint_t from, vir_bytes from_addr, + endpoint_t to, vir_bytes to_addr, size_t bytes)); #define data_copy_to(d, p, v, n) data_copy(SYSTEM, (d), (p), (v), (n)); #define data_copy_from(d, p, v, n) data_copy((p), (v), SYSTEM, (d), (n)); _PROTOTYPE( void alloc_segments, (struct proc *rp) ); -_PROTOTYPE( void vm_init, (void) ); +_PROTOTYPE( void vm_init, (struct proc *first) ); _PROTOTYPE( void vm_map_range, (u32_t base, u32_t size, u32_t offset) ); _PROTOTYPE( int vm_copy, (vir_bytes src, struct proc *srcproc, vir_bytes dst, struct proc *dstproc, phys_bytes bytes)); @@ -130,7 +134,7 @@ _PROTOTYPE( phys_bytes umap_remote, (struct proc* rp, int seg, _PROTOTYPE( phys_bytes umap_virtual, (struct proc* rp, int seg, vir_bytes vir_addr, vir_bytes bytes) ); _PROTOTYPE( phys_bytes seg2phys, (U16_t) ); -_PROTOTYPE( void phys_memset, (phys_bytes source, unsigned long pattern, +_PROTOTYPE( int vm_phys_memset, (phys_bytes source, u8_t pattern, phys_bytes count) ); _PROTOTYPE( vir_bytes alloc_remote_segment, (u32_t *, segframe_t *, int, phys_bytes, vir_bytes, int)); @@ -164,5 +168,10 @@ _PROTOTYPE( int vm_checkrange, (struct proc *caller, struct proc *target, vir_bytes start, vir_bytes length, int writeflag, int checkonly)); _PROTOTYPE( void proc_stacktrace, (struct proc *proc) ); _PROTOTYPE( int vm_lookup, (struct proc *proc, vir_bytes virtual, vir_bytes *result, u32_t 
*ptent)); +_PROTOTYPE( int vm_suspend, (struct proc *caller, struct proc *target, + phys_bytes lin, phys_bytes size, int wrflag, int type)); +_PROTOTYPE( int delivermsg, (struct proc *target)); +_PROTOTYPE( phys_bytes arch_switch_copymsg, (struct proc *rp, message *m, + phys_bytes lin)); #endif /* PROTO_H */ diff --git a/kernel/system.c b/kernel/system.c index 992770ee7..cdc4cc656 100755 --- a/kernel/system.c +++ b/kernel/system.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -58,7 +59,6 @@ char *callnames[NR_SYS_CALLS]; call_vec[(call_nr-KERNEL_CALL)] = (handler) FORWARD _PROTOTYPE( void initialize, (void)); -FORWARD _PROTOTYPE( void softnotify_check, (void)); FORWARD _PROTOTYPE( struct proc *vmrestart_check, (message *)); /*===========================================================================* @@ -77,26 +77,18 @@ PUBLIC void sys_task() /* Initialize the system task. */ initialize(); + while (TRUE) { struct proc *restarting; restarting = vmrestart_check(&m); - softnotify_check(); - if(softnotify) - minix_panic("softnotify non-NULL before receive (1)", NO_NUM); if(!restarting) { int r; /* Get work. Block and wait until a request message arrives. */ - if(softnotify) - minix_panic("softnotify non-NULL before receive (2)", NO_NUM); if((r=receive(ANY, &m)) != OK) minix_panic("receive() failed", r); - if(m.m_source == SYSTEM) - continue; - if(softnotify) - minix_panic("softnotify non-NULL after receive", NO_NUM); - } + } sys_call_code = (unsigned) m.m_type; call_nr = sys_call_code - KERNEL_CALL; @@ -104,37 +96,13 @@ PUBLIC void sys_task() okendpt(who_e, &who_p); caller_ptr = proc_addr(who_p); - if (caller_ptr->p_endpoint == ipc_stats_target) - sys_stats.total= add64u(sys_stats.total, 1); - /* See if the caller made a valid request and try to handle it. 
*/ if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */ -#if DEBUG_ENABLE_IPC_WARNINGS kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source); -#endif - if (caller_ptr->p_endpoint == ipc_stats_target) - sys_stats.bad_req++; result = EBADREQUEST; /* illegal message type */ } else if (!GET_BIT(priv(caller_ptr)->s_k_call_mask, call_nr)) { -#if DEBUG_ENABLE_IPC_WARNINGS - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { - kprintf("SYSTEM: request %d from %d denied.\n", - call_nr, m.m_source); - } else if (curr == limit+extra) - { - kprintf("sys_task: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; -#endif - if (caller_ptr->p_endpoint == ipc_stats_target) - sys_stats.not_allowed++; result = ECALLDENIED; /* illegal message type */ } else { @@ -146,15 +114,20 @@ PUBLIC void sys_task() * until VM tells us it's allowed. VM has been notified * and we must wait for its reply to restart the call. */ + vmassert(RTS_ISSET(caller_ptr, VMREQUEST)); + vmassert(caller_ptr->p_vmrequest.type == VMSTYPE_KERNELCALL); memcpy(&caller_ptr->p_vmrequest.saved.reqmsg, &m, sizeof(m)); - caller_ptr->p_vmrequest.type = VMSTYPE_SYS_MESSAGE; } else if (result != EDONTREPLY) { /* Send a reply, unless inhibited by a handler function. * Use the kernel function lock_send() to prevent a system * call trap. 
*/ - if(restarting) - RTS_LOCK_UNSET(restarting, VMREQUEST); + if(restarting) { + vmassert(!RTS_ISSET(restarting, VMREQUEST)); +#if 0 + vmassert(!RTS_ISSET(restarting, VMREQTARGET)); +#endif + } m.m_type = result; /* report status of call */ if(WILLRECEIVE(caller_ptr, SYSTEM)) { if (OK != (s=lock_send(m.m_source, &m))) { @@ -222,7 +195,6 @@ PRIVATE void initialize(void) map(SYS_NEWMAP, do_newmap); /* set up a process memory map */ map(SYS_SEGCTL, do_segctl); /* add segment and get selector */ map(SYS_MEMSET, do_memset); /* write char to memory area */ - map(SYS_VM_SETBUF, do_vm_setbuf); /* PM passes buffer for page tables */ map(SYS_VMCTL, do_vmctl); /* various VM process settings */ /* Copying. */ @@ -350,7 +322,11 @@ PUBLIC void send_sig(int proc_nr, int sig_nr) rp = proc_addr(proc_nr); sigaddset(&priv(rp)->s_sig_pending, sig_nr); - soft_notify(rp->p_endpoint); + if(!intr_disabled()) { + lock_notify(SYSTEM, rp->p_endpoint); + } else { + mini_notify(proc_addr(SYSTEM), rp->p_endpoint); + } } /*===========================================================================* @@ -467,7 +443,9 @@ register struct proc *rc; /* slot of process to clean up */ if(isemptyp(rc)) minix_panic("clear_proc: empty process", rc->p_endpoint); - if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR) { + if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR || + rc->p_endpoint == VM_PROC_NR) + { /* This test is great for debugging system processes dying, * but as this happens normally on reboot, not good permanent code. */ @@ -543,13 +521,6 @@ register struct proc *rc; /* slot of process to clean up */ #endif } } - - /* No pending soft notifies. */ - for(np = softnotify; np; np = np->next_soft_notify) { - if(np == rc) { - minix_panic("dying proc was on next_soft_notify", np->p_endpoint); - } - } } /*===========================================================================* @@ -583,28 +554,6 @@ int access; /* does grantee want to CPF_READ or _WRITE? 
*/ return umap_virtual(proc_addr(proc_nr), D, v_offset, bytes); } -/*===========================================================================* - * softnotify_check * - *===========================================================================*/ -PRIVATE void softnotify_check(void) -{ - struct proc *np, *nextnp; - - if(!softnotify) - return; - - for(np = softnotify; np; np = nextnp) { - if(!np->p_softnotified) - minix_panic("softnotify but no p_softnotified", NO_NUM); - lock_notify(SYSTEM, np->p_endpoint); - nextnp = np->next_soft_notify; - np->next_soft_notify = NULL; - np->p_softnotified = 0; - } - - softnotify = NULL; -} - /*===========================================================================* * vmrestart_check * *===========================================================================*/ @@ -618,23 +567,18 @@ PRIVATE struct proc *vmrestart_check(message *m) if(!(restarting = vmrestart)) return NULL; - if(restarting->p_rts_flags & SLOT_FREE) - minix_panic("SYSTEM: VMREQUEST set for empty process", NO_NUM); + vmassert(!RTS_ISSET(restarting, SLOT_FREE)); + vmassert(RTS_ISSET(restarting, VMREQUEST)); type = restarting->p_vmrequest.type; restarting->p_vmrequest.type = VMSTYPE_SYS_NONE; vmrestart = restarting->p_vmrequest.nextrestart; - if(!RTS_ISSET(restarting, VMREQUEST)) - minix_panic("SYSTEM: VMREQUEST not set for process on vmrestart queue", - restarting->p_endpoint); - switch(type) { - case VMSTYPE_SYS_MESSAGE: + case VMSTYPE_KERNELCALL: memcpy(m, &restarting->p_vmrequest.saved.reqmsg, sizeof(*m)); - if(m->m_source != restarting->p_endpoint) - minix_panic("SYSTEM: vmrestart source doesn't match", - NO_NUM); + restarting->p_vmrequest.saved.reqmsg.m_source = NONE; + vmassert(m->m_source == restarting->p_endpoint); /* Original caller could've disappeared in the meantime. 
*/ if(!isokendpt(m->m_source, &who_p)) { kprintf("SYSTEM: ignoring call %d from dead %d\n", @@ -653,26 +597,6 @@ PRIVATE struct proc *vmrestart_check(message *m) } } return restarting; - case VMSTYPE_SYS_CALL: - kprintf("SYSTEM: restart sys_call\n"); - /* Restarting a kernel trap. */ - sys_call_restart(restarting); - - /* Handled; restart system loop. */ - return NULL; - case VMSTYPE_MSGCOPY: - /* Do delayed message copy. */ - if((r=data_copy(SYSTEM, - (vir_bytes) &restarting->p_vmrequest.saved.msgcopy.msgbuf, - restarting->p_vmrequest.saved.msgcopy.dst->p_endpoint, - (vir_bytes) restarting->p_vmrequest.saved.msgcopy.dst_v, - sizeof(message))) != OK) { - minix_panic("SYSTEM: delayed msgcopy failed", r); - } - RTS_LOCK_UNSET(restarting, VMREQUEST); - - /* Handled; restart system loop. */ - return NULL; default: minix_panic("strange restart type", type); } diff --git a/kernel/system.h b/kernel/system.h index 14f55df3c..d35c7a474 100644 --- a/kernel/system.h +++ b/kernel/system.h @@ -91,9 +91,6 @@ _PROTOTYPE( int do_memset, (message *m_ptr) ); #define do_memset do_unused #endif -_PROTOTYPE( int do_vm_setbuf, (message *m_ptr) ); -_PROTOTYPE( int do_vm_map, (message *m_ptr) ); - _PROTOTYPE( int do_abort, (message *m_ptr) ); #if ! 
USE_ABORT #define do_abort do_unused diff --git a/kernel/system/Makefile b/kernel/system/Makefile index 496663d1d..a93e64f53 100644 --- a/kernel/system/Makefile +++ b/kernel/system/Makefile @@ -52,7 +52,6 @@ OBJECTS = \ $(SYSTEM)(do_sigreturn.o) \ $(SYSTEM)(do_abort.o) \ $(SYSTEM)(do_getinfo.o) \ - $(SYSTEM)(do_vm_setbuf.o) \ $(SYSTEM)(do_sprofile.o) \ $(SYSTEM)(do_cprofile.o) \ $(SYSTEM)(do_profbuf.o) \ @@ -166,9 +165,6 @@ $(SYSTEM)(do_vm.o): do_vm.o do_vm.o: do_vm.c $(CC) do_vm.c -$(SYSTEM)(do_vm_setbuf.o): do_vm_setbuf.c - $(CC) do_vm_setbuf.c - $(SYSTEM)(do_sprofile.o): do_sprofile.c $(CC) do_sprofile.c diff --git a/kernel/system/do_devio.c b/kernel/system/do_devio.c index ee7e0a912..65834be38 100644 --- a/kernel/system/do_devio.c +++ b/kernel/system/do_devio.c @@ -63,19 +63,8 @@ register message *m_ptr; /* pointer to request message */ } if (i >= nr_io_range) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { kprintf("do_devio: port 0x%x (size %d) not allowed\n", m_ptr->DIO_PORT, size); - } else if (curr == limit+extra) - { - kprintf("do_devio: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return EPERM; } } @@ -83,19 +72,8 @@ register message *m_ptr; /* pointer to request message */ doit: if (m_ptr->DIO_PORT & (size-1)) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { kprintf("do_devio: unaligned port 0x%x (size %d)\n", m_ptr->DIO_PORT, size); - } else if (curr == limit+extra) - { - kprintf("do_devio: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return EPERM; } diff --git a/kernel/system/do_exec.c b/kernel/system/do_exec.c index c37eeb7c7..a608c21cf 100644 --- a/kernel/system/do_exec.c +++ b/kernel/system/do_exec.c @@ -31,6 +31,11 @@ register message *m_ptr; /* pointer to request message */ rp = proc_addr(proc_nr); + if(rp->p_misc_flags & MF_DELIVERMSG) { + rp->p_misc_flags &= ~MF_DELIVERMSG; + 
rp->p_delivermsg_lin = 0; + } + /* Save command name for debugging, ps(1) output, etc. */ if(data_copy(who_e, (vir_bytes) m_ptr->PR_NAME_PTR, SYSTEM, (vir_bytes) rp->p_name, (phys_bytes) P_NAME_LEN - 1) != OK) diff --git a/kernel/system/do_fork.c b/kernel/system/do_fork.c index 2e5ee9135..aa94a3418 100644 --- a/kernel/system/do_fork.c +++ b/kernel/system/do_fork.c @@ -9,6 +9,7 @@ */ #include "../system.h" +#include "../vm.h" #include #include @@ -33,10 +34,25 @@ register message *m_ptr; /* pointer to request message */ if(!isokendpt(m_ptr->PR_ENDPT, &p_proc)) return EINVAL; + rpp = proc_addr(p_proc); rpc = proc_addr(m_ptr->PR_SLOT); if (isemptyp(rpp) || ! isemptyp(rpc)) return(EINVAL); + vmassert(!(rpp->p_misc_flags & MF_DELIVERMSG)); + + /* needs to be receiving so we know where the message buffer is */ + if(!RTS_ISSET(rpp, RECEIVING)) { + printf("kernel: fork not done synchronously?\n"); + return EINVAL; + } + + /* memory becomes readonly */ + if (priv(rpp)->s_asynsize > 0) { + printf("kernel: process with waiting asynsend table can't fork\n"); + return EINVAL; + } + map_ptr= (struct mem_map *) m_ptr->PR_MEM_PTR; /* Copy parent 'proc' struct to child. And reinitialize some fields. */ @@ -59,7 +75,7 @@ register message *m_ptr; /* pointer to request message */ rpc->p_reg.psw &= ~TRACEBIT; /* clear trace bit */ - rpc->p_misc_flags &= ~(VIRT_TIMER | PROF_TIMER); + rpc->p_misc_flags &= ~(MF_VIRT_TIMER | MF_PROF_TIMER); rpc->p_virt_left = 0; /* disable, clear the process-virtual timers */ rpc->p_prof_left = 0; @@ -81,9 +97,11 @@ register message *m_ptr; /* pointer to request message */ /* Calculate endpoint identifier, so caller knows what it is. */ m_ptr->PR_ENDPT = rpc->p_endpoint; + m_ptr->PR_FORK_MSGADDR = (char *) rpp->p_delivermsg_vir; /* Install new map */ r = newmap(rpc, map_ptr); + FIXLINMSG(rpc); /* Don't schedule process in VM mode until it has a new pagetable. 
*/ if(m_ptr->PR_FORK_FLAGS & PFF_VMINHIBIT) { diff --git a/kernel/system/do_getinfo.c b/kernel/system/do_getinfo.c index e40889e1c..ecafdc2cb 100644 --- a/kernel/system/do_getinfo.c +++ b/kernel/system/do_getinfo.c @@ -28,9 +28,8 @@ register message *m_ptr; /* pointer to request message */ */ size_t length; vir_bytes src_vir; - int proc_nr, nr_e, nr; + int proc_nr, nr_e, nr, r; struct proc *caller; - phys_bytes ph; int wipe_rnd_bin = -1; caller = proc_addr(who_p); @@ -67,19 +66,6 @@ register message *m_ptr; /* pointer to request message */ src_vir = (vir_bytes) irq_hooks; break; } - case GET_SCHEDINFO: { - /* This is slightly complicated because we need two data structures - * at once, otherwise the scheduling information may be incorrect. - * Copy the queue heads and fall through to copy the process table. - */ - if((ph=umap_local(caller, D, (vir_bytes) m_ptr->I_VAL_PTR2,length)) == 0) - return EFAULT; - length = sizeof(struct proc *) * NR_SCHED_QUEUES; - CHECKRANGE_OR_SUSPEND(proc_addr(who_p), ph, length, 1); - data_copy(SYSTEM, (vir_bytes) rdy_head, - who_e, (vir_bytes) m_ptr->I_VAL_PTR2, length); - /* fall through to GET_PROCTAB */ - } case GET_PROCTAB: { length = sizeof(struct proc) * (NR_PROCS + NR_TASKS); src_vir = (vir_bytes) proc; @@ -174,15 +160,16 @@ register message *m_ptr; /* pointer to request message */ /* Try to make the actual copy for the requested data. 
*/ if (m_ptr->I_VAL_LEN > 0 && length > m_ptr->I_VAL_LEN) return (E2BIG); - if((ph=umap_local(caller, D, (vir_bytes) m_ptr->I_VAL_PTR,length)) == 0) - return EFAULT; - CHECKRANGE_OR_SUSPEND(caller, ph, length, 1); - if(data_copy(SYSTEM, src_vir, who_e, (vir_bytes) m_ptr->I_VAL_PTR, length) == OK) { + r = data_copy_vmcheck(SYSTEM, src_vir, who_e, + (vir_bytes) m_ptr->I_VAL_PTR, length); + + if(r != OK) return r; + if(wipe_rnd_bin >= 0 && wipe_rnd_bin < RANDOM_SOURCES) { krandom.bin[wipe_rnd_bin].r_size = 0; krandom.bin[wipe_rnd_bin].r_next = 0; } - } + return(OK); } diff --git a/kernel/system/do_irqctl.c b/kernel/system/do_irqctl.c index 041b77b4c..bc3a43324 100644 --- a/kernel/system/do_irqctl.c +++ b/kernel/system/do_irqctl.c @@ -139,10 +139,16 @@ irq_hook_t *hook; */ int proc_nr; + vmassert(intr_disabled()); + /* As a side-effect, the interrupt handler gathers random information by * timestamping the interrupt events. This is used for /dev/random. */ +#if 0 get_randomness(&krandom, hook->irq); +#else + FIXME("get_randomness disabled"); +#endif /* Check if the handler is still alive. * If it's dead, this should never happen, as processes that die @@ -158,7 +164,8 @@ irq_hook_t *hook; priv(proc_addr(proc_nr))->s_int_pending |= (1 << hook->notify_id); /* Build notification message and return. */ - lock_notify(HARDWARE, hook->proc_nr_e); + vmassert(intr_disabled()); + mini_notify(proc_addr(HARDWARE), hook->proc_nr_e); return(hook->policy & IRQ_REENABLE); } diff --git a/kernel/system/do_memset.c b/kernel/system/do_memset.c index 511507042..1359112b6 100644 --- a/kernel/system/do_memset.c +++ b/kernel/system/do_memset.c @@ -8,6 +8,7 @@ */ #include "../system.h" +#include "../vm.h" #if USE_MEMSET @@ -18,10 +19,8 @@ PUBLIC int do_memset(m_ptr) register message *m_ptr; { /* Handle sys_memset(). This writes a pattern into the specified memory. 
*/ - unsigned long p; unsigned char c = m_ptr->MEM_PATTERN; - p = c | (c << 8) | (c << 16) | (c << 24); - phys_memset((phys_bytes) m_ptr->MEM_PTR, p, (phys_bytes) m_ptr->MEM_COUNT); + vm_phys_memset((phys_bytes) m_ptr->MEM_PTR, c, (phys_bytes) m_ptr->MEM_COUNT); return(OK); } diff --git a/kernel/system/do_safecopy.c b/kernel/system/do_safecopy.c index 55744b714..2e7d7f8ee 100644 --- a/kernel/system/do_safecopy.c +++ b/kernel/system/do_safecopy.c @@ -61,22 +61,11 @@ endpoint_t *e_granter; /* new granter (magic grants) */ if(!HASGRANTTABLE(granter_proc)) return EPERM; if(priv(granter_proc)->s_grant_entries <= grant) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { kprintf( "verify_grant: grant verify failed in ep %d proc %d: " "grant %d out of range for table size %d\n", granter, proc_nr, grant, priv(granter_proc)->s_grant_entries); - } else if (curr == limit+extra) - { - kprintf("verify_grant: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return(EPERM); } @@ -219,23 +208,9 @@ int access; /* CPF_READ for a copy from granter to grantee, CPF_WRITE /* Verify permission exists. 
*/ if((r=verify_grant(granter, grantee, grantid, bytes, access, g_offset, &v_offset, &new_granter)) != OK) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { -#if 0 kprintf( "grant %d verify to copy %d->%d by %d failed: err %d\n", grantid, *src, *dst, grantee, r); -#endif - } else if (curr == limit+extra) - { - kprintf( - "do_safecopy`safecopy: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return r; } diff --git a/kernel/system/do_sigsend.c b/kernel/system/do_sigsend.c index ba340dec4..879a05ad0 100644 --- a/kernel/system/do_sigsend.c +++ b/kernel/system/do_sigsend.c @@ -29,18 +29,13 @@ message *m_ptr; /* pointer to request message */ struct sigcontext sc, *scp; struct sigframe fr, *frp; int proc_nr, r; - phys_bytes ph; if (!isokendpt(m_ptr->SIG_ENDPT, &proc_nr)) return(EINVAL); if (iskerneln(proc_nr)) return(EPERM); rp = proc_addr(proc_nr); - ph = umap_local(proc_addr(who_p), D, (vir_bytes) m_ptr->SIG_CTXT_PTR, sizeof(struct sigmsg)); - if(!ph) return EFAULT; - CHECKRANGE_OR_SUSPEND(proc_addr(who_p), ph, sizeof(struct sigmsg), 1); - /* Get the sigmsg structure into our address space. */ - if((r=data_copy(who_e, (vir_bytes) m_ptr->SIG_CTXT_PTR, + if((r=data_copy_vmcheck(who_e, (vir_bytes) m_ptr->SIG_CTXT_PTR, SYSTEM, (vir_bytes) &smsg, (phys_bytes) sizeof(struct sigmsg))) != OK) return r; @@ -54,12 +49,9 @@ message *m_ptr; /* pointer to request message */ sc.sc_flags = 0; /* unused at this time */ sc.sc_mask = smsg.sm_mask; - ph = umap_local(rp, D, (vir_bytes) scp, sizeof(struct sigcontext)); - if(!ph) return EFAULT; - CHECKRANGE_OR_SUSPEND(rp, ph, sizeof(struct sigcontext), 1); /* Copy the sigcontext structure to the user's stack. 
*/ - if((r=data_copy(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT, (vir_bytes) scp, - (vir_bytes) sizeof(struct sigcontext))) != OK) + if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT, + (vir_bytes) scp, (vir_bytes) sizeof(struct sigcontext))) != OK) return r; /* Initialize the sigframe structure. */ @@ -73,11 +65,9 @@ message *m_ptr; /* pointer to request message */ fr.sf_signo = smsg.sm_signo; fr.sf_retadr = (void (*)()) smsg.sm_sigreturn; - ph = umap_local(rp, D, (vir_bytes) frp, sizeof(struct sigframe)); - if(!ph) return EFAULT; - CHECKRANGE_OR_SUSPEND(rp, ph, sizeof(struct sigframe), 1); /* Copy the sigframe structure to the user's stack. */ - if((r=data_copy(SYSTEM, (vir_bytes) &fr, m_ptr->SIG_ENDPT, (vir_bytes) frp, + if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &fr, + m_ptr->SIG_ENDPT, (vir_bytes) frp, (vir_bytes) sizeof(struct sigframe))) != OK) return r; diff --git a/kernel/system/do_sysctl.c b/kernel/system/do_sysctl.c index ff35a6621..69dc4f862 100644 --- a/kernel/system/do_sysctl.c +++ b/kernel/system/do_sysctl.c @@ -16,7 +16,6 @@ PUBLIC int do_sysctl(m_ptr) register message *m_ptr; /* pointer to request message */ { - phys_bytes ph; vir_bytes len, buf; static char mybuf[DIAG_BUFSIZE]; struct proc *caller, *target; @@ -33,10 +32,7 @@ register message *m_ptr; /* pointer to request message */ caller->p_endpoint, len); return EINVAL; } - if((ph=umap_local(caller, D, buf, len)) == 0) - return EFAULT; - CHECKRANGE_OR_SUSPEND(caller, ph, len, 1); - if((s=data_copy(who_e, buf, SYSTEM, (vir_bytes) mybuf, len)) != OK) { + if((s=data_copy_vmcheck(who_e, buf, SYSTEM, (vir_bytes) mybuf, len)) != OK) { kprintf("do_sysctl: diag for %d: len %d: copy failed: %d\n", caller->p_endpoint, len, s); return s; diff --git a/kernel/system/do_umap.c b/kernel/system/do_umap.c index 7c235ba46..39d59fc1f 100644 --- a/kernel/system/do_umap.c +++ b/kernel/system/do_umap.c @@ -48,19 +48,15 @@ register message *m_ptr; /* pointer to request message */ case LOCAL_SEG: 
phys_addr = lin_addr = umap_local(targetpr, seg_index, offset, count); if(!lin_addr) return EFAULT; - CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1); naughty = 1; break; case REMOTE_SEG: phys_addr = lin_addr = umap_remote(targetpr, seg_index, offset, count); if(!lin_addr) return EFAULT; - CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1); naughty = 1; break; - case GRANT_SEG: - naughty = 1; case LOCAL_VM_SEG: - if(seg_index == MEM_GRANT || seg_type == GRANT_SEG) { + if(seg_index == MEM_GRANT) { vir_bytes newoffset; endpoint_t newep; int new_proc_nr; @@ -93,7 +89,6 @@ register message *m_ptr; /* pointer to request message */ kprintf("SYSTEM:do_umap: umap_local failed\n"); return EFAULT; } - CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1); if(vm_lookup(targetpr, lin_addr, &phys_addr, NULL) != OK) { kprintf("SYSTEM:do_umap: vm_lookup failed\n"); return EFAULT; diff --git a/kernel/system/do_vm_setbuf.c b/kernel/system/do_vm_setbuf.c deleted file mode 100644 index 484c43559..000000000 --- a/kernel/system/do_vm_setbuf.c +++ /dev/null @@ -1,29 +0,0 @@ -/* The system call implemented in this file: - * m_type: SYS_VM_SETBUF - * - * The parameters for this system call are: - * m4_l1: Start of the buffer - * m4_l2: Length of the buffer - * m4_l3: End of main memory - */ -#include "../system.h" - -#define VM_DEBUG 0 /* enable/ disable debug output */ - -/*===========================================================================* - * do_vm_setbuf * - *===========================================================================*/ -PUBLIC int do_vm_setbuf(m_ptr) -message *m_ptr; /* pointer to request message */ -{ - vm_base= m_ptr->m4_l1; - vm_size= m_ptr->m4_l2; - vm_mem_high= m_ptr->m4_l3; - -#if VM_DEBUG - kprintf("do_vm_setbuf: got 0x%x @ 0x%x for 0x%x\n", - vm_size, vm_base, vm_mem_high); -#endif - - return OK; -} diff --git a/kernel/system/do_vmctl.c b/kernel/system/do_vmctl.c index f522a20f8..03ed97c29 100644 --- a/kernel/system/do_vmctl.c +++ 
b/kernel/system/do_vmctl.c @@ -21,12 +21,10 @@ register message *m_ptr; /* pointer to request message */ { int proc_nr, i; endpoint_t ep = m_ptr->SVMCTL_WHO; - struct proc *p, *rp; + struct proc *p, *rp, *target; if(ep == SELF) { ep = m_ptr->m_source; } - vm_init(); - if(!isokendpt(ep, &proc_nr)) { kprintf("do_vmctl: unexpected endpoint %d from VM\n", ep); return EINVAL; @@ -42,14 +40,35 @@ register message *m_ptr; /* pointer to request message */ /* Send VM the information about the memory request. */ if(!(rp = vmrequest)) return ESRCH; - if(!RTS_ISSET(rp, VMREQUEST)) - minix_panic("do_vmctl: no VMREQUEST set", NO_NUM); + vmassert(RTS_ISSET(rp, VMREQUEST)); + +#if 0 + printf("kernel: vm request sent by: %s / %d about %d; 0x%lx-0x%lx, wr %d, stack: %s ", + rp->p_name, rp->p_endpoint, rp->p_vmrequest.who, + rp->p_vmrequest.start, + rp->p_vmrequest.start + rp->p_vmrequest.length, + rp->p_vmrequest.writeflag, rp->p_vmrequest.stacktrace); + printf("type %d\n", rp->p_vmrequest.type); +#endif + +#if DEBUG_VMASSERT + okendpt(rp->p_vmrequest.who, &proc_nr); + target = proc_addr(proc_nr); +#if 0 + if(!RTS_ISSET(target, VMREQTARGET)) { + printf("set stack: %s\n", rp->p_vmrequest.stacktrace); + minix_panic("VMREQTARGET not set for target", + NO_NUM); + } +#endif +#endif /* Reply with request fields. */ m_ptr->SVMCTL_MRG_ADDR = (char *) rp->p_vmrequest.start; m_ptr->SVMCTL_MRG_LEN = rp->p_vmrequest.length; m_ptr->SVMCTL_MRG_WRITE = rp->p_vmrequest.writeflag; m_ptr->SVMCTL_MRG_EP = rp->p_vmrequest.who; + m_ptr->SVMCTL_MRG_REQUESTOR = (void *) rp->p_endpoint; rp->p_vmrequest.vmresult = VMSUSPEND; /* Remove from request chain. 
*/ @@ -57,46 +76,61 @@ register message *m_ptr; /* pointer to request message */ return OK; case VMCTL_MEMREQ_REPLY: - if(!(rp = p->p_vmrequest.requestor)) - minix_panic("do_vmctl: no requestor set", ep); - p->p_vmrequest.requestor = NULL; - if(!RTS_ISSET(rp, VMREQUEST)) - minix_panic("do_vmctl: no VMREQUEST set", ep); - if(rp->p_vmrequest.vmresult != VMSUSPEND) - minix_panic("do_vmctl: result not VMSUSPEND set", - rp->p_vmrequest.vmresult); - rp->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE; - if(rp->p_vmrequest.vmresult == VMSUSPEND) - minix_panic("VM returned VMSUSPEND?", NO_NUM); - if(rp->p_vmrequest.vmresult != OK) + vmassert(RTS_ISSET(p, VMREQUEST)); + vmassert(p->p_vmrequest.vmresult == VMSUSPEND); + okendpt(p->p_vmrequest.who, &proc_nr); + target = proc_addr(proc_nr); + p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE; + vmassert(p->p_vmrequest.vmresult != VMSUSPEND); + if(p->p_vmrequest.vmresult != OK) kprintf("SYSTEM: VM replied %d to mem request\n", - rp->p_vmrequest.vmresult); + p->p_vmrequest.vmresult); - /* Put on restart chain. */ - rp->p_vmrequest.nextrestart = vmrestart; - vmrestart = rp; +#if 0 + printf("memreq reply: vm request sent by: %s / %d about %d; 0x%lx-0x%lx, wr %d, stack: %s ", + p->p_name, p->p_endpoint, p->p_vmrequest.who, + p->p_vmrequest.start, + p->p_vmrequest.start + p->p_vmrequest.length, + p->p_vmrequest.writeflag, p->p_vmrequest.stacktrace); + printf("type %d\n", p->p_vmrequest.type); + + vmassert(RTS_ISSET(target, VMREQTARGET)); + RTS_LOCK_UNSET(target, VMREQTARGET); +#endif + + if(p->p_vmrequest.type == VMSTYPE_KERNELCALL) { + /* Put on restart chain. */ + p->p_vmrequest.nextrestart = vmrestart; + vmrestart = p; + } else if(p->p_vmrequest.type == VMSTYPE_DELIVERMSG) { + vmassert(p->p_misc_flags & MF_DELIVERMSG); + vmassert(p == target); + vmassert(RTS_ISSET(p, VMREQUEST)); + RTS_LOCK_UNSET(p, VMREQUEST); + } else { #if DEBUG_VMASSERT - /* Sanity check. 
*/ - if(rp->p_vmrequest.vmresult == OK) { - if(CHECKRANGE(p, - rp->p_vmrequest.start, - rp->p_vmrequest.length, - rp->p_vmrequest.writeflag) != OK) { -kprintf("SYSTEM: request %d:0x%lx-0x%lx, wrflag %d, failed\n", - rp->p_endpoint, - rp->p_vmrequest.start, rp->p_vmrequest.start + rp->p_vmrequest.length, - rp->p_vmrequest.writeflag); - - minix_panic("SYSTEM: fail but VM said OK", NO_NUM); - } - } + printf("suspended with stack: %s\n", + p->p_vmrequest.stacktrace); #endif + minix_panic("strange request type", + p->p_vmrequest.type); + } + return OK; -#if VM_KERN_NOPAGEZERO - case VMCTL_NOPAGEZERO: + case VMCTL_ENABLE_PAGING: + if(vm_running) + minix_panic("do_vmctl: paging already enabled", NO_NUM); + vm_init(p); + if(!vm_running) + minix_panic("do_vmctl: paging enabling failed", NO_NUM); + vmassert(p->p_delivermsg_lin == + umap_local(p, D, p->p_delivermsg_vir, sizeof(message))); + if(newmap(p, (struct mem_map *) m_ptr->SVMCTL_VALUE) != OK) + minix_panic("do_vmctl: newmap failed", NO_NUM); + FIXLINMSG(p); + vmassert(p->p_delivermsg_lin); return OK; -#endif } /* Try architecture-specific vmctls. */ diff --git a/kernel/system/do_vtimer.c b/kernel/system/do_vtimer.c index 8904ab60d..50a957ab7 100644 --- a/kernel/system/do_vtimer.c +++ b/kernel/system/do_vtimer.c @@ -46,10 +46,10 @@ message *m_ptr; /* pointer to request message */ * VT_VIRTUAL and VT_PROF multiple times below. */ if (m_ptr->VT_WHICH == VT_VIRTUAL) { - pt_flag = VIRT_TIMER; + pt_flag = MF_VIRT_TIMER; pt_left = &rp->p_virt_left; } else { /* VT_PROF */ - pt_flag = PROF_TIMER; + pt_flag = MF_PROF_TIMER; pt_left = &rp->p_prof_left; } @@ -101,15 +101,15 @@ struct proc *rp; /* pointer to the process */ */ /* Check if the virtual timer expired. If so, send a SIGVTALRM signal. 
*/ - if ((rp->p_misc_flags & VIRT_TIMER) && rp->p_virt_left <= 0) { - rp->p_misc_flags &= ~VIRT_TIMER; + if ((rp->p_misc_flags & MF_VIRT_TIMER) && rp->p_virt_left <= 0) { + rp->p_misc_flags &= ~MF_VIRT_TIMER; rp->p_virt_left = 0; cause_sig(rp->p_nr, SIGVTALRM); } /* Check if the profile timer expired. If so, send a SIGPROF signal. */ - if ((rp->p_misc_flags & PROF_TIMER) && rp->p_prof_left <= 0) { - rp->p_misc_flags &= ~PROF_TIMER; + if ((rp->p_misc_flags & MF_PROF_TIMER) && rp->p_prof_left <= 0) { + rp->p_misc_flags &= ~MF_PROF_TIMER; rp->p_prof_left = 0; cause_sig(rp->p_nr, SIGPROF); } diff --git a/kernel/table.c b/kernel/table.c index 01a0c0edb..960801981 100755 --- a/kernel/table.c +++ b/kernel/table.c @@ -35,7 +35,7 @@ /* Define stack sizes for the kernel tasks included in the system image. */ #define NO_STACK 0 -#define SMALL_STACK (256 * sizeof(char *)) +#define SMALL_STACK (1024 * sizeof(char *)) #define IDL_S SMALL_STACK /* 3 intr, 3 temps, 4 db for Intel */ #define HRD_S NO_STACK /* dummy task, uses kernel stack */ #define TSK_S SMALL_STACK /* system and clock task */ @@ -48,6 +48,7 @@ PUBLIC char *t_stack[TOT_STACK_SPACE / sizeof(char *)]; #define IDL_F (SYS_PROC | PREEMPTIBLE | BILLABLE) /* idle task */ #define TSK_F (SYS_PROC) /* kernel tasks */ #define SRV_F (SYS_PROC | PREEMPTIBLE) /* system services */ +#define VM_F (SYS_PROC) /* vm */ #define USR_F (BILLABLE | PREEMPTIBLE | PROC_FULLVM) /* user processes */ #define SVM_F (SRV_F | PROC_FULLVM) /* servers with VM */ @@ -91,6 +92,7 @@ PRIVATE int ds_c[] = { SYS_ALL_CALLS }, vm_c[] = { SYS_ALL_CALLS }, drv_c[] = { DRV_C }, + usr_c[] = { SYS_SYSCTL }, tty_c[] = { DRV_C, SYS_PHYSCOPY, SYS_ABORT, SYS_IOPENABLE, SYS_READBIOS }, mem_c[] = { DRV_C, SYS_PHYSCOPY, SYS_PHYSVCOPY, SYS_IOPENABLE }; @@ -115,16 +117,16 @@ PUBLIC struct boot_image image[] = { {CLOCK,clock_task,TSK_F, 8, TASK_Q, TSK_S, TSK_T, 0, no_c,"clock" }, {SYSTEM, sys_task,TSK_F, 8, TASK_Q, TSK_S, TSK_T, 0, no_c,"system"}, {HARDWARE, 0,TSK_F, 
8, TASK_Q, HRD_S, 0, 0, no_c,"kernel"}, -{PM_PROC_NR, 0,SVM_F, 32, 4, 0, SRV_T, SRV_M, c(pm_c),"pm" }, -{FS_PROC_NR, 0,SVM_F, 32, 5, 0, SRV_T, SRV_M, c(fs_c),"vfs" }, +{PM_PROC_NR, 0,SRV_F, 32, 4, 0, SRV_T, SRV_M, c(pm_c),"pm" }, +{FS_PROC_NR, 0,SRV_F, 32, 5, 0, SRV_T, SRV_M, c(fs_c),"vfs" }, {RS_PROC_NR, 0,SVM_F, 4, 4, 0, SRV_T, SYS_M, c(rs_c),"rs" }, {MEM_PROC_NR, 0,SVM_F, 4, 3, 0, SRV_T, SYS_M,c(mem_c),"memory"}, -{LOG_PROC_NR, 0,SVM_F, 4, 2, 0, SRV_T, SYS_M,c(drv_c),"log" }, +{LOG_PROC_NR, 0,SRV_F, 4, 2, 0, SRV_T, SYS_M,c(drv_c),"log" }, {TTY_PROC_NR, 0,SVM_F, 4, 1, 0, SRV_T, SYS_M,c(tty_c),"tty" }, {DS_PROC_NR, 0,SVM_F, 4, 4, 0, SRV_T, SYS_M, c(ds_c),"ds" }, {MFS_PROC_NR, 0,SVM_F, 32, 5, 0, SRV_T, SRV_M, c(fs_c),"mfs" }, -{VM_PROC_NR, 0,SRV_F, 32, 2, 0, SRV_T, SRV_M, c(vm_c),"vm" }, -{INIT_PROC_NR, 0,USR_F, 8, USER_Q, 0, USR_T, USR_M, no_c,"init" }, +{VM_PROC_NR, 0,VM_F, 32, 2, 0, SRV_T, SRV_M, c(vm_c),"vm" }, +{INIT_PROC_NR, 0,USR_F, 8, USER_Q, 0, USR_T, USR_M, c(usr_c),"init" }, }; /* Verify the size of the system image table at compile time. Also verify that @@ -137,5 +139,3 @@ PUBLIC struct boot_image image[] = { extern int dummy[(NR_BOOT_PROCS==sizeof(image)/ sizeof(struct boot_image))?1:-1]; extern int dummy[(BITCHUNK_BITS > NR_BOOT_PROCS - 1) ? 
1 : -1]; - -PUBLIC endpoint_t ipc_stats_target= NONE; diff --git a/kernel/vm.h b/kernel/vm.h index 9e0b615da..6c3473f2b 100644 --- a/kernel/vm.h +++ b/kernel/vm.h @@ -2,18 +2,19 @@ #ifndef _VM_H #define _VM_H 1 -#define CHECKRANGE_OR_SUSPEND(pr, start, length, wr) { int mr; \ - if(vm_running && (mr=vm_checkrange(proc_addr(who_p), pr, start, length, wr, 0)) != OK) { \ - return mr; \ - } } +/* Pseudo error codes */ +#define VMSUSPEND -996 +#define EFAULT_SRC -995 +#define EFAULT_DST -994 -#define CHECKRANGE(pr, start, length, wr) \ - vm_checkrange(proc_addr(who_p), pr, start, length, wr, 1) +#define FIXLINMSG(prp) { prp->p_delivermsg_lin = umap_local(prp, D, prp->p_delivermsg_vir, sizeof(message)); } -/* Pseudo error code indicating a process request has to be - * restarted after an OK from VM. - */ -#define VMSUSPEND -996 +#define PHYS_COPY_CATCH(src, dst, size, a) { \ + vmassert(intr_disabled()); \ + catch_pagefaults++; \ + a = phys_copy(src, dst, size); \ + catch_pagefaults--; \ + } #endif