From ebbce7507b6f317e3069bef69e28e52e04c9ba13 Mon Sep 17 00:00:00 2001
From: Tomas Hruby
Date: Fri, 6 Nov 2009 09:08:26 +0000
Subject: [PATCH] Complete overhaul of mode switching code

- after a trap to kernel, the code automatically switches to the kernel
  stack, in the future local to the CPU

- the k_reenter variable is replaced by a test of whether the saved CS is
  the kernel CS or not. The information is passed on if needed. This
  removes a global variable which would need to be cpu local

- no need for global variables describing the exception or trap context.
  This information is kept on the stack and passed to the C code as a
  pointer to a single structure

- removed the loadedcr3 variable; its use is replaced by reading the %cr3
  register

- no need to re-disable interrupts in restart() as they are already
  disabled

- unified handling of traps that push an error code and traps that don't

- removed the save() function as the process context is not saved directly
  to the process table but saved as required by the trap code. Essentially
  it means that the save() code is inlined everywhere, not only in the
  exception handling routine

- returning from a syscall is more arch independent - it sets the return
  register in C

- the top of the x86 stack contains the current CPU id and a pointer to
  the currently scheduled process (the one just interrupted) so the mode
  switch code can find where to save the context without using proc_ptr,
  which will be cpu local in the future and therefore difficult to access
  in assembler and expensive to access in general

- some more cleanup of the level0 code. No need to read back the argument
  passed in %eax from the proc structure. The mode switch code does not
  clobber the general registers, hence we can just call what is in %eax

- many assembly macros in sconst.h as they will be reused by the apic
  assembly
---
 kernel/arch/i386/exception.c |  88 ++++----
 kernel/arch/i386/klib386.S   |  36 +++-
 kernel/arch/i386/memory.c    |   1 -
 kernel/arch/i386/mpx386.S    | 389 ++++++++++++++++-------------------
 kernel/arch/i386/protect.c   |  41 +---
 kernel/arch/i386/proto.h     |  57 ++++-
 kernel/arch/i386/sconst.h    | 106 ++++++++++
 kernel/arch/i386/system.c    |  27 +++
 kernel/glo.h                 |   1 -
 kernel/proc.c                |   7 +-
 kernel/proto.h               |   4 +-
 11 files changed, 442 insertions(+), 315 deletions(-)

diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c
index 79ab15860..5f6dd1071 100755
--- a/kernel/arch/i386/exception.c
+++ b/kernel/arch/i386/exception.c
@@ -16,21 +16,21 @@
 extern int vm_copy_in_progress, catch_pagefaults;
 extern struct proc *vm_copy_from, *vm_copy_to;
 
-void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno,
-	u32_t *old_eipptr, u32_t *old_eaxptr, u32_t pagefaultcr2)
+void pagefault( struct proc *pr,
+		struct exception_frame * frame,
+		int is_nested)
 {
 	int s;
 	vir_bytes ph;
 	u32_t pte;
 	int procok = 0, pcok = 0, rangeok = 0;
 	int in_physcopy = 0;
-	vir_bytes test_eip;
-	vmassert(old_eipptr);
-	vmassert(old_eaxptr);
+	reg_t pagefaultcr2;
 
-	vmassert(*old_eipptr == old_eip);
-	vmassert(old_eipptr != &old_eip);
+	vmassert(frame);
+
+	pagefaultcr2 = read_cr2();
 
 #if 0
 	printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n",
@@ -41,18 +41,21 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno,
 		vmassert(pr->p_seg.p_cr3 == read_cr3());
 	}
 
-	test_eip = k_reenter ?
old_eip : pr->p_reg.pc; - - in_physcopy = (test_eip > (vir_bytes) phys_copy) && - (test_eip < (vir_bytes) phys_copy_fault); + in_physcopy = (frame->eip > (vir_bytes) phys_copy) && + (frame->eip < (vir_bytes) phys_copy_fault); - if((k_reenter || iskernelp(pr)) && + if((is_nested || iskernelp(pr)) && catch_pagefaults && in_physcopy) { #if 0 printf("pf caught! addr 0x%lx\n", pagefaultcr2); #endif - *old_eipptr = (u32_t) phys_copy_fault; - *old_eaxptr = pagefaultcr2; + if (is_nested) { + frame->eip = (reg_t) phys_copy_fault_in_kernel; + } + else { + pr->p_reg.pc = (reg_t) phys_copy_fault; + pr->p_reg.retreg = pagefaultcr2; + } return; } @@ -61,19 +64,19 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, * have page faults. VM does have its own page table but also * can't have page faults (because VM has to handle them). */ - if(k_reenter || (pr->p_endpoint <= INIT_PROC_NR && + if(is_nested || (pr->p_endpoint <= INIT_PROC_NR && !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) { /* Page fault we can't / don't want to * handle. */ - kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, k_reenter %d\n", + kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, is_nested %d\n", pr->p_endpoint, pr->p_name, pr->p_reg.pc, - pagefaultcr2, trap_errno, k_reenter); + pagefaultcr2, frame->errcode, is_nested); proc_stacktrace(pr); if(pr->p_endpoint != SYSTEM) { proc_stacktrace(proc_addr(SYSTEM)); } - kprintf("pc of pagefault: 0x%lx\n", test_eip); + kprintf("pc of pagefault: 0x%lx\n", frame->eip); minix_panic("page fault in system process", pr->p_endpoint); return; @@ -90,7 +93,7 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, * handled. */ pr->p_pagefault.pf_virtual = pagefaultcr2; - pr->p_pagefault.pf_flags = trap_errno; + pr->p_pagefault.pf_flags = frame->errcode; pr->p_nextpagefault = pagefaults; pagefaults = pr; @@ -102,16 +105,7 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, /*===========================================================================* * exception * *===========================================================================*/ -PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags, - old_eipptr, old_eaxptr, pagefaultcr2) -unsigned vec_nr; -u32_t trap_errno; -u32_t old_eip; -U16_t old_cs; -u32_t old_eflags; -u32_t *old_eipptr; -u32_t *old_eaxptr; -u32_t pagefaultcr2; +PUBLIC void exception_handler(int is_nested, struct exception_frame * frame) { /* An exception or unexpected interrupt has occurred. */ @@ -144,41 +138,35 @@ struct proc *t; register struct ex_s *ep; struct proc *saved_proc; - if(k_reenter > 2) { - /* This can't end well. */ - minix_panic("exception: k_reenter too high", k_reenter); - } - /* Save proc_ptr, because it may be changed by debug statements. */ saved_proc = proc_ptr; - ep = &ex_data[vec_nr]; + ep = &ex_data[frame->vector]; - if (vec_nr == 2) { /* spurious NMI on some machines */ + if (frame->vector == 2) { /* spurious NMI on some machines */ kprintf("got spurious NMI\n"); return; } - if(vec_nr == PAGE_FAULT_VECTOR) { - pagefault(old_eip, saved_proc, trap_errno, - old_eipptr, old_eaxptr, pagefaultcr2); + if(frame->vector == PAGE_FAULT_VECTOR) { + pagefault(saved_proc, frame, is_nested); return; } - /* If an exception occurs while running a process, the k_reenter variable - * will be zero. Exceptions in interrupt handlers or system traps will make - * k_reenter larger than zero. 
+ /* If an exception occurs while running a process, the is_nested variable + * will be zero. Exceptions in interrupt handlers or system traps will make + * is_nested non-zero. */ - if (k_reenter == 0 && ! iskernelp(saved_proc)) { + if (is_nested == 0 && ! iskernelp(saved_proc)) { #if 0 { kprintf( "vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n", - vec_nr, (unsigned long)trap_errno, - (unsigned long)old_eip, old_cs, - (unsigned long)old_eflags); - printseg("cs: ", 1, saved_proc, old_cs); + frame->vector, (unsigned long)frame->errcode, + (unsigned long)frame->eip, frame->cs, + (unsigned long)frame->eflags); + printseg("cs: ", 1, saved_proc, frame->cs); printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds); if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) { printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss); @@ -193,13 +181,13 @@ struct proc *t; /* Exception in system code. This is not supposed to happen. */ if (ep->msg == NIL_PTR || machine.processor < ep->minprocessor) - kprintf("\nIntel-reserved exception %d\n", vec_nr); + kprintf("\nIntel-reserved exception %d\n", frame->vector); else kprintf("\n%s\n", ep->msg); - kprintf("k_reenter = %d ", k_reenter); + kprintf("is_nested = %d ", is_nested); kprintf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, cs= 0x%x, eflags= 0x%x\n", - vec_nr, trap_errno, old_eip, old_cs, old_eflags); + frame->vector, frame->errcode, frame->eip, frame->cs, frame->eflags); /* TODO should we enable this only when compiled for some debug mode? */ if (saved_proc) { kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); diff --git a/kernel/arch/i386/klib386.S b/kernel/arch/i386/klib386.S index d4c226631..9a0eb5295 100644 --- a/kernel/arch/i386/klib386.S +++ b/kernel/arch/i386/klib386.S @@ -26,6 +26,7 @@ .globl phys_outsb /* likewise byte by byte */ .globl phys_copy /* copy data from anywhere to anywhere in memory */ .globl phys_copy_fault /* phys_copy pagefault */ +.globl phys_copy_fault_in_kernel /* phys_copy pagefault in kernel */ .globl phys_memset /* write pattern anywhere in memory */ .globl mem_rdw /* copy one word from [segment:offset] */ .globl reset /* reset the system */ @@ -33,6 +34,7 @@ .globl level0 /* call a function at level 0 */ .globl read_cpu_flags /* read the cpu flags */ .globl read_cr0 /* read cr0 */ +.globl read_cr2 /* read cr2 */ .globl getcr3val .globl write_cr0 /* write a value in cr0 */ .globl read_cr4 @@ -343,6 +345,13 @@ phys_copy_fault: /* kernel can send us here */ pop %esi ret +phys_copy_fault_in_kernel: /* kernel can send us here */ + pop %es + pop %edi + pop %esi + mov %cr2, %eax + ret + /*===========================================================================*/ /* phys_memset */ /*===========================================================================*/ @@ -436,7 +445,7 @@ idle_task: */ push $halt call level0 /* level0(halt) */ - pop %eax + add $4, %esp jmp idle_task halt: sti @@ -453,18 +462,23 @@ halt: * things that are only possible at the most privileged CPU level. 
*/ level0: - mov 4(%esp), %eax - cmpb $-1, k_reenter + /* check whether we are already running in kernel, the kernel cs + * selector has 3 lower bits zeroed */ + mov %cs, %ax + cmpw $CS_SELECTOR, %ax jne 0f - int $LEVEL0_VECTOR - ret -0: + /* call the function directly as if it was a normal function call */ + mov 4(%esp), %eax call *%eax ret - + /* if not runnig in the kernel yet, trap to kernel */ +0: + mov 4(%esp), %eax + int $LEVEL0_VECTOR + ret /*===========================================================================*/ /* read_flags */ @@ -521,6 +535,14 @@ write_cr0: pop %ebp ret +/*===========================================================================*/ +/* read_cr2 */ +/*===========================================================================*/ +/* PUBLIC reg_t read_cr2(void); */ +read_cr2: + mov %cr2, %eax + ret + /*===========================================================================*/ /* read_cr4 */ /*===========================================================================*/ diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index a875a590b..ccb3f9d1e 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -922,7 +922,6 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ target->p_endpoint, target->p_name); #endif - vmassert(k_reenter == -1); vmassert(proc_ptr->p_endpoint == SYSTEM); vm_suspend(caller, target, lin, bytes, wr, VMSTYPE_KERNELCALL); diff --git a/kernel/arch/i386/mpx386.S b/kernel/arch/i386/mpx386.S index 8536bd813..83c73d76e 100644 --- a/kernel/arch/i386/mpx386.S +++ b/kernel/arch/i386/mpx386.S @@ -1,4 +1,4 @@ -/* +/* * This file, mpx386.s, is included by mpx.s when Minix is compiled for * 32-bit Intel CPUs. The alternative mpx88.s is compiled for 16-bit CPUs. * @@ -82,14 +82,9 @@ begbss: */ .globl restart -.globl save .globl reload_cr3 .globl write_cr3 -.globl errexception -.globl exception1 -.globl exception - .globl divide_error .globl single_step_exception .globl nmi @@ -129,8 +124,6 @@ begbss: .globl hwint14 .globl hwint15 -.globl s_call -.globl p_s_call .globl level0_call /* Exported variables. */ @@ -200,7 +193,7 @@ copygdt: mov %ax, %fs mov %ax, %gs mov %ax, %ss - mov $k_stktop, %esp /* set sp to point to the top of kernel stack */ + mov $k_boot_stktop, %esp /* set sp to point to the top of kernel stack */ /* Save boot parameters into these global variables for i386 code */ movl %edx, params_size @@ -242,18 +235,33 @@ csinit: /* interrupt handlers for 386 32-bit protected mode */ /*===========================================================================*/ +#define PIC_IRQ_HANDLER(irq) \ + push $irq ;\ + call irq_handle /* intr_handle(irq_handlers[irq]) */ ;\ + add $4, %esp ; + /*===========================================================================*/ /* hwint00 - 07 */ /*===========================================================================*/ /* Note this is a macro, it just looks like a subroutine. 
*/ -#define hwint_master(irq) \ - call save /* save interrupted process state */;\ - push $irq ;\ - call irq_handle /* irq_handle(irq) */;\ - pop %ecx ;\ - movb $END_OF_INT, %al ;\ - outb $INT_CTL /* reenable master 8259 */;\ - ret /* restart (another) process */ + +#define hwint_master(irq) \ + TEST_INT_IN_KERNEL(4, 0f) ;\ + \ + SAVE_PROCESS_CTX(0) ;\ + movl $0, %ebp /* for stack trace */ ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + jmp restart ;\ + \ +0: \ + pusha ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + popa ;\ + iret ; /* Each of these entry points is an expansion of the hwint_master macro */ .balign 16 @@ -301,14 +309,24 @@ hwint07: /*===========================================================================*/ /* Note this is a macro, it just looks like a subroutine. */ #define hwint_slave(irq) \ - call save /* save interrupted process state */;\ - push $irq ;\ - call irq_handle /* irq_handle(irq) */;\ - pop %ecx ;\ - movb $END_OF_INT, %al ;\ - outb $INT_CTL /* reenable master 8259 */;\ - outb $INT2_CTL /* reenable slave 8259 */;\ - ret /* restart (another) process */ + TEST_INT_IN_KERNEL(4, 0f) ;\ + \ + SAVE_PROCESS_CTX(0) ;\ + movl $0, %ebp /* for stack trace */ ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + outb $INT2_CTL /* reenable slave 8259 */ ;\ + jmp restart ;\ + \ +0: \ + pusha ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + outb $INT2_CTL /* reenable slave 8259 */ ;\ + popa ;\ + iret ; /* Each of these entry points is an expansion of the hwint_slave macro */ .balign 16 @@ -351,229 +369,185 @@ hwint15: /* Interrupt routine for irq 15 */ hwint_slave(15) -/*===========================================================================*/ -/* save */ -/*===========================================================================*/ /* - * Save for protected mode. - * This is much simpler than for 8086 mode, because the stack already points - * into the process table, or has already been switched to the kernel stack. 
+ * syscall is only from a process to kernel */ +.align 16 +.globl syscall_entry +syscall_entry: -.balign 16 -save: - cld /* set direction flag to a known value */ - pushal /* save "general" registers */ - pushw %ds /* save ds */ - pushw %es /* save es */ - pushw %fs /* save fs */ - pushw %gs /* save gs */ - mov %ss, %dx /* ss is kernel data segment */ - mov %dx, %ds /* load rest of kernel segments */ - mov %dx, %es /* kernel does not use fs, gs */ - mov %esp, %eax /* prepare to return */ - incb k_reenter /* from -1 if not reentering */ - jne set_restart1 /* stack is already kernel stack */ - mov $k_stktop, %esp - push $restart /* build return address for int handler */ - xor %ebp, %ebp /* for stacktrace */ - jmp *RETADR-P_STACKBASE(%eax) - -.balign 4 -set_restart1: - push $restart1 - jmp *RETADR-P_STACKBASE(%eax) + SAVE_PROCESS_CTX(0) -/*===========================================================================*/ -/* _s_call */ -/*===========================================================================*/ -.balign 16 -s_call: -p_s_call: - cld /* set direction flag to a known value */ - sub $4, %esp /* skip RETADR */ - pusha /* save "general" registers */ - pushw %ds - pushw %es - pushw %fs - pushw %gs - - mov %ss, %si /* ss is kernel data segment */ - mov %si, %ds /* load rest of kernel segments */ - mov %si, %es /* kernel does not use fs, gs */ - incb k_reenter /* increment kernel entry count */ - mov %esp, %esi /* assumes P_STACKBASE == 0 */ - mov $k_stktop, %esp - xor %ebp, %ebp /* for stacktrace */ -/* end of inline save */ -/* now set up parameters for sys_call() */ - push %edx /* event set or flags bit map */ - push %ebx /* pointer to user message */ - push %eax /* source / destination */ - push %ecx /* call number (ipc primitive to use) */ - - call sys_call /* sys_call(call_nr, src_dst, m_ptr, bit_map) */ -/* caller is now explicitly in proc_ptr */ - mov %eax, AXREG(%esi) - -/* Fall into code to restart proc/task running. */ + /* save the pointer to the current process */ + push %ebp + + /* + * pass the syscall arguments from userspace to the handler. + * SAVE_PROCESS_CTX() does not clobber these registers, they are still + * set as the userspace have set them + */ + push %edx + push %ebx + push %eax + push %ecx + + /* for stack trace */ + movl $0, %ebp + + call sys_call + + /* restore the current process pointer and save the return value */ + add $4 * 4, %esp + pop %esi + mov %eax, AXREG(%esi) + + jmp restart + + +.align 16 +/* + * called by the exception interrupt vectors. If the exception does not push + * errorcode, we assume that the vector handler pushed 0 instead. Next pushed + * thing is the vector number. From this point on we can continue as if every + * exception pushes an error code + */ +exception_entry: + /* + * check if it is a nested trap by comparing the saved code segment + * descriptor with the kernel CS first + */ + TEST_INT_IN_KERNEL(12, exception_entry_nested) + +exception_entry_from_user: + + cld + + SAVE_PROCESS_CTX(8) + + /* for stack trace clear %ebp */ + movl $0, %ebp + + /* + * push a pointer to the interrupt state pushed by the cpu and the + * vector number pushed by the vector handler just before calling + * exception_entry and call the exception handler. 
+ */ + push %esp + push $0 /* it's not a nested exception */ + call exception_handler + + jmp restart + +exception_entry_nested: + + pusha + mov %esp, %eax + add $(8 * 4), %eax + push %eax + pushl $1 /* it's a nested exception */ + call exception_handler + add $8, %esp + popa + + /* clear the error code and the exception number */ + add $8, %esp + /* resume execution at the point of exception */ + iret /*===========================================================================*/ /* restart */ /*===========================================================================*/ restart: + call schedcheck -/* Restart the current process or the next process if it is set. */ + /* %eax is set by schedcheck() to the process to run */ + mov %eax, %esp /* will assume P_STACKBASE == 0 */ - cli - call schedcheck - movl proc_ptr, %esp /* will assume P_STACKBASE == 0 */ lldt P_LDT_SEL(%esp) /* enable process' segment descriptors */ cmpl $0, P_CR3(%esp) jz 0f + + /* + * test if the cr3 is loaded with the current value to avoid unnecessary + * TLB flushes + */ mov P_CR3(%esp), %eax - cmpl loadedcr3, %eax + mov %cr3, %ecx + cmp %ecx, %eax jz 0f mov %eax, %cr3 - mov %eax, loadedcr3 - mov proc_ptr, %eax - mov %eax, ptproc + mov %esp, ptproc movl $0, dirtypde 0: - lea P_STACKTOP(%esp), %eax /* arrange for next interrupt */ - movl %eax, tss+TSS3_S_SP0 /* to save state in process table */ -restart1: - decb k_reenter + popw %gs popw %fs popw %es popw %ds popal - add $4, %esp /* skip return adr */ + add $4, %esp /* skip return adr FIXME unused value */ iret /* continue process */ /*===========================================================================*/ /* exception handlers */ /*===========================================================================*/ + +#define EXCEPTION_ERR_CODE(vector) \ + push $vector ;\ + jmp exception_entry + +#define EXCEPTION_NO_ERR_CODE(vector) \ + pushl $0 ;\ + EXCEPTION_ERR_CODE(vector) + divide_error: - push $DIVIDE_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(DIVIDE_VECTOR) single_step_exception: - push $DEBUG_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(DEBUG_VECTOR) nmi: - push $NMI_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(NMI_VECTOR) breakpoint_exception: - push $BREAKPOINT_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(BREAKPOINT_VECTOR) overflow: - push $OVERFLOW_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(OVERFLOW_VECTOR) bounds_check: - push $BOUNDS_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(BOUNDS_VECTOR) inval_opcode: - push $INVAL_OP_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(INVAL_OP_VECTOR) copr_not_available: - push $COPROC_NOT_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(COPROC_NOT_VECTOR) double_fault: - push $DOUBLE_FAULT_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(DOUBLE_FAULT_VECTOR) copr_seg_overrun: - push $COPROC_SEG_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(COPROC_SEG_VECTOR) inval_tss: - push $INVAL_TSS_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(INVAL_TSS_VECTOR) segment_not_present: - push $SEG_NOT_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(SEG_NOT_VECTOR) stack_exception: - push $STACK_FAULT_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(STACK_FAULT_VECTOR) general_protection: - push $PROTECTION_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(PROTECTION_VECTOR) page_fault: - push $PAGE_FAULT_VECTOR - push %eax - mov %cr2, %eax - movl %eax, %ss:pagefaultcr2 - pop %eax - jmp errexception + EXCEPTION_ERR_CODE(PAGE_FAULT_VECTOR) copr_error: - push 
$COPROC_ERR_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(COPROC_ERR_VECTOR) -/*===========================================================================*/ -/* handle_exception */ -/*===========================================================================*/ -/* This is called for all exceptions which do not push an error code. */ - -.balign 16 -handle_exception: - movl $0, %ss:trap_errno /* clear trap_errno */ - pop %ss:ex_number - jmp exception1 - -/*===========================================================================*/ -/* errexception */ -/*===========================================================================*/ -/* This is called for all exceptions which push an error code. */ - -.balign 16 -errexception: - pop %ss:ex_number - pop %ss:trap_errno -exception1: -/* Common for all exceptions. */ - movl %esp, %ss:old_eax_ptr /* where will eax be saved */ - subl $PCREG-AXREG, %ss:old_eax_ptr /* here */ - - push %eax /* eax is scratch register */ - - mov 0+4(%esp), %eax /* old eip */ - movl %eax, %ss:old_eip - mov %esp, %eax - add $4, %eax - mov %eax, %ss:old_eip_ptr - movzwl 4+4(%esp), %eax /* old cs */ - movl %eax, %ss:old_cs - mov 8+4(%esp), %eax /* old eflags */ - movl %eax, %ss:old_eflags - - pop %eax - call save - push pagefaultcr2 - push old_eax_ptr - push old_eip_ptr - push old_eflags - push old_cs - push old_eip - push trap_errno - push ex_number - call exception /* (ex_number, trap_errno, old_eip, */ - /* old_cs, old_eflags) */ - add $8*4, %esp - ret /*===========================================================================*/ /* write_cr3 */ @@ -583,10 +557,10 @@ write_cr3: push %ebp mov %esp, %ebp mov 8(%ebp), %eax - cmpl loadedcr3, %eax + mov %cr3, %ecx + cmp %ecx, %eax jz 0f mov %eax, %cr3 - mov %eax, loadedcr3 movl $0, dirtypde 0: pop %ebp @@ -597,13 +571,17 @@ write_cr3: /*===========================================================================*/ level0_call: /* - * which level0 function to call was passed here by putting it in eax, so - * we get that from the saved state. + * which level0 function to call was passed here by putting it in %eax */ - call save - movl proc_ptr, %eax - movl AXREG(%eax), %eax - jmp *%eax + SAVE_PROCESS_CTX(0) + /* for stack trace */ + movl $0, %ebp + /* + * the function to call is in %eax, set in userspace. SAVE_PROCESS_CTX() + * does not clobber this register so we can use it straightaway + */ + call *%eax + jmp restart /*===========================================================================*/ @@ -631,16 +609,11 @@ reload_cr3: .short 0x526F /* this must be the first data entry (magic #) */ .bss -k_stack: -.space K_STACK_BYTES /* kernel stack */ -k_stktop: -/* top of kernel stack */ -.lcomm ex_number, 4 -.lcomm trap_errno, 4 -.lcomm old_eip_ptr, 4 -.lcomm old_eax_ptr, 4 -.lcomm old_eip, 4 -.lcomm old_cs, 4 -.lcomm old_eflags, 4 -.lcomm pagefaultcr2, 4 -.lcomm loadedcr3, 4 +/* + * this stack is used temporarily for booting only. 
We switch to a proper kernel + * stack after the first trap to kernel + */ +.globl k_boot_stktop +k_boot_stack: +.space 4096 /* kernel stack */ /* FIXME use macro here */ +k_boot_stktop: /* top of kernel stack */ diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index c114baac2..3660ff52c 100755 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -25,37 +25,6 @@ struct gatedesc_s { u16_t offset_high; }; -struct tss_s { - reg_t backlink; - reg_t sp0; /* stack pointer to use during interrupt */ - reg_t ss0; /* " segment " " " " */ - reg_t sp1; - reg_t ss1; - reg_t sp2; - reg_t ss2; - reg_t cr3; - reg_t ip; - reg_t flags; - reg_t ax; - reg_t cx; - reg_t dx; - reg_t bx; - reg_t sp; - reg_t bp; - reg_t si; - reg_t di; - reg_t es; - reg_t cs; - reg_t ss; - reg_t ds; - reg_t fs; - reg_t gs; - reg_t ldt; - u16_t trap; - u16_t iobase; -/* u8_t iomap[0]; */ -}; - PUBLIC struct segdesc_s gdt[GDT_SIZE]; /* used in klib.s and mpx.s */ PRIVATE struct gatedesc_s idt[IDT_SIZE]; /* zero-init so none present */ PUBLIC struct tss_s tss; /* zero init */ @@ -204,13 +173,7 @@ PUBLIC void prot_init(void) rp->p_seg.p_ldt_sel = ldt_index * DESC_SIZE; } - /* Build main TSS. - * This is used only to record the stack pointer to be used after an - * interrupt. - * The pointer is set up so that an interrupt automatically saves the - * current process's registers ip:cs:f:sp:ss in the correct slots in the - * process table. - */ + /* Build main TSS */ tss.ss0 = DS_SELECTOR; init_dataseg(&gdt[TSS_INDEX], vir2phys(&tss), sizeof(tss), INTR_PRIVILEGE); gdt[TSS_INDEX].access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; @@ -249,7 +212,7 @@ PUBLIC void idt_init(void) { general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE }, { page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE }, { copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE }, - { s_call, SYS386_VECTOR, USER_PRIVILEGE },/* 386 system call */ + { syscall_entry, SYS386_VECTOR, USER_PRIVILEGE },/* 386 system call */ { level0_call, LEVEL0_VECTOR, TASK_PRIVILEGE }, { NULL, 0, 0} }; diff --git a/kernel/arch/i386/proto.h b/kernel/arch/i386/proto.h index 3e641523d..24cc63f95 100644 --- a/kernel/arch/i386/proto.h +++ b/kernel/arch/i386/proto.h @@ -20,7 +20,6 @@ _PROTOTYPE( void hwint13, (void) ); _PROTOTYPE( void hwint14, (void) ); _PROTOTYPE( void hwint15, (void) ); - /* Exception handlers (real or protected mode), in numerical order. */ void _PROTOTYPE( int00, (void) ), _PROTOTYPE( divide_error, (void) ); void _PROTOTYPE( int01, (void) ), _PROTOTYPE( single_step_exception, (void) ); @@ -41,7 +40,7 @@ void _PROTOTYPE( copr_error, (void) ); /* Software interrupt handlers, in numerical order. 
 */
_PROTOTYPE( void trp, (void) );
-_PROTOTYPE( void s_call, (void) ), _PROTOTYPE( p_s_call, (void) );
+_PROTOTYPE( void syscall_entry, (void) );
 _PROTOTYPE( void level0_call, (void) );
 
 /* memory.c */
@@ -51,16 +50,25 @@ _PROTOTYPE( void vm_set_cr3, (struct proc *));
 
 /* exception.c */
-_PROTOTYPE( void exception, (unsigned vec_nr, u32_t trap_errno,
-		u32_t old_eip, U16_t old_cs, u32_t old_eflags,
-		u32_t *old_eip_ptr, u32_t *old_eax_ptr, u32_t pagefaultcr2) );
+struct exception_frame {
+	reg_t	vector;		/* which interrupt vector was triggered */
+	reg_t	errcode;	/* zero if the exception does not push an error code */
+	reg_t	eip;
+	reg_t	cs;
+	reg_t	eflags;
+	reg_t	esp;		/* undefined if trap is nested */
+	reg_t	ss;		/* undefined if trap is nested */
+};
+
+_PROTOTYPE( void exception, (struct exception_frame * frame));
 
 /* klib386.s */
 _PROTOTYPE( void level0, (void (*func)(void)) );
 _PROTOTYPE( void monitor, (void) );
 _PROTOTYPE( void reset, (void) );
 _PROTOTYPE( void int86, (void) );
-_PROTOTYPE( unsigned long read_cr0, (void) );
+_PROTOTYPE( reg_t read_cr0, (void) );
+_PROTOTYPE( reg_t read_cr2, (void) );
 _PROTOTYPE( void write_cr0, (unsigned long value) );
 _PROTOTYPE( unsigned long read_cr4, (void) );
 _PROTOTYPE( void write_cr4, (unsigned long value) );
@@ -75,6 +83,39 @@ _PROTOTYPE( void reload_cr3, (void) );
 _PROTOTYPE( void phys_memset, (phys_bytes ph, u32_t c, phys_bytes bytes) );
 
 /* protect.c */
+struct tss_s {
+  reg_t backlink;
+  reg_t sp0;		/* stack pointer to use during interrupt */
+  reg_t ss0;		/*   "   segment  "  "    "        "     */
+  reg_t sp1;
+  reg_t ss1;
+  reg_t sp2;
+  reg_t ss2;
+  reg_t cr3;
+  reg_t ip;
+  reg_t flags;
+  reg_t ax;
+  reg_t cx;
+  reg_t dx;
+  reg_t bx;
+  reg_t sp;
+  reg_t bp;
+  reg_t si;
+  reg_t di;
+  reg_t es;
+  reg_t cs;
+  reg_t ss;
+  reg_t ds;
+  reg_t fs;
+  reg_t gs;
+  reg_t ldt;
+  u16_t trap;
+  u16_t iobase;
+/* u8_t iomap[0]; */
+};
+
+EXTERN struct tss_s tss;
+
 _PROTOTYPE( void prot_init, (void) );
 _PROTOTYPE( void idt_init, (void) );
 _PROTOTYPE( void init_codeseg, (struct segdesc_s *segdp, phys_bytes base,
@@ -100,6 +141,10 @@ EXTERN struct gate_table_s gate_table_pic[];
 /* copies an array of vectors to the IDT. The last vector must be zero filled */
 _PROTOTYPE(void idt_copy_vectors, (struct gate_table_s * first));
 
+EXTERN void * k_boot_stktop;
+_PROTOTYPE(void tss_init, (struct tss_s * tss, void * kernel_stack, unsigned cpu));
+
+
 /* functions defined in architecture-independent kernel source. */
 #include "../../proto.h"
 
diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h
index 1837fa51a..2dbaa0163 100755
--- a/kernel/arch/i386/sconst.h
+++ b/kernel/arch/i386/sconst.h
@@ -1,3 +1,8 @@
+#ifndef __SCONST_H__
+#define __SCONST_H__
+
+#include "../../const.h"
+
 /* Miscellaneous constants used in assembler code. */
 
 W		=	_WORD_SIZE	/* Machine word size. */
@@ -26,3 +31,104 @@ P_CR3		= P_LDT_SEL+W
 P_LDT		= P_CR3+W
 
 Msize	=	9	/* size of a message in 32-bit words*/
+
+
+/*
+ * offset to the current process pointer right after a trap; we assume we
+ * always have an error code on the stack
+ */
+#define CURR_PROC_PTR		20
+
+/*
+ * tests whether the interrupt was triggered in kernel. If so, jump to the
+ * label. Displacement tells the macro how far the CS value saved by the trap
+ * is from the current %esp. The kernel code segment selector has the lower
+ * 3 bits zeroed
+ */
+#define TEST_INT_IN_KERNEL(displ, label) \
+	cmpl	$CS_SELECTOR, displ(%esp)	;\
+	je	label				;
+
+/*
+ * saves the basic interrupt context (no error code) to the process structure
+ *
+ * displ is the displacement of %esp from the original stack after trap
+ * pptr is the process structure pointer
+ * tmp is an available temporary register
+ */
+#define SAVE_TRAP_CTX(displ, pptr, tmp) \
+	movl	(0 + displ)(%esp), tmp	;\
+	movl	tmp, PCREG(pptr)	;\
+	movl	(4 + displ)(%esp), tmp	;\
+	movl	tmp, CSREG(pptr)	;\
+	movl	(8 + displ)(%esp), tmp	;\
+	movl	tmp, PSWREG(pptr)	;\
+	movl	(12 + displ)(%esp), tmp	;\
+	movl	tmp, SPREG(pptr)	;\
+	movl	tmp, STREG(pptr)	;\
+	movl	(16 + displ)(%esp), tmp	;\
+	movl	tmp, SSREG(pptr)	;
+
+#define SAVE_SEGS(pptr) \
+	mov	%ds, %ss:DSREG(pptr)	;\
+	mov	%es, %ss:ESREG(pptr)	;\
+	mov	%fs, %ss:FSREG(pptr)	;\
+	mov	%gs, %ss:GSREG(pptr)	;
+
+#define RESTORE_SEGS(pptr) \
+	movw	%ss:DSREG(pptr), %ds	;\
+	movw	%ss:ESREG(pptr), %es	;\
+	movw	%ss:FSREG(pptr), %fs	;\
+	movw	%ss:GSREG(pptr), %gs	;
+
+/*
+ * restore kernel segments. %ss is the kernel data segment, %cs is already set
+ * and %fs, %gs are not used
+ */
+#define RESTORE_KERNEL_SEGS \
+	mov	%ss, %si	;\
+	mov	%si, %ds	;\
+	mov	%si, %es	;
+
+#define SAVE_GP_REGS(pptr) \
+	mov	%eax, %ss:AXREG(pptr)	;\
+	mov	%ecx, %ss:CXREG(pptr)	;\
+	mov	%edx, %ss:DXREG(pptr)	;\
+	mov	%ebx, %ss:BXREG(pptr)	;\
+	mov	%esi, %ss:SIREG(pptr)	;\
+	mov	%edi, %ss:DIREG(pptr)	;
+
+#define RESTORE_GP_REGS(pptr) \
+	movl	%ss:AXREG(pptr), %eax	;\
+	movl	%ss:CXREG(pptr), %ecx	;\
+	movl	%ss:DXREG(pptr), %edx	;\
+	movl	%ss:BXREG(pptr), %ebx	;\
+	movl	%ss:SIREG(pptr), %esi	;\
+	movl	%ss:DIREG(pptr), %edi	;
+
+/*
+ * save the context of the interrupted process to the structure in the process
+ * table. It pushes %ebp to the stack to get a scratch register. After %esi is
+ * saved, we can use it to get the saved %ebp from the stack and save it to the
+ * final location
+ *
+ * displ is the stack displacement. In case of an exception, there are two
+ * extra values on the stack - the error code and the exception number
+ */
+#define SAVE_PROCESS_CTX(displ) \
+	push	%ebp					;\
+							;\
+	movl	(CURR_PROC_PTR + 4 + displ)(%esp), %ebp	;\
+							;\
+	/* save the segment registers */ \
+	SAVE_SEGS(%ebp)					;\
+							\
+	SAVE_GP_REGS(%ebp)				;\
+	pop	%esi	/* get the orig %ebp and save it */ ;\
+	mov	%esi, %ss:BPREG(%ebp)			;\
+							\
+	RESTORE_KERNEL_SEGS				;\
+	SAVE_TRAP_CTX(displ, %ebp, %esi)		;\
+	;
+
+#endif /* __SCONST_H__ */
diff --git a/kernel/arch/i386/system.c b/kernel/arch/i386/system.c
index bfdd3f6f1..37cff7033 100644
--- a/kernel/arch/i386/system.c
+++ b/kernel/arch/i386/system.c
@@ -107,10 +107,28 @@ PUBLIC void arch_get_aout_headers(int i, struct exec *h)
 	phys_copy(aout + i * A_MINHDR, vir2phys(h), (phys_bytes) A_MINHDR);
 }
 
+PUBLIC void tss_init(struct tss_s * tss, void * kernel_stack, unsigned cpu)
+{
+	/*
+	 * make space for the process pointer and the cpu id and point to the
+	 * first usable word
+	 */
+	tss->sp0 = ((unsigned) kernel_stack) - 2 * sizeof(void *);
+	tss->ss0 = DS_SELECTOR;
+
+	/*
+	 * set the cpu id at the top of the stack so we know on which cpu this
+	 * stack is in use when we trap to kernel
+	 */
+	*((reg_t *)(tss->sp0 + 1 * sizeof(reg_t))) = cpu;
+}
+
 PUBLIC void arch_init(void)
 {
 	idt_init();
 
+	tss_init(&tss, &k_boot_stktop, 0);
+
 #if 0
 	/* Set CR0_EM until we get FP context switching */
 	write_cr0(read_cr0() | CR0_EM);
@@ -389,3 +407,12 @@ PUBLIC void arch_do_syscall(struct proc *proc)
   /* Make the system call, for real this time. */
   proc->p_reg.retreg = sys_call(call_nr, src_dst_e, m_ptr, bit_map);
 }
+
+PUBLIC struct proc * arch_finish_schedcheck(void)
+{
+	char * stk;
+	stk = (char *)tss.sp0;
+	/* set the pointer to the process to run on the stack */
+	*((reg_t *)stk) = (reg_t) proc_ptr;
+	return proc_ptr;
+}
diff --git a/kernel/glo.h b/kernel/glo.h
index 25ecf45af..eef660117 100755
--- a/kernel/glo.h
+++ b/kernel/glo.h
@@ -36,7 +36,6 @@ EXTERN struct proc *bill_ptr;	/* process to bill for clock ticks */
 EXTERN struct proc *vmrestart;	/* first process on vmrestart queue */
 EXTERN struct proc *vmrequest;	/* first process on vmrequest queue */
 EXTERN struct proc *pagefaults;	/* first process on pagefault queue */
-EXTERN char k_reenter;		/* kernel reentry count (entry count less 1) */
 
 EXTERN unsigned lost_ticks;	/* clock ticks counted outside clock task */
 
diff --git a/kernel/proc.c b/kernel/proc.c
index e3bdc05ae..f58a7332d 100755
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -125,7 +125,7 @@ PRIVATE int QueueMess(endpoint_t ep, vir_bytes msg_lin, struct proc *dst)
 /*===========================================================================*
  *				schedcheck				     *
  *===========================================================================*/
-PUBLIC void schedcheck(void)
+PUBLIC struct proc * schedcheck(void)
 {
 	/* This function is called an instant before proc_ptr is
 	 * to be scheduled again.
@@ -214,7 +214,10 @@ PUBLIC void schedcheck(void) #if DEBUG_TRACE proc_ptr->p_schedules++; #endif - NOREC_RETURN(schedch, ); + + proc_ptr = arch_finish_schedcheck(); + + NOREC_RETURN(schedch, proc_ptr); } /*===========================================================================* diff --git a/kernel/proto.h b/kernel/proto.h index c44b8d31f..b442fe4da 100755 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -36,7 +36,8 @@ _PROTOTYPE( int lock_send, (int dst, message *m_ptr) ); _PROTOTYPE( void enqueue, (struct proc *rp) ); _PROTOTYPE( void dequeue, (struct proc *rp) ); _PROTOTYPE( void balance_queues, (struct timer *tp) ); -_PROTOTYPE( void schedcheck, (void) ); +_PROTOTYPE( struct proc * schedcheck, (void) ); +_PROTOTYPE( struct proc * arch_finish_schedcheck, (void) ); _PROTOTYPE( struct proc *endpoint_lookup, (endpoint_t ep) ); #if DEBUG_ENABLE_IPC_WARNINGS _PROTOTYPE( int isokendpt_f, (char *file, int line, endpoint_t e, int *p, int f)); @@ -105,6 +106,7 @@ _PROTOTYPE( void stop_profile_clock, (void) ); _PROTOTYPE( phys_bytes phys_copy, (phys_bytes source, phys_bytes dest, phys_bytes count) ); _PROTOTYPE( void phys_copy_fault, (void)); +_PROTOTYPE( void phys_copy_fault_in_kernel, (void)); #define virtual_copy(src, dst, bytes) virtual_copy_f(src, dst, bytes, 0) #define virtual_copy_vmcheck(src, dst, bytes) virtual_copy_f(src, dst, bytes, 1) _PROTOTYPE( int virtual_copy_f, (struct vir_addr *src, struct vir_addr *dst, -- 2.44.0
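
Note on the new stack-top layout (an illustration only, not part of the patch): the two words that tss_init() reserves just below the kernel stack top, and that arch_finish_schedcheck() and the CURR_PROC_PTR offset in sconst.h rely on, can be pictured with the following C sketch. The struct and macro names here are invented for the illustration; the patch itself uses raw pointer arithmetic.

/*
 * Hypothetical view of the words below k_boot_stktop after tss_init():
 * tss.sp0 points at curr_proc, the cpu id sits one word above it.
 */
struct stack_top {
	reg_t curr_proc;	/* written by arch_finish_schedcheck():
				 * the process about to run */
	reg_t cpu_id;		/* written once by tss_init(): id of the CPU
				 * this kernel stack belongs to */
};				/* k_boot_stktop points just past cpu_id */

/*
 * On a trap from user mode the CPU loads %esp from tss.sp0 and pushes
 * ss, esp, eflags, cs and eip (five words), leaving %esp at the saved eip.
 * The process pointer therefore sits five words above %esp, which is what
 * CURR_PROC_PTR = 20 encodes; exceptions add two more words (error code
 * and vector number), hence SAVE_PROCESS_CTX(8) in exception_entry_from_user.
 */
#define CURR_PROC_FROM_TRAP(esp) \
	(*(struct proc **)((char *)(esp) + 5 * sizeof(reg_t)))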