From: Ben Gras Date: Thu, 3 Jun 2010 11:32:22 +0000 (+0000) Subject: kernel fpu context switching: fix race condition X-Git-Tag: v3.1.8~507 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/index.html?a=commitdiff_plain;h=2f892aca9151eddc45d89da470685bc4413d6be9;p=minix.git kernel fpu context switching: fix race condition There seems to have been a broken assumption in the fpu context restoring code. It restores the context of the running process, without guarantee that the current process is the one that will be scheduled. This caused fpu saving for a different process to be triggered without fpu hardware being enabled, causing an fpu exception in the kernel. This practically only shows up with DEBUG_RACE on. Fix my thruby+me. The fix . is to only set the fpu-in-use-by-this-process flag in the exception handler, and then take care of fpu restoring when actually returning to userspace And the patch . translates fpu saving and restoring to c in arch_system.c, getting rid of a juicy chunk of assembly . makes osfxsr_feature private to arch_system.c . removes most of the arch dependent code from do_sigsend --- diff --git a/kernel/arch/i386/arch_system.c b/kernel/arch/i386/arch_system.c index c10b6e174..d336c6e70 100644 --- a/kernel/arch/i386/arch_system.c +++ b/kernel/arch/i386/arch_system.c @@ -11,6 +11,10 @@ #include #include #include +#include +#include + +#include #include "archconst.h" #include "proto.h" @@ -23,6 +27,9 @@ #include "apic.h" #endif +PRIVATE int osfxsr_feature; /* FXSAVE/FXRSTOR instructions support (SSEx) */ + + /* set MP and NE flags to handle FPU exceptions in native mode. */ #define CR0_MP_NE 0x0022 /* set CR4.OSFXSR[bit 9] if FXSR is supported. */ @@ -194,6 +201,59 @@ PRIVATE void fpu_init(void) } } +PUBLIC void save_fpu(struct proc *pr) +{ + if(!fpu_presence) + return; + + /* If the process hasn't touched the FPU, there is nothing to do. */ + + if(!(pr->p_misc_flags & MF_USED_FPU)) + return; + + /* Save changed FPU context. */ + + if(osfxsr_feature) { + fxsave(pr->p_fpu_state.fpu_save_area_p); + fninit(); + } else { + fnsave(pr->p_fpu_state.fpu_save_area_p); + } + + /* Clear MF_USED_FPU to signal there is no unsaved FPU state. */ + + pr->p_misc_flags &= ~MF_USED_FPU; +} + +PUBLIC void restore_fpu(struct proc *pr) +{ + /* If the process hasn't touched the FPU, enable the FPU exception + * and don't restore anything. + */ + if(!(pr->p_misc_flags & MF_USED_FPU)) { + write_cr0(read_cr0() | I386_CR0_TS); + return; + } + + /* If the process has touched the FPU, disable the FPU + * exception (both for the kernel and for the process once + * it's scheduled), and initialize or restore the FPU state. + */ + + clts(); + + if(!(pr->p_misc_flags & MF_FPU_INITIALIZED)) { + fninit(); + pr->p_misc_flags |= MF_FPU_INITIALIZED; + } else { + if(osfxsr_feature) { + fxrstor(pr->p_fpu_state.fpu_save_area_p); + } else { + frstor(pr->p_fpu_state.fpu_save_area_p); + } + } +} + PUBLIC void arch_init(void) { #ifdef CONFIG_APIC @@ -438,3 +498,36 @@ PUBLIC struct proc * arch_finish_switch_to_user(void) *((reg_t *)stk) = (reg_t) proc_ptr; return proc_ptr; } + +PUBLIC void fpu_sigcontext(struct proc *pr, struct sigframe *fr, struct sigcontext *sc) +{ + int fp_error; + + if (osfxsr_feature) { + fp_error = sc->sc_fpu_state.xfp_regs.fp_status & + ~sc->sc_fpu_state.xfp_regs.fp_control; + } else { + fp_error = sc->sc_fpu_state.fpu_regs.fp_status & + ~sc->sc_fpu_state.fpu_regs.fp_control; + } + + if (fp_error & 0x001) { /* Invalid op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + fr->sf_code = FPE_FLTINV; + } else if (fp_error & 0x004) { + fr->sf_code = FPE_FLTDIV; /* Divide by Zero */ + } else if (fp_error & 0x008) { + fr->sf_code = FPE_FLTOVF; /* Overflow */ + } else if (fp_error & 0x012) { + fr->sf_code = FPE_FLTUND; /* Denormal, Underflow */ + } else if (fp_error & 0x020) { + fr->sf_code = FPE_FLTRES; /* Precision */ + } else { + fr->sf_code = 0; /* XXX - probably should be used for FPE_INTOVF or + * FPE_INTDIV */ + } +} diff --git a/kernel/arch/i386/klib.S b/kernel/arch/i386/klib.S index 88da36726..98d7171b2 100644 --- a/kernel/arch/i386/klib.S +++ b/kernel/arch/i386/klib.S @@ -50,6 +50,11 @@ .globl _fninit /* non-waiting FPU initialization */ .globl _fnstsw /* store status word (non-waiting) */ .globl _fnstcw /* store control word (non-waiting) */ +.globl _fxsave +.globl _fnsave +.globl _fxrstor +.globl _frstor +.globl _clts /* * The routines only guarantee to preserve the registers the C compiler @@ -655,6 +660,10 @@ _fninit: fninit ret +_clts: + clts + ret + _fnstsw: xor %eax, %eax @@ -671,6 +680,40 @@ _fnstcw: pop %eax ret +/*===========================================================================*/ +/* fxsave */ +/*===========================================================================*/ +_fxsave: + mov 4(%esp), %eax + fxsave (%eax) /* Do not change the operand! (gas2ack) */ + ret + +/*===========================================================================*/ +/* fnsave */ +/*===========================================================================*/ +_fnsave: + mov 4(%esp), %eax + fnsave (%eax) /* Do not change the operand! (gas2ack) */ + fwait /* required for compatibility with processors prior pentium */ + ret + +/*===========================================================================*/ +/* fxrstor */ +/*===========================================================================*/ +_fxrstor: + mov 4(%esp), %eax + fxrstor (%eax) /* Do not change the operand! (gas2ack) */ + ret + +/*===========================================================================*/ +/* frstor */ +/*===========================================================================*/ +_frstor: + mov 4(%esp), %eax + frstor (%eax) /* Do not change the operand! (gas2ack) */ + ret + + /*===========================================================================*/ /* read_cr0 */ /*===========================================================================*/ @@ -847,3 +890,4 @@ _switch_address_space: mov %edx, _ptproc 0: ret + diff --git a/kernel/arch/i386/mpx.S b/kernel/arch/i386/mpx.S index bc8e926ed..487078fc9 100644 --- a/kernel/arch/i386/mpx.S +++ b/kernel/arch/i386/mpx.S @@ -90,7 +90,7 @@ begbss: .globl _params_offset .globl _mon_ds .globl _switch_to_user -.globl _lazy_fpu +.globl _save_fpu .globl _hwint00 /* handlers for hardware interrupts */ .globl _hwint01 @@ -589,33 +589,16 @@ _inval_opcode: _copr_not_available: TEST_INT_IN_KERNEL(4, copr_not_available_in_kernel) - clts - cld /* set direction flag to a known value */ + cld /* set direction flag to a known value */ SAVE_PROCESS_CTX_NON_LAZY(0) /* stop user process cycles */ push %ebp + mov $0, %ebp call _context_stop pop %ebp lea P_MISC_FLAGS(%ebp), %ebx - movw (%ebx), %cx - and $MF_FPU_INITIALIZED, %cx - jnz 0f /* jump if FPU is already initialized */ - orw $MF_FPU_INITIALIZED, (%ebx) - fninit - jmp copr_return -0: /* load FPU context for current process */ - mov %ss:FP_SAVE_AREA_P(%ebp), %eax - cmp $0, _osfxsr_feature - jz fp_l_no_fxsr /* FXSR is not avaible. */ - - /* DO NOT CHANGE THE OPERAND!!! gas2ack does not handle it yet */ - fxrstor (%eax) - jmp copr_return -fp_l_no_fxsr: - /* DO NOT CHANGE THE OPERAND!!! gas2ack does not handle it yet */ - frstor (%eax) -copr_return: - orw $MF_USED_FPU, (%ebx) /* fpu was used during last execution */ + orw $MF_USED_FPU, (%ebx) + mov $0, %ebp jmp _switch_to_user copr_not_available_in_kernel: @@ -656,51 +639,6 @@ _machine_check: _simd_exception: EXCEPTION_NO_ERR_CODE(SIMD_EXCEPTION_VECTOR) -/*===========================================================================*/ -/* lazy_fpu */ -/*===========================================================================*/ -/* void lazy_fpu(struct proc *pptr) - * It's called, when we are on kernel stack. - * Actualy lazy code is just few lines, which check MF_USED_FPU, - * another part is save_init_fpu(). - */ -_lazy_fpu: - push %ebp - mov %esp, %ebp - push %eax - push %ebx - push %ecx - cmp $0, _fpu_presence /* Do we have FPU? */ - jz no_fpu_available - mov 8(%ebp), %eax /* Get pptr */ - lea P_MISC_FLAGS(%eax), %ebx - movw (%ebx), %cx - and $MF_USED_FPU, %cx - jz 0f /* Don't save FPU */ - mov %ss:FP_SAVE_AREA_P(%eax), %eax - cmp $0, _osfxsr_feature - jz fp_s_no_fxsr /* FXSR is not avaible. */ - - /* DO NOT CHANGE THE OPERAND!!! gas2ack does not handle it yet */ - fxsave (%eax) - fninit - jmp fp_saved -fp_s_no_fxsr: - /* DO NOT CHANGE THE OPERAND!!! gas2ack does not handle it yet */ - fnsave (%eax) - fwait /* required for compatibility with processors prior pentium */ -fp_saved: - andw $~MF_USED_FPU, (%ebx) -0: mov %cr0, %eax - or $0x00000008, %eax /* Set TS flag */ - mov %eax, %cr0 -no_fpu_available: - pop %ecx - pop %ebx - pop %eax - pop %ebp - ret - /*===========================================================================*/ /* reload_cr3 */ /*===========================================================================*/ diff --git a/kernel/arch/i386/proto.h b/kernel/arch/i386/proto.h index 041e9d447..3acb7c42c 100644 --- a/kernel/arch/i386/proto.h +++ b/kernel/arch/i386/proto.h @@ -86,6 +86,11 @@ _PROTOTYPE( void reload_ds, (void) ); _PROTOTYPE( void ia32_msr_read, (u32_t reg, u32_t * hi, u32_t * lo) ); _PROTOTYPE( void ia32_msr_write, (u32_t reg, u32_t hi, u32_t lo) ); _PROTOTYPE( void fninit, (void)); +_PROTOTYPE( void clts, (void)); +_PROTOTYPE( void fxsave, (void *)); +_PROTOTYPE( void fnsave, (void *)); +_PROTOTYPE( void fxrstor, (void *)); +_PROTOTYPE( void frstor, (void *)); _PROTOTYPE( unsigned short fnstsw, (void)); _PROTOTYPE( void fnstcw, (unsigned short* cw)); diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h index 883078bce..0b8cc910a 100644 --- a/kernel/arch/i386/sconst.h +++ b/kernel/arch/i386/sconst.h @@ -140,10 +140,18 @@ SAVE_TRAP_CTX(displ, %ebp, %esi) ; #define SAVE_PROCESS_CTX(displ) \ - SAVE_PROCESS_CTX_NON_LAZY(displ) ;\ + SAVE_PROCESS_CTX_NON_LAZY(displ) ;\ + push %eax ;\ + push %ebx ;\ + push %ecx ;\ + push %edx ;\ push %ebp ;\ - call _lazy_fpu ;\ - add $4, %esp ; + call _save_fpu ;\ + pop %ebp ;\ + pop %edx ;\ + pop %ecx ;\ + pop %ebx ;\ + pop %eax ; /* * clear the IF flag in eflags which are stored somewhere in memory, e.g. on diff --git a/kernel/glo.h b/kernel/glo.h index 9ce8cfaec..e2e841411 100644 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -45,7 +45,6 @@ EXTERN time_t boottime; EXTERN char params_buffer[512]; /* boot monitor parameters */ EXTERN int minix_panicing; EXTERN char fpu_presence; -EXTERN char osfxsr_feature; /* FXSAVE/FXRSTOR instructions support (SSEx) */ EXTERN int verboseboot; /* verbose boot, init'ed in cstart */ #define MAGICTEST 0xC0FFEE23 EXTERN u32_t magictest; /* global magic number */ diff --git a/kernel/proc.c b/kernel/proc.c index 737b93bad..85fb811f7 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -241,6 +241,7 @@ check_misc_flags: * restore_user_context() carries out the actual mode switch from kernel * to userspace. This function does not return */ + restore_fpu(proc_ptr); restore_user_context(proc_ptr); NOT_REACHABLE; } diff --git a/kernel/proc.h b/kernel/proc.h index a914829f3..f8a465647 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -146,6 +146,7 @@ struct proc { #define proc_is_preempted(p) ((p)->p_rts_flags & RTS_PREEMPTED) #define proc_no_quantum(p) ((p)->p_rts_flags & RTS_NO_QUANTUM) #define proc_ptr_ok(p) ((p)->p_magic == PMAGIC) +#define proc_used_fpu(p) ((p)->p_misc_flags & (MF_FPU_INITIALIZED|MF_USED_FPU)) /* test whether the process is scheduled by the kernel's default policy */ #define proc_kernel_scheduler(p) ((p)->p_scheduler == NULL || \ diff --git a/kernel/proto.h b/kernel/proto.h index cb2eba688..32ea89448 100644 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -5,6 +5,7 @@ #include #include +#include #include /* Struct declarations. */ @@ -28,6 +29,9 @@ _PROTOTYPE( void cycles_accounting_init, (void) ); _PROTOTYPE( void context_stop, (struct proc * p) ); /* this is a wrapper to make calling it from assembly easier */ _PROTOTYPE( void context_stop_idle, (void) ); +_PROTOTYPE( void restore_fpu, (struct proc *) ); +_PROTOTYPE( void save_fpu, (struct proc *) ); +_PROTOTYPE( void fpu_sigcontext, (struct proc *, struct sigframe *fr, struct sigcontext *sc) ); /* main.c */ _PROTOTYPE( void main, (void) ); diff --git a/kernel/system/do_sigsend.c b/kernel/system/do_sigsend.c index 53b872fe9..656a86fb6 100644 --- a/kernel/system/do_sigsend.c +++ b/kernel/system/do_sigsend.c @@ -27,9 +27,6 @@ PUBLIC int do_sigsend(struct proc * caller, message * m_ptr) struct sigcontext sc, *scp; struct sigframe fr, *frp; int proc_nr, r; - #if (_MINIX_CHIP == _CHIP_INTEL) - unsigned short int fp_error; - #endif if (!isokendpt(m_ptr->SIG_ENDPT, &proc_nr)) return(EINVAL); if (iskerneln(proc_nr)) return(EPERM); @@ -69,38 +66,7 @@ PUBLIC int do_sigsend(struct proc * caller, message * m_ptr) rp->p_reg.fp = (reg_t) &frp->sf_fp; fr.sf_scp = scp; - #if (_MINIX_CHIP == _CHIP_INTEL) - if (osfxsr_feature == 1) { - fp_error = sc.sc_fpu_state.xfp_regs.fp_status & - ~sc.sc_fpu_state.xfp_regs.fp_control; - } else { - fp_error = sc.sc_fpu_state.fpu_regs.fp_status & - ~sc.sc_fpu_state.fpu_regs.fp_control; - } - - if (fp_error & 0x001) { /* Invalid op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - fr.sf_code = FPE_FLTINV; - } else if (fp_error & 0x004) { - fr.sf_code = FPE_FLTDIV; /* Divide by Zero */ - } else if (fp_error & 0x008) { - fr.sf_code = FPE_FLTOVF; /* Overflow */ - } else if (fp_error & 0x012) { - fr.sf_code = FPE_FLTUND; /* Denormal, Underflow */ - } else if (fp_error & 0x020) { - fr.sf_code = FPE_FLTRES; /* Precision */ - } else { - fr.sf_code = 0; /* XXX - probably should be used for FPE_INTOVF or - * FPE_INTDIV */ - } - -#else - fr.sf_code = 0; -#endif + fpu_sigcontext(rp, &fr, &sc); fr.sf_signo = smsg.sm_signo; fr.sf_retadr = (void (*)()) smsg.sm_sigreturn;