From a149be43fc2f771206b864277deeae4ea533d8af Mon Sep 17 00:00:00 2001 From: Ben Gras Date: Thu, 19 Apr 2012 15:06:47 +0200 Subject: [PATCH] use linker to align fpu state save area --- include/arch/i386/include/archtypes.h | 11 +------- kernel/arch/i386/arch_system.c | 37 +++++++++++++++++++++++---- kernel/arch/i386/procoffsets.cf | 1 - kernel/proc.c | 22 +++------------- kernel/proc.h | 1 - kernel/proto.h | 1 + kernel/system/do_fork.c | 10 +++----- kernel/system/do_mcontext.c | 6 ++--- kernel/system/do_sigreturn.c | 3 +-- kernel/system/do_sigsend.c | 3 +-- kernel/system/do_update.c | 31 ---------------------- 11 files changed, 45 insertions(+), 81 deletions(-) diff --git a/include/arch/i386/include/archtypes.h b/include/arch/i386/include/archtypes.h index fb424c977..75e0f1eea 100644 --- a/include/arch/i386/include/archtypes.h +++ b/include/arch/i386/include/archtypes.h @@ -26,19 +26,10 @@ typedef struct segframe { reg_t p_ldt_sel; /* selector in gdt with ldt base and limit */ reg_t p_cr3; /* page table root */ u32_t *p_cr3_v; + char *fpu_state; struct segdesc_s p_ldt[LDT_SIZE]; /* CS, DS and remote */ } segframe_t; -/* fpu_state_s is used in kernel proc table. - * Any changes in this structure requires changes in sconst.h, - * since this structure is used in proc structure. */ -struct fpu_state_s { - union fpu_state_u *fpu_save_area_p; /* 16-aligned fpu_save_area */ - /* fpu_image includes 512 bytes of image itself and - * additional 15 bytes required for manual 16-byte alignment. */ - char fpu_image[527]; -}; - #define INMEMORY(p) (!p->p_seg.p_cr3 || get_cpulocal_var(ptproc) == p) typedef u32_t atomic_t; /* access to an aligned 32bit value is atomic on i386 */ diff --git a/kernel/arch/i386/arch_system.c b/kernel/arch/i386/arch_system.c index 453047ef5..8e1c0de02 100644 --- a/kernel/arch/i386/arch_system.c +++ b/kernel/arch/i386/arch_system.c @@ -275,6 +275,8 @@ void fpu_init(void) void save_local_fpu(struct proc *pr, int retain) { + char *state = pr->p_seg.fpu_state; + /* Save process FPU context. If the 'retain' flag is set, keep the FPU * state as is. If the flag is not set, the state is undefined upon * return, and the caller is responsible for reloading a proper state. @@ -283,12 +285,14 @@ void save_local_fpu(struct proc *pr, int retain) if(!is_fpu()) return; + assert(state); + if(osfxsr_feature) { - fxsave(pr->p_fpu_state.fpu_save_area_p); + fxsave(state); } else { - fnsave(pr->p_fpu_state.fpu_save_area_p); + fnsave(state); if (retain) - (void) frstor(pr->p_fpu_state.fpu_save_area_p); + (void) frstor(state); } } @@ -322,18 +326,41 @@ void save_fpu(struct proc *pr) } } +/* reserve a chunk of memory for fpu state; every one has to + * be FPUALIGN-aligned. + */ +static char fpu_state[NR_PROCS][FPU_XFP_SIZE] __aligned(FPUALIGN); + +void arch_proc_init(int nr, struct proc *pr) +{ + if(nr < 0) return; + char *v; + + assert(nr < NR_PROCS); + + v = fpu_state[nr]; + + /* verify alignment */ + assert(!((vir_bytes)v % FPUALIGN)); + + pr->p_seg.fpu_state = v; +} + int restore_fpu(struct proc *pr) { int failed; + char *state = pr->p_seg.fpu_state; + + assert(state); if(!proc_used_fpu(pr)) { fninit(); pr->p_misc_flags |= MF_FPU_INITIALIZED; } else { if(osfxsr_feature) { - failed = fxrstor(pr->p_fpu_state.fpu_save_area_p); + failed = fxrstor(state); } else { - failed = frstor(pr->p_fpu_state.fpu_save_area_p); + failed = frstor(state); } if (failed) return EINVAL; diff --git a/kernel/arch/i386/procoffsets.cf b/kernel/arch/i386/procoffsets.cf index 9584de24b..835a6f649 100644 --- a/kernel/arch/i386/procoffsets.cf +++ b/kernel/arch/i386/procoffsets.cf @@ -21,7 +21,6 @@ member CSREG p_reg.cs member PSWREG p_reg.psw member SPREG p_reg.sp member SSREG p_reg.ss -member FP_SAVE_AREA_P p_fpu_state member P_LDT_SEL p_seg.p_ldt_sel member P_CR3 p_seg.p_cr3 member P_CR3_V p_seg.p_cr3_v diff --git a/kernel/proc.c b/kernel/proc.c index 892b8b35a..6c1f5098e 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -136,6 +136,9 @@ void proc_init(void) rp->p_scheduler = NULL; /* no user space scheduler */ rp->p_priority = 0; /* no priority */ rp->p_quantum_size_ms = 0; /* no quantum size */ + + /* arch-specific initialization */ + arch_proc_init(i, rp); } for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) { sp->s_proc_nr = NONE; /* initialize as free */ @@ -155,25 +158,6 @@ void proc_init(void) ip->p_rts_flags |= RTS_PROC_STOP; set_idle_name(ip->p_name, i); } - -#if (_MINIX_CHIP == _CHIP_INTEL) - for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) { - /* - * FXSR requires 16-byte alignment of memory image, but - * unfortunately a.out does not preserve the alignment while - * linking. Thus we have to do manual alignment. - */ - phys_bytes aligned_fp_area; - aligned_fp_area = - (phys_bytes) &rp->p_fpu_state.fpu_image; - if(aligned_fp_area % FPUALIGN) { - aligned_fp_area += FPUALIGN - - (aligned_fp_area % FPUALIGN); - } - rp->p_fpu_state.fpu_save_area_p = - (void *) aligned_fp_area; - } -#endif } static void switch_address_space_idle(void) diff --git a/kernel/proc.h b/kernel/proc.h index e2442e445..ea546ef33 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -21,7 +21,6 @@ struct proc { struct stackframe_s p_reg; /* process' registers saved in stack frame */ - struct fpu_state_s p_fpu_state; /* process' fpu_regs saved lazily */ struct segframe p_seg; /* segment descriptors */ proc_nr_t p_nr; /* number of this process (for fast access) */ struct priv *p_priv; /* system privileges structure */ diff --git a/kernel/proto.h b/kernel/proto.h index 8c66598b4..19eb2f52a 100644 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -55,6 +55,7 @@ int mini_notify(const struct proc *src, endpoint_t dst); void enqueue(struct proc *rp); void dequeue(struct proc *rp); void switch_to_user(void); +void arch_proc_init(int nr, struct proc *rp); struct proc * arch_finish_switch_to_user(void); struct proc *endpoint_lookup(endpoint_t ep); #if DEBUG_ENABLE_IPC_WARNINGS diff --git a/kernel/system/do_fork.c b/kernel/system/do_fork.c index e6788f72f..6d588b107 100644 --- a/kernel/system/do_fork.c +++ b/kernel/system/do_fork.c @@ -27,7 +27,7 @@ int do_fork(struct proc * caller, message * m_ptr) /* Handle sys_fork(). PR_ENDPT has forked. The child is PR_SLOT. */ #if (_MINIX_CHIP == _CHIP_INTEL) reg_t old_ldt_sel; - void *old_fpu_save_area_p; + char *old_fpu_save_area_p; #endif register struct proc *rpc; /* child process pointer */ struct proc *rpp; /* parent process pointer */ @@ -59,16 +59,14 @@ int do_fork(struct proc * caller, message * m_ptr) gen = _ENDPOINT_G(rpc->p_endpoint); #if (_MINIX_CHIP == _CHIP_INTEL) old_ldt_sel = rpc->p_seg.p_ldt_sel; /* backup local descriptors */ - old_fpu_save_area_p = rpc->p_fpu_state.fpu_save_area_p; + old_fpu_save_area_p = rpc->p_seg.fpu_state; #endif *rpc = *rpp; /* copy 'proc' struct */ #if (_MINIX_CHIP == _CHIP_INTEL) rpc->p_seg.p_ldt_sel = old_ldt_sel; /* restore descriptors */ - rpc->p_fpu_state.fpu_save_area_p = old_fpu_save_area_p; + rpc->p_seg.fpu_state = old_fpu_save_area_p; if(proc_used_fpu(rpp)) - memcpy(rpc->p_fpu_state.fpu_save_area_p, - rpp->p_fpu_state.fpu_save_area_p, - FPU_XFP_SIZE); + memcpy(rpc->p_seg.fpu_state, rpp->p_seg.fpu_state, FPU_XFP_SIZE); #endif if(++gen >= _ENDPOINT_MAX_GENERATION) /* increase generation */ gen = 1; /* generation number wraparound */ diff --git a/kernel/system/do_mcontext.c b/kernel/system/do_mcontext.c index b3b2e22ba..da0b91014 100644 --- a/kernel/system/do_mcontext.c +++ b/kernel/system/do_mcontext.c @@ -45,8 +45,7 @@ int do_getmcontext(struct proc * caller, message * m_ptr) /* make sure that the FPU context is saved into proc structure first */ save_fpu(rp); mc.mc_fpu_flags = rp->p_misc_flags & MF_FPU_INITIALIZED; - memcpy(&(mc.mc_fpu_state), rp->p_fpu_state.fpu_save_area_p, - FPU_XFP_SIZE); + memcpy(&(mc.mc_fpu_state), rp->p_seg.fpu_state, FPU_XFP_SIZE); } #endif @@ -84,8 +83,7 @@ int do_setmcontext(struct proc * caller, message * m_ptr) /* Copy FPU state */ if (mc.mc_fpu_flags & MF_FPU_INITIALIZED) { rp->p_misc_flags |= MF_FPU_INITIALIZED; - memcpy(rp->p_fpu_state.fpu_save_area_p, &(mc.mc_fpu_state), - FPU_XFP_SIZE); + memcpy(rp->p_seg.fpu_state, &(mc.mc_fpu_state), FPU_XFP_SIZE); } else rp->p_misc_flags &= ~MF_FPU_INITIALIZED; /* force reloading FPU in either case */ diff --git a/kernel/system/do_sigreturn.c b/kernel/system/do_sigreturn.c index 19a908587..6edc492fb 100644 --- a/kernel/system/do_sigreturn.c +++ b/kernel/system/do_sigreturn.c @@ -55,8 +55,7 @@ int do_sigreturn(struct proc * caller, message * m_ptr) #if (_MINIX_CHIP == _CHIP_INTEL) if(sc.sc_flags & MF_FPU_INITIALIZED) { - memcpy(rp->p_fpu_state.fpu_save_area_p, &sc.sc_fpu_state, - FPU_XFP_SIZE); + memcpy(rp->p_seg.fpu_state, &sc.sc_fpu_state, FPU_XFP_SIZE); rp->p_misc_flags |= MF_FPU_INITIALIZED; /* Restore math usage flag. */ /* force reloading FPU */ release_fpu(rp); diff --git a/kernel/system/do_sigsend.c b/kernel/system/do_sigsend.c index e2bfba8a5..c712f2cb8 100644 --- a/kernel/system/do_sigsend.c +++ b/kernel/system/do_sigsend.c @@ -46,8 +46,7 @@ int do_sigsend(struct proc * caller, message * m_ptr) if(proc_used_fpu(rp)) { /* save the FPU context before saving it to the sig context */ save_fpu(rp); - memcpy(&sc.sc_fpu_state, rp->p_fpu_state.fpu_save_area_p, - FPU_XFP_SIZE); + memcpy(&sc.sc_fpu_state, rp->p_seg.fpu_state, FPU_XFP_SIZE); } #endif diff --git a/kernel/system/do_update.c b/kernel/system/do_update.c index 76fc469ea..127157509 100644 --- a/kernel/system/do_update.c +++ b/kernel/system/do_update.c @@ -22,8 +22,6 @@ static void adjust_proc_slot(struct proc *rp, struct proc *from_rp); static void adjust_priv_slot(struct priv *privp, struct priv *from_privp); -static void swap_fpu_state(struct proc *a_rp, struct proc *b_orig_rp, - struct proc *b_copy_rp); static void swap_proc_slot_pointer(struct proc **rpp, struct proc *src_rp, struct proc *dst_rp); @@ -110,10 +108,6 @@ int do_update(struct proc * caller, message * m_ptr) adjust_priv_slot(priv(src_rp), &orig_src_priv); adjust_priv_slot(priv(dst_rp), &orig_dst_priv); - /* Swap FPU state. Can only be done after adjusting the process slots. */ - swap_fpu_state(src_rp, dst_rp, &orig_dst_proc); - swap_fpu_state(dst_rp, src_rp, &orig_src_proc); - /* Swap global process slot addresses. */ swap_proc_slot_pointer(get_cpulocal_var_ptr(ptproc), src_rp, dst_rp); @@ -152,11 +146,6 @@ static void adjust_proc_slot(struct proc *rp, struct proc *from_rp) priv(rp)->s_proc_nr = from_rp->p_nr; rp->p_caller_q = from_rp->p_caller_q; -#if (_MINIX_CHIP == _CHIP_INTEL) - /* Preserve FPU pointer. */ - rp->p_fpu_state.fpu_save_area_p = from_rp->p_fpu_state.fpu_save_area_p; -#endif - /* preserve scheduling */ rp->p_scheduler = from_rp->p_scheduler; #ifdef CONFIG_SMP @@ -179,26 +168,6 @@ static void adjust_priv_slot(struct priv *privp, struct priv *from_privp) privp->s_alarm_timer = from_privp->s_alarm_timer; } -/*===========================================================================* - * swap_fpu_state * - *===========================================================================*/ -static void swap_fpu_state(struct proc *a_rp, struct proc *b_orig_rp, - struct proc *b_copy_rp) -{ - /* Copy the FPU state from process B's copied slot, using B's original FPU - * save area alignment, into process A's slot. - */ -#if (_MINIX_CHIP == _CHIP_INTEL) - int align; - - align = (int) ((char *) b_orig_rp->p_fpu_state.fpu_save_area_p - - (char *) &b_orig_rp->p_fpu_state.fpu_image); - - memcpy(a_rp->p_fpu_state.fpu_save_area_p, - b_copy_rp->p_fpu_state.fpu_image + align, FPU_XFP_SIZE); -#endif -} - /*===========================================================================* * swap_proc_slot_pointer * *===========================================================================*/ -- 2.44.0