From 2d72cbec418fc500da13b6bbea46af899db2b73a Mon Sep 17 00:00:00 2001
From: Ben Gras
Date: Sun, 10 Jun 2012 17:50:17 +0000
Subject: [PATCH] SYSENTER/SYSCALL support

. add cpufeature detection of both
. use it for both ipc and kernelcall traps, using a register for the
  call number
. SYSENTER/SYSCALL do not save any context, therefore userland has to
  save it
. to accommodate multiple kernel entry/exit types, the entry type is
  recorded in the process struct. hitherto all types were interrupts
  (soft int, exception, hard int); now SYSENTER/SYSCALL is new, with the
  difference that context is not fully restored from the proc struct
  when running the process again. this can't be done, as some
  information is missing.
. complication: cases in which the kernel has to fully change process
  context (i.e. sigreturn). in that case the exit type is changed from
  SYSENTER/SYSEXIT to soft-int (i.e. iret) and context is fully restored
  from the proc struct. this does mean the PC and SP must change, as the
  sysenter/sysexit userland code would otherwise try to restore its own
  context. this is true in the sigreturn case.
. all usage can be overridden by setting libc_ipc=1
---
 include/arch/i386/include/archtypes.h   |   1 +
 include/arch/i386/include/asm.h         |   7 ++
 include/arch/i386/include/interrupt.h   |   6 +-
 include/arch/i386/include/vm.h          |   3 +
 include/minix/com.h                     |   1 +
 include/minix/const.h                   |   4 +
 include/minix/cpufeature.h              |   4 +
 include/minix/ipc.h                     |  48 +++++----
 include/minix/type.h                    |   7 +-
 kernel/arch/i386/Makefile.inc           |   2 +-
 kernel/arch/i386/apic.c                 |   6 +-
 kernel/arch/i386/apic_asm.S             |   4 +-
 kernel/arch/i386/arch_system.c          |  99 +++++++++++++++++-
 kernel/arch/i386/arch_watchdog.c        |   5 -
 kernel/arch/i386/exception.c            |   5 -
 kernel/arch/i386/include/arch_proto.h   |  43 +++++++-
 kernel/arch/i386/include/archconst.h    |  22 ++++
 kernel/arch/i386/memory.c               |  36 ++++++-
 kernel/arch/i386/mpx.S                  | 128 +++++++++++++++++++++---
 kernel/arch/i386/procoffsets.cf         |   2 +
 kernel/arch/i386/protect.c              |  48 ++++++++-
 kernel/arch/i386/sconst.h               |   3 +-
 kernel/arch/i386/usermapped_data_arch.c |  33 ++++++
 kernel/arch/i386/usermapped_glo_ipc.S   |  94 +++++++++++++++++
 kernel/main.c                           |   4 +-
 kernel/proc.h                           |   1 +
 kernel/proto.h                          |   1 +
 kernel/system/do_sigreturn.c            |   4 +-
 kernel/system/do_vmctl.c                |   3 +
 lib/libc/arch/i386/sys-minix/_ipc.S     |  26 ++---
 lib/libc/arch/i386/sys-minix/_senda.S   |   2 +-
 lib/libc/sys-minix/init.c               |  16 ++-
 lib/libc/sys-minix/syscall.c            |   2 +-
 lib/libexec/exec_general.c              |   1 +
 lib/libminlib/i386/_cpufeature.c        |  39 ++++++
 lib/libsys/kernel_call.c                |   2 +-
 lib/libsys/taskcall.c                   |   2 +-
 servers/vm/main.c                       |  10 ++
 38 files changed, 644 insertions(+), 80 deletions(-)
 create mode 100644 kernel/arch/i386/usermapped_data_arch.c
 create mode 100644 kernel/arch/i386/usermapped_glo_ipc.S

diff --git a/include/arch/i386/include/archtypes.h b/include/arch/i386/include/archtypes.h
index cc3a6b478..22a2ed2bc 100644
--- a/include/arch/i386/include/archtypes.h
+++ b/include/arch/i386/include/archtypes.h
@@ -33,6 +33,7 @@ typedef struct segframe {
 	reg_t	p_cr3;		/* page table root */
 	u32_t	*p_cr3_v;
 	char	*fpu_state;
+	int	p_kern_trap_style;
 } segframe_t;

 struct cpu_info {
diff --git a/include/arch/i386/include/asm.h b/include/arch/i386/include/asm.h
index cb15ecf55..1b4c91de2 100644
--- a/include/arch/i386/include/asm.h
+++ b/include/arch/i386/include/asm.h
@@ -218,6 +218,13 @@
 #ifdef __minix
 #define IMPORT(sym)	\
 	.extern	_C_LABEL(sym)
+
+#define KERVEC_ORIG 32	/* syscall trap to kernel */
+#define IPCVEC_ORIG 33	/* ipc trap to kernel */
+
+#define KERVEC_UM 34	/* syscall trap to kernel, user-mapped code */
+#define IPCVEC_UM 35	/* ipc trap to kernel, user-mapped code */
+
 #endif

 #endif /* !_I386_ASM_H_ */
diff --git a/include/arch/i386/include/interrupt.h b/include/arch/i386/include/interrupt.h
index f7442f897..b725875c6 100644
--- a/include/arch/i386/include/interrupt.h
+++ b/include/arch/i386/include/interrupt.h
@@ -25,8 +25,10 @@
 #define OVERFLOW_VECTOR		4	/* from INTO */

 /* Fixed system call vector. */
-#define KERN_CALL_VECTOR	32	/* system calls are made with int SYSVEC */
-#define IPC_VECTOR		33	/* interrupt vector for ipc */
+#define KERN_CALL_VECTOR_ORIG	32	/* system calls are made with int SYSVEC */
+#define IPC_VECTOR_ORIG		33	/* interrupt vector for ipc */
+#define KERN_CALL_VECTOR_UM	34	/* user-mapped equivalent */
+#define IPC_VECTOR_UM		35	/* user-mapped equivalent */

 /* Hardware interrupt numbers. */
 #ifndef USE_APIC
diff --git a/include/arch/i386/include/vm.h b/include/arch/i386/include/vm.h
index 71104e0a2..5c0c3c109 100644
--- a/include/arch/i386/include/vm.h
+++ b/include/arch/i386/include/vm.h
@@ -73,6 +73,7 @@ i386/vm.h
 /* CPUID flags */
 #define CPUID1_EDX_FPU		(1L)		/* FPU presence */
 #define CPUID1_EDX_PSE		(1L <<  3)	/* Page Size Extension */
+#define CPUID1_EDX_SYSENTER	(1L << 11)	/* Intel SYSENTER */
 #define CPUID1_EDX_PGE		(1L << 13)	/* Page Global (bit) Enable */
 #define CPUID1_EDX_APIC_ON_CHIP (1L << 9)	/* APIC is present on the chip */
 #define CPUID1_EDX_TSC		(1L <<  4)	/* Timestamp counter present */
@@ -85,6 +86,8 @@ i386/vm.h
 #define CPUID1_ECX_SSE4_1	(1L << 19)
 #define CPUID1_ECX_SSE4_2	(1L << 20)

+#define CPUID_EF_EDX_SYSENTER	(1L << 11)	/* AMD SYSCALL/SYSRET (ext. features leaf) */
+
 #ifndef __ASSEMBLY__

 #include
diff --git a/include/minix/com.h b/include/minix/com.h
index 525e4bbc2..7ff70e30c 100644
--- a/include/minix/com.h
+++ b/include/minix/com.h
@@ -629,6 +629,7 @@
 #define VMCTL_VMINHIBIT_SET	30
 #define VMCTL_VMINHIBIT_CLEAR	31
 #define VMCTL_CLEARMAPCACHE	32
+#define VMCTL_BOOTINHIBIT_CLEAR	33

 /* Codes and field names for SYS_SYSCTL. */
 #define SYSCTL_CODE		m1_i1	/* SYSCTL_CODE_* below */
diff --git a/include/minix/const.h b/include/minix/const.h
index 2a2d634dc..0afebcf6c 100644
--- a/include/minix/const.h
+++ b/include/minix/const.h
@@ -173,4 +173,8 @@
 /* magic value to put in struct proc entries for sanity checks. */
 #define PMAGIC 0xC0FFEE1

+/* MINIX_KERNFLAGS flags */
+#define MKF_I386_INTEL_SYSENTER	(1L << 0)	/* SYSENTER available and supported */
+#define MKF_I386_AMD_SYSCALL	(1L << 1)	/* SYSCALL available and supported */
+
 #endif /* _MINIX_CONST_H */
diff --git a/include/minix/cpufeature.h b/include/minix/cpufeature.h
index e5f983893..4ceae56c5 100644
--- a/include/minix/cpufeature.h
+++ b/include/minix/cpufeature.h
@@ -21,6 +21,10 @@
 #define _CPUF_I386_HTT		13	/* Supports HTT */
 #define _CPUF_I386_HTT_MAX_NUM	14	/* Maximal num of threads */

+#define _CPUF_I386_MTRR		15
+#define _CPUF_I386_SYSENTER	16	/* Intel SYSENTER instrs */
+#define _CPUF_I386_SYSCALL	17	/* AMD SYSCALL instrs */
+
 int _cpufeature(int featureno);

 #endif
diff --git a/include/minix/ipc.h b/include/minix/ipc.h
index e07cdefb3..48a92da52 100644
--- a/include/minix/ipc.h
+++ b/include/minix/ipc.h
@@ -5,6 +5,7 @@
 #endif
 #include
 #include
+#include

 /*==========================================================================*
  * Types relating to messages.						     *
 *==========================================================================*/
@@ -152,24 +153,37 @@ typedef struct asynmsg
 #define AMF_NOTIFY_ERR	020	/* Send a notification when AMF_DONE is set and
 				 * delivery of the message failed */

-/* Hide names to avoid name space pollution. 
*/ -#define echo _echo -#define notify _notify -#define sendrec _sendrec -#define receive _receive -#define send _send -#define sendnb _sendnb -#define senda _senda - -int echo(message *m_ptr); -int notify(endpoint_t dest); -int sendrec(endpoint_t src_dest, message *m_ptr); -int receive(endpoint_t src, message *m_ptr, int *status_ptr); -int send(endpoint_t dest, message *m_ptr); -int sendnb(endpoint_t dest, message *m_ptr); -int senda(asynmsg_t *table, size_t count); +int _send_orig(endpoint_t dest, message *m_ptr); +int _receive_orig(endpoint_t src, message *m_ptr, int *status_ptr); +int _sendrec_orig(endpoint_t src_dest, message *m_ptr); +int _sendnb_orig(endpoint_t dest, message *m_ptr); +int _notify_orig(endpoint_t dest); +int _senda_orig(asynmsg_t *table, size_t count); +int _do_kernel_call_orig(message *m_ptr); + int _minix_kernel_info_struct(struct minix_kerninfo **); -int _do_kernel_call(message *m_ptr); +struct minix_ipcvecs { + int (*send_ptr)(endpoint_t dest, message *m_ptr); + int (*receive_ptr)(endpoint_t src, message *m_ptr, int *st); + int (*sendrec_ptr)(endpoint_t src_dest, message *m_ptr); + int (*sendnb_ptr)(endpoint_t dest, message *m_ptr); + int (*notify_ptr)(endpoint_t dest); + int (*do_kernel_call_ptr)(message *m_ptr); + int (*senda_ptr)(asynmsg_t *table, size_t count); +}; + +/* kernel-set IPC vectors retrieved by a constructor in libc/sys-minix/init.c */ +extern struct minix_ipcvecs _minix_ipcvecs; + +#define CHOOSETRAP(name) (_minix_ipcvecs. name ## _ptr) + +#define send CHOOSETRAP(send) +#define receive CHOOSETRAP(receive) +#define sendrec CHOOSETRAP(sendrec) +#define sendnb CHOOSETRAP(sendnb) +#define notify CHOOSETRAP(notify) +#define do_kernel_call CHOOSETRAP(do_kernel_call) +#define senda CHOOSETRAP(senda) #endif /* _IPC_H */ diff --git a/include/minix/type.h b/include/minix/type.h index dd1473209..dbcb23e1f 100644 --- a/include/minix/type.h +++ b/include/minix/type.h @@ -168,8 +168,8 @@ struct minix_kerninfo { */ #define KERNINFO_MAGIC 0xfc3b84bf u32_t kerninfo_magic; - u32_t minix_feature_flags; - u32_t flags_unused1; + u32_t minix_feature_flags; /* features in minix kernel */ + u32_t ki_flags; /* what is present in this struct */ u32_t flags_unused2; u32_t flags_unused3; u32_t flags_unused4; @@ -177,7 +177,10 @@ struct minix_kerninfo { struct machine *machine; struct kmessages *kmessages; struct loadinfo *loadinfo; + struct minix_ipcvecs *minix_ipcvecs; } __packed; +#define MINIX_KIF_IPCVECS (1L << 0) + #endif /* _TYPE_H */ diff --git a/kernel/arch/i386/Makefile.inc b/kernel/arch/i386/Makefile.inc index 25f637c53..c9b9375cc 100644 --- a/kernel/arch/i386/Makefile.inc +++ b/kernel/arch/i386/Makefile.inc @@ -36,7 +36,7 @@ SRCS+= mpx.S arch_clock.c arch_do_vmctl.c arch_system.c \ do_iopenable.c do_readbios.c do_sdevio.c exception.c i8259.c io_inb.S \ io_inl.S io_intr.S io_inw.S io_outb.S io_outl.S io_outw.S klib.S klib16.S memory.c \ oxpcie.c protect.c direct_tty_utils.c arch_reset.c \ - pg_utils.c + pg_utils.c usermapped_glo_ipc.S usermapped_data_arch.c OBJS.kernel+= ${UNPAGED_OBJS} diff --git a/kernel/arch/i386/apic.c b/kernel/arch/i386/apic.c index 597650020..9ec3190c2 100644 --- a/kernel/arch/i386/apic.c +++ b/kernel/arch/i386/apic.c @@ -835,8 +835,10 @@ static struct gate_table_s gate_table_ioapic[] = { }; static struct gate_table_s gate_table_common[] = { - { ipc_entry, IPC_VECTOR, USER_PRIVILEGE }, - { kernel_call_entry, KERN_CALL_VECTOR, USER_PRIVILEGE }, + { ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE }, + { kernel_call_entry_orig, 
KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
+	{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
+	{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
 	{ NULL, 0, 0}
 };

diff --git a/kernel/arch/i386/apic_asm.S b/kernel/arch/i386/apic_asm.S
index 33a379d15..ae9e0b6ba 100644
--- a/kernel/arch/i386/apic_asm.S
+++ b/kernel/arch/i386/apic_asm.S
@@ -18,7 +18,7 @@ ENTRY(apic_hwint##irq)	\
 	TEST_INT_IN_KERNEL(4, 0f)	;\
 	;\
-	SAVE_PROCESS_CTX(0)	;\
+	SAVE_PROCESS_CTX(0, KTS_INT_HARD)	;\
 	push	%ebp	;\
 	call	_C_LABEL(context_stop)	;\
 	add	$4, %esp	;\
@@ -47,7 +47,7 @@ ENTRY(apic_hwint##irq)	\
 #define lapic_intr(func)	\
 	TEST_INT_IN_KERNEL(4, 0f)	;\
 	;\
-	SAVE_PROCESS_CTX(0)	;\
+	SAVE_PROCESS_CTX(0, KTS_INT_HARD)	;\
 	push	%ebp	;\
 	call	_C_LABEL(context_stop)	;\
 	add	$4, %esp	;\
diff --git a/kernel/arch/i386/arch_system.c b/kernel/arch/i386/arch_system.c
index 2296bf835..4b1812be8 100644
--- a/kernel/arch/i386/arch_system.c
+++ b/kernel/arch/i386/arch_system.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -149,6 +150,7 @@ static char fpu_state[NR_PROCS][FPU_XFP_SIZE] __aligned(FPUALIGN);
 void arch_proc_reset(struct proc *pr)
 {
 	char *v = NULL;
+	struct stackframe_s reg;

 	assert(pr->p_nr < NR_PROCS);

@@ -161,11 +163,11 @@ void arch_proc_reset(struct proc *pr)
 	}

 	/* Clear process state. */
-	memset(&pr->p_reg, 0, sizeof(pr->p_reg));
+	memset(&reg, 0, sizeof(pr->p_reg));
 	if(iskerneln(pr->p_nr))
-		pr->p_reg.psw = INIT_TASK_PSW;
+		reg.psw = INIT_TASK_PSW;
 	else
-		pr->p_reg.psw = INIT_PSW;
+		reg.psw = INIT_PSW;

 	pr->p_seg.fpu_state = v;

@@ -178,6 +180,9 @@ void arch_proc_reset(struct proc *pr)

 	pr->p_reg.ss = pr->p_reg.es =
 		pr->p_reg.ds = USER_DS_SELECTOR;
+
+	/* set full context and make sure it gets restored */
+	arch_proc_setcontext(pr, &reg, 0);
 }

 void arch_set_secondary_ipc_return(struct proc *p, u32_t val)
@@ -512,6 +517,94 @@ struct proc * arch_finish_switch_to_user(void)
 	return p;
 }

+void arch_proc_setcontext(struct proc *p, struct stackframe_s *state, int isuser)
+{
+	if(isuser) {
+		/* Restore user bits of psw from sc, maintain system bits
+		 * from proc.
+		 */
+		state->psw = (state->psw & X86_FLAGS_USER) |
+			(p->p_reg.psw & ~X86_FLAGS_USER);
+	}
+
+	/* someone wants to totally re-initialize process state */
+	assert(sizeof(p->p_reg) == sizeof(*state));
+	memcpy(&p->p_reg, state, sizeof(*state));
+
+	/* further code is instructed to not touch the context
+	 * any more
+	 */
+	p->p_misc_flags |= MF_CONTEXT_SET;
+
+	/* on x86 this requires returning using iret (KTS_INT)
+	 * so that the full context is restored instead of relying on
+	 * the userspace doing it (as it would do on SYSEXIT).
+	 * as ESP and EIP are also reset, userspace won't try to
+	 * restore bogus context after returning.
+	 *
+	 * if the process is not blocked, or if the kernel will ignore
+	 * our trap style, we needn't panic, but things will probably
+	 * not go well for the process (the restored context will be
+	 * ignored), and the situation should be debugged.
+	 */
+	if(!(p->p_rts_flags)) {
+		printf("WARNING: setting full context of runnable process\n");
+		print_proc(p);
+		util_stacktrace();
+	}
+	if(p->p_seg.p_kern_trap_style == KTS_NONE)
+		printf("WARNING: setting full context of out-of-kernel process\n");
+	p->p_seg.p_kern_trap_style = KTS_FULLCONTEXT;
+}
+
+void restore_user_context(struct proc *p)
+{
+	int trap_style = p->p_seg.p_kern_trap_style;
+#if 0
+#define TYPES 10
+	static int restores[TYPES], n = 0;
+
+	p->p_seg.p_kern_trap_style = KTS_NONE;
+
+	if(trap_style >= 0 && trap_style < TYPES)
+		restores[trap_style]++;
+
+	if(!(n++ % 500000)) {
+		int t;
+		for(t = 0; t < TYPES; t++)
+			if(restores[t])
+				printf("%d: %d ", t, restores[t]);
+		printf("\n");
+	}
+#endif
+
+	if(trap_style == KTS_SYSENTER) {
+		restore_user_context_sysenter(p);
+		NOT_REACHABLE;
+	}
+
+	if(trap_style == KTS_SYSCALL) {
+		restore_user_context_syscall(p);
+		NOT_REACHABLE;
+	}
+
+	switch(trap_style) {
+		case KTS_NONE:
+			panic("no entry trap style known");
+		case KTS_INT_HARD:
+		case KTS_INT_UM:
+		case KTS_FULLCONTEXT:
+		case KTS_INT_ORIG:
+			restore_user_context_int(p);
+			NOT_REACHABLE;
+		default:
+			panic("unknown trap style recorded");
+			NOT_REACHABLE;
+	}
+
+	NOT_REACHABLE;
+}
+
 void fpu_sigcontext(struct proc *pr, struct sigframe *fr, struct sigcontext *sc)
 {
 	int fp_error;
diff --git a/kernel/arch/i386/arch_watchdog.c b/kernel/arch/i386/arch_watchdog.c
index b6884e4d4..80647af0d 100644
--- a/kernel/arch/i386/arch_watchdog.c
+++ b/kernel/arch/i386/arch_watchdog.c
@@ -9,11 +9,6 @@

 #define CPUID_UNHALTED_CORE_CYCLES_AVAILABLE 0

-#define INTEL_MSR_PERFMON_CRT0	0xc1
-#define INTEL_MSR_PERFMON_SEL0	0x186
-
-#define INTEL_MSR_PERFMON_SEL0_ENABLE	(1 << 22)
-
 /*
  * Intel architecture performance counters watchdog
  */
diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c
index e10d6dd73..d885cfcd1 100644
--- a/kernel/arch/i386/exception.c
+++ b/kernel/arch/i386/exception.c
@@ -248,11 +248,6 @@ void exception_handler(int is_nested, struct exception_frame * frame)
 		frame->vector, (unsigned long)frame->errcode,
 		(unsigned long)frame->eip, frame->cs,
 		(unsigned long)frame->eflags);
-	printseg("cs: ", 1, saved_proc, frame->cs);
-	printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds);
-	if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) {
-		printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss);
-	}
 	proc_stacktrace(saved_proc);
 }

diff --git a/kernel/arch/i386/include/arch_proto.h b/kernel/arch/i386/include/arch_proto.h
index 3693ac460..9748b6c52 100644
--- a/kernel/arch/i386/include/arch_proto.h
+++ b/kernel/arch/i386/include/arch_proto.h
@@ -47,10 +47,25 @@ void alignment_check(void);
 void machine_check(void);
 void simd_exception(void);

+void restore_user_context_int(struct proc *);
+void restore_user_context_sysenter(struct proc *);
+void restore_user_context_syscall(struct proc *);
+
 /* Software interrupt handlers, in numerical order. */
 void	trp(void);
-void	ipc_entry(void);
-void	kernel_call_entry(void);
+void	ipc_entry_softint_orig(void);
+void	ipc_entry_softint_um(void);
+void	ipc_entry_sysenter(void);
+void	ipc_entry_syscall_cpu0(void);
+void	ipc_entry_syscall_cpu1(void);
+void	ipc_entry_syscall_cpu2(void);
+void	ipc_entry_syscall_cpu3(void);
+void	ipc_entry_syscall_cpu4(void);
+void	ipc_entry_syscall_cpu5(void);
+void	ipc_entry_syscall_cpu6(void);
+void	ipc_entry_syscall_cpu7(void);
+void	kernel_call_entry_orig(void);
+void	kernel_call_entry_um(void);
 void	level0_call(void);

 /* exception.c */
@@ -111,6 +126,30 @@ void x86_load_es(u32_t);
 void x86_load_fs(u32_t);
 void x86_load_gs(u32_t);

+/* ipc functions in usermapped_glo_ipc.S */
+int usermapped_send_softint(endpoint_t dest, message *m_ptr);
+int usermapped_receive_softint(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_softint(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_softint(endpoint_t dest, message *m_ptr);
+int usermapped_notify_softint(endpoint_t dest);
+int usermapped_do_kernel_call_softint(message *m_ptr);
+int usermapped_senda_softint(asynmsg_t *table, size_t count);
+
+int usermapped_send_syscall(endpoint_t dest, message *m_ptr);
+int usermapped_receive_syscall(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_syscall(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_syscall(endpoint_t dest, message *m_ptr);
+int usermapped_notify_syscall(endpoint_t dest);
+int usermapped_do_kernel_call_syscall(message *m_ptr);
+int usermapped_senda_syscall(asynmsg_t *table, size_t count);
+
+int usermapped_send_sysenter(endpoint_t dest, message *m_ptr);
+int usermapped_receive_sysenter(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_sysenter(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_sysenter(endpoint_t dest, message *m_ptr);
+int usermapped_notify_sysenter(endpoint_t dest);
+int usermapped_do_kernel_call_sysenter(message *m_ptr);
+int usermapped_senda_sysenter(asynmsg_t *table, size_t count);

 void switch_k_stack(void * esp, void (* continuation)(void));

diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h
index 6ae9de784..88fff3c59 100644
--- a/kernel/arch/i386/include/archconst.h
+++ b/kernel/arch/i386/include/archconst.h
@@ -150,4 +150,26 @@

 #define PG_ALLOCATEME ((phys_bytes)-1)

+/* MSRs */
+#define INTEL_MSR_PERFMON_CRT0		0xc1
+#define INTEL_MSR_SYSENTER_CS		0x174
+#define INTEL_MSR_SYSENTER_ESP		0x175
+#define INTEL_MSR_SYSENTER_EIP		0x176
+#define INTEL_MSR_PERFMON_SEL0		0x186
+
+#define INTEL_MSR_PERFMON_SEL0_ENABLE	(1 << 22)
+
+#define AMD_EFER_SCE		(1L << 0)	/* SYSCALL/SYSRET enabled */
+#define AMD_MSR_EFER		0xC0000080	/* extended features msr */
+#define AMD_MSR_STAR		0xC0000081	/* SYSCALL params msr */
+
+/* trap styles recorded on kernel entry and exit */
+#define KTS_NONE	1	/* invalid */
+#define KTS_INT_HARD	2	/* exception / hard interrupt */
+#define KTS_INT_ORIG	3	/* soft interrupt from libc */
+#define KTS_INT_UM	4	/* soft interrupt from usermapped code */
+#define KTS_FULLCONTEXT	5	/* must restore full context */
+#define KTS_SYSENTER	6	/* SYSENTER instruction (usermapped) */
+#define KTS_SYSCALL	7	/* SYSCALL instruction (usermapped) */
+
 #endif /* _I386_ACONST_H */
diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c
index 542084f5f..776fec03a 100644
--- a/kernel/arch/i386/memory.c
+++ b/kernel/arch/i386/memory.c
@@ -883,7 +883,10 @@ int arch_phys_map_reply(const int index, const vir_bytes addr)
 	}
 #endif
 	if(index == first_um_idx) {
-		u32_t usermapped_offset;
+		extern struct minix_ipcvecs minix_ipcvecs_sysenter,
+			minix_ipcvecs_syscall,
+			minix_ipcvecs_softint;
+		extern u32_t usermapped_offset;
 		assert(addr > (u32_t) &usermapped_start);
 		usermapped_offset = addr - (u32_t) &usermapped_start;
 		memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
@@ -895,13 +898,44 @@ int arch_phys_map_reply(const int index, const vir_bytes addr)
 		ASSIGN(kmessages);
 		ASSIGN(loadinfo);

+		/* select the right set of IPC routines to map into processes */
+		if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
+			printf("kernel: selecting intel sysenter ipc style\n");
+			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
+		} else if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
+			printf("kernel: selecting amd syscall ipc style\n");
+			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
+		} else {
+			printf("kernel: selecting fallback (int) ipc style\n");
+			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
+		}
+
 		/* adjust the pointers of the functions and the struct
 		 * itself to the user-accessible mapping
 		 */
+		FIXPTR(minix_kerninfo.minix_ipcvecs->send_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs->receive_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs->senda_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs->notify_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call_ptr);
+		FIXPTR(minix_kerninfo.minix_ipcvecs);
+
 		minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
 		minix_kerninfo.minix_feature_flags = minix_feature_flags;
 		minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);

+		/* if libc_ipc is set, disable usermapped ipc functions
+		 * and force binaries to use in-libc fallbacks.
+		 */
+		if(env_get("libc_ipc")) {
+			printf("kernel: forcing in-libc fallback ipc style\n");
+			minix_kerninfo.minix_ipcvecs = NULL;
+		} else {
+			minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
+		}
+
 		return OK;
 	}

diff --git a/kernel/arch/i386/mpx.S b/kernel/arch/i386/mpx.S
index 44af169d0..75305e7aa 100644
--- a/kernel/arch/i386/mpx.S
+++ b/kernel/arch/i386/mpx.S
@@ -47,6 +47,7 @@
 /* Selected 386 tss offsets. */
 #define TSS3_S_SP0	4

+IMPORT(usermapped_offset)
 IMPORT(copr_not_available_handler)
 IMPORT(params_size)
 IMPORT(params_offset)
@@ -72,7 +73,7 @@ IMPORT(multiboot_init)
 #define hwint_master(irq)	\
 	TEST_INT_IN_KERNEL(4, 0f)	;\
 	;\
-	SAVE_PROCESS_CTX(0)	;\
+	SAVE_PROCESS_CTX(0, KTS_INT_HARD)	;\
 	push	%ebp	;\
 	movl	$0, %ebp	/* for stack trace */	;\
 	call	_C_LABEL(context_stop)	;\
@@ -132,7 +133,7 @@ ENTRY(hwint07)
 #define hwint_slave(irq)	\
 	TEST_INT_IN_KERNEL(4, 0f)	;\
 	;\
-	SAVE_PROCESS_CTX(0)	;\
+	SAVE_PROCESS_CTX(0, KTS_INT_HARD)	;\
 	push	%ebp	;\
 	movl	$0, %ebp	/* for stack trace */	;\
 	call	_C_LABEL(context_stop)	;\
@@ -187,13 +188,83 @@ ENTRY(hwint15)
 /* Interrupt routine for irq 15 */
 	hwint_slave(15)

+/* differences with sysenter:
+ * - we have to find our own per-cpu stack (i.e. post-SYSCALL
+ *   %esp is not configured)
+ * - we have to save the post-SYSRET %eip, provided by the cpu
+ *   in %ecx
+ * - the system call parameters are passed in %ecx, so the userland
+ *   code that executes SYSCALL copies %ecx to %edx. So the roles
+ *   of %ecx and %edx are reversed
+ * - we can use %esi as a scratch register
+ */
+#define ipc_entry_syscall_percpu(cpu)	;\
+ENTRY(ipc_entry_syscall_cpu ## cpu)	;\
+	xchg	%ecx, %edx	;\
+	mov	k_percpu_stacks+4*cpu, %esi	;\
+	mov	(%esi), %ebp	;\
+	movl	$KTS_SYSCALL, P_KERN_TRAP_STYLE(%ebp)	;\
+	xchg	%esp, %esi	;\
+	jmp	syscall_sysenter_common

+ipc_entry_syscall_percpu(0)
+ipc_entry_syscall_percpu(1)
+ipc_entry_syscall_percpu(2)
+ipc_entry_syscall_percpu(3)
+ipc_entry_syscall_percpu(4)
+ipc_entry_syscall_percpu(5)
+ipc_entry_syscall_percpu(6)
+ipc_entry_syscall_percpu(7)
+
+ENTRY(ipc_entry_sysenter)
+	/* SYSENTER simply sets kernel segments, EIP to here, and ESP
+	 * to tss->sp0 (through an MSR), so no automatic context saving
+	 * is done. interrupts are disabled.
+	 *
+	 * register usage:
+	 * edi: call type (IPCVEC, KERVEC)
+	 * ebx, eax, ecx: syscall params, set by userland
+	 * esi, edx: esp, eip to restore, set by userland
+	 *
+	 * no state is automatically saved; userland does all of that.
+	 */
+	mov	(%esp), %ebp /* get proc saved by arch_finish_switch_to_user */
+
+	/* inform kernel we entered by sysenter and should
+	 * therefore exit through restore_user_context_sysenter
+	 */
+	movl	$KTS_SYSENTER, P_KERN_TRAP_STYLE(%ebp)
+	add	usermapped_offset, %edx /* compensate for mapping difference */
+
+syscall_sysenter_common:
+	mov	%esi, SPREG(%ebp)	/* esi is return esp */
+	mov	%edx, PCREG(%ebp)	/* edx is return eip */
+
+	/* check for call type; do_ipc? */
+	cmp	$IPCVEC_UM, %edi
+	jz	ipc_entry_common
+
+	/* check for kernel trap */
+	cmp	$KERVEC_UM, %edi
+	jz	kernel_call_entry_common
+
+	/* unrecognized call number; restore user with error */
+	movl	$-1, AXREG(%ebp)
+	push	%ebp
+	call	restore_user_context	/* restore_user_context(%ebp); */
+
 /*
  * IPC is only from a process to kernel
  */
-ENTRY(ipc_entry)
+ENTRY(ipc_entry_softint_orig)
+	SAVE_PROCESS_CTX(0, KTS_INT_ORIG)
+	jmp	ipc_entry_common

-	SAVE_PROCESS_CTX(0)
+ENTRY(ipc_entry_softint_um)
+	SAVE_PROCESS_CTX(0, KTS_INT_UM)
+	jmp	ipc_entry_common

+ENTRY(ipc_entry_common)
 	/* save the pointer to the current process */
 	push	%ebp

@@ -226,10 +297,15 @@ ENTRY(ipc_entry)
 /*
  * kernel call is only from a process to kernel
  */
-ENTRY(kernel_call_entry)
+ENTRY(kernel_call_entry_orig)
+	SAVE_PROCESS_CTX(0, KTS_INT_ORIG)
+	jmp	kernel_call_entry_common

-	SAVE_PROCESS_CTX(0)
+ENTRY(kernel_call_entry_um)
+	SAVE_PROCESS_CTX(0, KTS_INT_UM)
+	jmp	kernel_call_entry_common

+ENTRY(kernel_call_entry_common)
 	/* save the pointer to the current process */
 	push	%ebp

@@ -270,7 +346,7 @@ exception_entry:
 	TEST_INT_IN_KERNEL(12, exception_entry_nested)

 exception_entry_from_user:
-	SAVE_PROCESS_CTX(8)
+	SAVE_PROCESS_CTX(8, KTS_INT_HARD)

 	/* stop user process cycles */
 	push	%ebp

@@ -306,10 +382,37 @@ exception_entry_nested:
 	/* resume execution at the point of exception */
 	iret

-/*===========================================================================*/
-/*				restart					     */
-/*===========================================================================*/
-ENTRY(restore_user_context)
+ENTRY(restore_user_context_sysenter)
+	/* return to userspace using sysexit.
+	 * the userspace process is responsible for most of the
+	 * context saving; we just have to restore the right EIP
+	 * and ESP here to resume execution, and set EAX and
+	 * EBX to the saved status values.
+	 */
+	mov	4(%esp), %ebp	/* retrieve proc ptr arg */
+	movw	$USER_DS_SELECTOR, %ax
+	movw	%ax, %ds
+	mov	PCREG(%ebp), %edx	/* sysexit restores EIP using EDX */
+	mov	SPREG(%ebp), %ecx	/* sysexit restores ESP using ECX */
+	mov	AXREG(%ebp), %eax	/* trap return value */
+	mov	BXREG(%ebp), %ebx	/* secondary return value */
+	sti				/* enable interrupts */
+	sysexit				/* jump to EIP in user */
+
+ENTRY(restore_user_context_syscall)
+	/* return to userspace using sysret.
+	 * the procedure is very similar to the sysexit path; it
+	 * requires manual %esp restoring, the new EIP in ECX, does
+	 * not require enabling interrupts, and of course sysret
+	 * instead of sysexit.
+	 */
+	mov	4(%esp), %ebp	/* retrieve proc ptr arg */
+	mov	PCREG(%ebp), %ecx	/* sysret restores EIP using ECX */
+	mov	SPREG(%ebp), %esp	/* restore ESP directly */
+	mov	AXREG(%ebp), %eax	/* trap return value */
+	mov	BXREG(%ebp), %ebx	/* secondary return value */
+	sysret				/* jump to EIP in user */
+
+ENTRY(restore_user_context_int)
 	mov	4(%esp), %ebp	/* will assume P_STACKBASE == 0 */

 	/* reconstruct the stack for iret */
@@ -415,7 +518,7 @@ LABEL(inval_opcode)
 LABEL(copr_not_available)
 	TEST_INT_IN_KERNEL(4, copr_not_available_in_kernel)
 	cld			/* set direction flag to a known value */
-	SAVE_PROCESS_CTX(0)
+	SAVE_PROCESS_CTX(0, KTS_INT_HARD)
 	/* stop user process cycles */
 	push	%ebp
 	mov	$0, %ebp
@@ -505,6 +608,7 @@ ENTRY(startup_ap_32)
 .data
 .short	0x526F	/* this must be the first data entry (magic #) */

+.bss
 k_initial_stack:
 .space	K_STACK_SIZE

diff --git a/kernel/arch/i386/procoffsets.cf b/kernel/arch/i386/procoffsets.cf
index d39772157..e460650dc 100644
--- a/kernel/arch/i386/procoffsets.cf
+++ b/kernel/arch/i386/procoffsets.cf
@@ -16,3 +16,5 @@ member CSREG p_reg.cs
 member	PSWREG	p_reg.psw
 member	SPREG	p_reg.sp
 member	P_CR3	p_seg.p_cr3
+member	P_KERN_TRAP_STYLE	p_seg.p_kern_trap_style
+member	P_MAGIC	p_magic
diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c
index 23f566107..8dc050220 100644
--- a/kernel/arch/i386/protect.c
+++ b/kernel/arch/i386/protect.c
@@ -5,6 +5,7 @@

 #include
 #include
+#include
 #include

 #include "kernel/kernel.h"
@@ -25,6 +26,8 @@ struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
 struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
 struct tss_s tss[CONFIG_MAX_CPUS];

+u32_t k_percpu_stacks[CONFIG_MAX_CPUS];
+
 int prot_init_done = 0;

 phys_bytes vir2phys(void *vir)
@@ -141,8 +144,10 @@ static struct gate_table_s gate_table_exceptions[] = {
 	{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
 	{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
 	{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
-	{ ipc_entry, IPC_VECTOR, USER_PRIVILEGE },
-	{ kernel_call_entry, KERN_CALL_VECTOR, USER_PRIVILEGE },
+	{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
+	{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
+	{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
+	{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
 	{ NULL, 0, 0}
 };

@@ -168,13 +173,45 @@ int tss_init(unsigned cpu, void * kernel_stack)
 	 * make space for process pointer and cpu id and point to the first
 	 * usable word
 	 */
-	t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
+	k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
 	/*
 	 * set the cpu id at the top of the stack so we know on which cpu
 	 * this stack is in use when we trap to kernel
 	 */
 	*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;

+	/* Set up Intel SYSENTER support if available. */
+	if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
+		ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
+		ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
+		ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
+	}
+
+	/* Set up AMD SYSCALL support if available. */
+	if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
+		u32_t msr_lo, msr_hi;
+
+		/* set SYSCALL ENABLE bit in EFER MSR */
+		ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
+		msr_lo |= AMD_EFER_SCE;
+		ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);
+
+		/* set STAR register value */
+#define set_star_cpu(forcpu) if(cpu == forcpu) {	\
+	ia32_msr_write(AMD_MSR_STAR,	\
+		((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR,	\
+		(u32_t) ipc_entry_syscall_cpu ## forcpu); }
+		set_star_cpu(0);
+		set_star_cpu(1);
+		set_star_cpu(2);
+		set_star_cpu(3);
+		set_star_cpu(4);
+		set_star_cpu(5);
+		set_star_cpu(6);
+		set_star_cpu(7);
+		assert(CONFIG_MAX_CPUS <= 8);
+	}
+
 	return SEG_SELECTOR(index);
 }

@@ -285,6 +322,11 @@ void prot_init()
 {
 	extern char k_boot_stktop;

+	if(_cpufeature(_CPUF_I386_SYSENTER))
+		minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
+	if(_cpufeature(_CPUF_I386_SYSCALL))
+		minix_feature_flags |= MKF_I386_AMD_SYSCALL;
+
 	memset(gdt, 0, sizeof(gdt));
 	memset(idt, 0, sizeof(idt));

diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h
index 1262f6cd6..6138e4402 100644
--- a/kernel/arch/i386/sconst.h
+++ b/kernel/arch/i386/sconst.h
@@ -73,7 +73,7 @@
 * displ is the stack displacement. In case of an exception, there are two
 * extra values on the stack - the error code and the exception number
 */
-#define SAVE_PROCESS_CTX(displ) \
+#define SAVE_PROCESS_CTX(displ, trapcode) \
 \
 	cld /* set the direction flag to a known state */	;\
 \
@@ -82,6 +82,7 @@
 	movl	(CURR_PROC_PTR + 4 + displ)(%esp), %ebp	;\
 \
 	SAVE_GP_REGS(%ebp)	;\
+	movl	$trapcode, P_KERN_TRAP_STYLE(%ebp)	;\
 	pop	%esi	/* get the orig %ebp and save it */	;\
 	mov	%esi, BPREG(%ebp)	;\
 \
diff --git a/kernel/arch/i386/usermapped_data_arch.c b/kernel/arch/i386/usermapped_data_arch.c
new file mode 100644
index 000000000..837836a21
--- /dev/null
+++ b/kernel/arch/i386/usermapped_data_arch.c
@@ -0,0 +1,33 @@
+#include "kernel.h"
+#include "arch_proto.h"
+
+struct minix_ipcvecs minix_ipcvecs_softint = {
+	.send_ptr = usermapped_send_softint,
+	.receive_ptr = usermapped_receive_softint,
+	.sendrec_ptr = usermapped_sendrec_softint,
+	.sendnb_ptr = usermapped_sendnb_softint,
+	.notify_ptr = usermapped_notify_softint,
+	.do_kernel_call_ptr = usermapped_do_kernel_call_softint,
+	.senda_ptr = usermapped_senda_softint
+};
+
+struct minix_ipcvecs minix_ipcvecs_sysenter = {
+	.send_ptr = usermapped_send_sysenter,
+	.receive_ptr = usermapped_receive_sysenter,
+	.sendrec_ptr = usermapped_sendrec_sysenter,
+	.sendnb_ptr = usermapped_sendnb_sysenter,
+	.notify_ptr = usermapped_notify_sysenter,
+	.do_kernel_call_ptr = usermapped_do_kernel_call_sysenter,
+	.senda_ptr = usermapped_senda_sysenter
+};
+
+struct minix_ipcvecs minix_ipcvecs_syscall = {
+	.send_ptr = usermapped_send_syscall,
+	.receive_ptr = usermapped_receive_syscall,
+	.sendrec_ptr = usermapped_sendrec_syscall,
+	.sendnb_ptr = usermapped_sendnb_syscall,
+	.notify_ptr = usermapped_notify_syscall,
+	.do_kernel_call_ptr = usermapped_do_kernel_call_syscall,
+	.senda_ptr = usermapped_senda_syscall
+};
+
diff --git a/kernel/arch/i386/usermapped_glo_ipc.S b/kernel/arch/i386/usermapped_glo_ipc.S
new file mode 100644
index 000000000..3df59df36
--- /dev/null
+++ b/kernel/arch/i386/usermapped_glo_ipc.S
@@ -0,0 +1,94 @@
+#include
+#include
+
+/**========================================================================* */
+/*				IPC assembly routines			  * */
+/**========================================================================* */
+/* all message passing routines save ebx, but destroy eax and ecx. */
+
+#define IPCFUNC(name,SETARGS,VEC,POSTTRAP)	\
+ENTRY(usermapped_ ## name ## _softint)	;\
+	push	%ebp	;\
+	movl	%esp, %ebp	;\
+	push	%ebx	;\
+	SETARGS	;\
+	int	$VEC	/* trap to the kernel */	;\
+	mov	%ebx, %ecx	/* save %ebx */	;\
+	POSTTRAP	;\
+	pop	%ebx	;\
+	pop	%ebp	;\
+	ret	;\
+ENTRY(usermapped_ ## name ## _sysenter)	;\
+	push	%ebp	;\
+	movl	%esp, %ebp	;\
+	push	%ebp	;\
+	push	%edx	;\
+	push	%ebx	;\
+	push	%esi	;\
+	push	%edi	;\
+	movl	%esp, %esi	/* kernel uses %esi for restored %esp */;\
+	movl	$0f, %edx	/* kernel uses %edx for restored %eip */;\
+	movl	$VEC, %edi	/* %edi to distinguish ipc/kerncall */	;\
+	SETARGS	/* call-specific register setup */	;\
+	sysenter	/* disappear into kernel */	;\
+0:	;\
+	mov	%ebx, %ecx	/* return w. state mangled; save %ebx */;\
+	pop	%edi	;\
+	pop	%esi	;\
+	pop	%ebx	;\
+	pop	%edx	;\
+	pop	%ebp	;\
+	POSTTRAP	;\
+	pop	%ebp	;\
+	ret	;\
+ENTRY(usermapped_ ## name ## _syscall)	;\
+	push	%ebp	;\
+	movl	%esp, %ebp	;\
+	push	%ebp	;\
+	push	%edx	;\
+	push	%ebx	;\
+	push	%esi	;\
+	push	%edi	;\
+	movl	$VEC, %edi	/* %edi to distinguish ipc/kerncall */	;\
+	SETARGS	/* call-specific register setup */	;\
+	movl	%ecx, %edx	/* %ecx is clobbered by SYSCALL */	;\
+	syscall	/* disappear into kernel */	;\
+	mov	%ebx, %ecx	/* return w. state mangled; save %ebx */;\
+	pop	%edi	;\
+	pop	%esi	;\
+	pop	%ebx	;\
+	pop	%edx	;\
+	pop	%ebp	;\
+	POSTTRAP	;\
+	pop	%ebp	;\
+	ret
+
+#define IPCARGS(opcode)	\
+	movl	8(%ebp), %eax	/* eax = dest-src */	;\
+	movl	12(%ebp), %ebx	/* ebx = message pointer */	;\
+	movl	$opcode, %ecx	;\
+
+#define SENDA_ARGS	\
+	movl	12(%ebp), %eax	/* eax = count */	;\
+	movl	8(%ebp), %ebx	/* ebx = table */	;\
+	movl	$SENDA, %ecx	;\
+
+#define GETSTATUS	\
+	push	%eax	;\
+	movl	16(%ebp), %eax	/* eax = status pointer */	;\
+	movl	%ecx, (%eax)	/* ecx = saved %ebx (status) */	;\
+	pop	%eax
+
+#define KERNARGS	mov 8(%ebp), %eax
+
+IPCFUNC(send,IPCARGS(SEND),IPCVEC_UM,)
+IPCFUNC(receive,IPCARGS(RECEIVE),IPCVEC_UM,GETSTATUS)
+IPCFUNC(sendrec,IPCARGS(SENDREC),IPCVEC_UM,)
+IPCFUNC(sendnb,IPCARGS(SENDNB),IPCVEC_UM,)
+IPCFUNC(notify,IPCARGS(NOTIFY),IPCVEC_UM,)
+IPCFUNC(senda,SENDA_ARGS,IPCVEC_UM,)
+IPCFUNC(do_kernel_call,KERNARGS,KERVEC_UM,)
+
+.data
+LABEL(usermapped_offset)
+.space 4
diff --git a/kernel/main.c b/kernel/main.c
index 9dff7251c..652f68f5b 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -240,8 +240,10 @@ void kmain(kinfo_t *local_cbi)
 	get_cpulocal_var(proc_ptr) = rp;

 	/* Process isn't scheduled until VM has set up a pagetable for it. */
-	if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0)
+	if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0) {
 		rp->p_rts_flags |= RTS_VMINHIBIT;
+		rp->p_rts_flags |= RTS_BOOTINHIBIT;
+	}

 	rp->p_rts_flags |= RTS_PROC_STOP;
 	rp->p_rts_flags &= ~RTS_SLOT_FREE;
diff --git a/kernel/proc.h b/kernel/proc.h
index 9b792b931..0a32d022d 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -159,6 +159,7 @@ struct proc {
 	   pick a new one. Process was dequeued and
 	   should be enqueued at the end of some run
 	   queue again */
+#define RTS_BOOTINHIBIT	0x10000 /* not ready until VM has made it runnable */

 /* A process is runnable iff p_rts_flags == 0. 
*/ #define rts_f_is_runnable(flg) ((flg) == 0) diff --git a/kernel/proto.h b/kernel/proto.h index 28dbba4a6..aa9702480 100644 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -59,6 +59,7 @@ void enqueue(struct proc *rp); void dequeue(struct proc *rp); void switch_to_user(void); void arch_proc_reset(struct proc *rp); +void arch_proc_setcontext(struct proc *rp, struct stackframe_s *state, int user); struct proc * arch_finish_switch_to_user(void); struct proc *endpoint_lookup(endpoint_t ep); #if DEBUG_ENABLE_IPC_WARNINGS diff --git a/kernel/system/do_sigreturn.c b/kernel/system/do_sigreturn.c index 090557fd0..f96a727be 100644 --- a/kernel/system/do_sigreturn.c +++ b/kernel/system/do_sigreturn.c @@ -49,7 +49,7 @@ int do_sigreturn(struct proc * caller, message * m_ptr) #endif /* Restore the registers. */ - memcpy(&rp->p_reg, &sc.sc_regs, sizeof(sigregs)); + arch_proc_setcontext(rp, &sc.sc_regs, 1); #if defined(__i386__) if(sc.sc_flags & MF_FPU_INITIALIZED) { @@ -60,8 +60,6 @@ int do_sigreturn(struct proc * caller, message * m_ptr) } #endif - rp->p_misc_flags |= MF_CONTEXT_SET; - return(OK); } #endif /* USE_SIGRETURN */ diff --git a/kernel/system/do_vmctl.c b/kernel/system/do_vmctl.c index f3a9b980e..7ecc5e30b 100644 --- a/kernel/system/do_vmctl.c +++ b/kernel/system/do_vmctl.c @@ -174,6 +174,9 @@ int do_vmctl(struct proc * caller, message * m_ptr) /* VM says: forget about old mappings we have cached. */ mem_clear_mapcache(); return OK; + case VMCTL_BOOTINHIBIT_CLEAR: + RTS_UNSET(p, RTS_BOOTINHIBIT); + return OK; } /* Try architecture-specific vmctls. */ diff --git a/lib/libc/arch/i386/sys-minix/_ipc.S b/lib/libc/arch/i386/sys-minix/_ipc.S index 4e828d749..bcb70a20c 100644 --- a/lib/libc/arch/i386/sys-minix/_ipc.S +++ b/lib/libc/arch/i386/sys-minix/_ipc.S @@ -9,40 +9,40 @@ /* IPC assembly routines * */ /**========================================================================* */ /* all message passing routines save ebx, but destroy eax and ecx. 
*/ -ENTRY(_send) +ENTRY(_send_orig) push %ebp movl %esp, %ebp push %ebx movl SRC_DST(%ebp), %eax /* eax = dest-src */ movl MESSAGE(%ebp), %ebx /* ebx = message pointer */ movl $SEND, %ecx /* _send(dest, ptr) */ - int $IPCVEC /* trap to the kernel */ + int $IPCVEC_ORIG /* trap to the kernel */ pop %ebx pop %ebp ret -ENTRY(_receive) +ENTRY(_receive_orig) push %ebp movl %esp, %ebp push %ebx movl SRC_DST(%ebp), %eax /* eax = dest-src */ movl MESSAGE(%ebp), %ebx /* ebx = message pointer */ movl $RECEIVE, %ecx /* _receive(src, ptr) */ - int $IPCVEC /* trap to the kernel */ + int $IPCVEC_ORIG /* trap to the kernel */ movl STATUS(%ebp), %ecx /* ecx = status pointer */ movl %ebx, (%ecx) pop %ebx pop %ebp ret -ENTRY(_sendrec) +ENTRY(_sendrec_orig) push %ebp movl %esp, %ebp push %ebx movl SRC_DST(%ebp), %eax /* eax = dest-src */ movl MESSAGE(%ebp), %ebx /* ebx = message pointer */ movl $SENDREC, %ecx /* _sendrec(srcdest, ptr) */ - int $IPCVEC /* trap to the kernel */ + int $IPCVEC_ORIG /* trap to the kernel */ pop %ebx pop %ebp ret @@ -54,38 +54,38 @@ ENTRY(_minix_kernel_info_struct) movl $0, %eax movl $0, %ebx movl $MINIX_KERNINFO, %ecx - int $IPCVEC /* trap to the kernel */ + int $IPCVEC_ORIG /* trap to the kernel */ movl 8(%ebp), %ecx /* ecx = return struct ptr */ movl %ebx, (%ecx) pop %ebx pop %ebp ret -ENTRY(_notify) +ENTRY(_notify_orig) push %ebp movl %esp, %ebp push %ebx movl SRC_DST(%ebp), %eax /* eax = destination */ movl $NOTIFY, %ecx /* _notify(srcdst) */ - int $IPCVEC /* trap to the kernel */ + int $IPCVEC_ORIG /* trap to the kernel */ pop %ebx pop %ebp ret -ENTRY(_sendnb) +ENTRY(_sendnb_orig) push %ebp movl %esp, %ebp push %ebx movl SRC_DST(%ebp), %eax /* eax = dest-src */ movl MESSAGE(%ebp), %ebx /* ebx = message pointer */ movl $SENDNB, %ecx /* _sendnb(dest, ptr) */ - int $IPCVEC /* trap to the kernel */ + int $IPCVEC_ORIG /* trap to the kernel */ pop %ebx pop %ebp ret -ENTRY(_do_kernel_call) +ENTRY(_do_kernel_call_orig) /* pass the message pointer to kernel in the %eax register */ movl 4(%esp), %eax - int $KERVEC + int $KERVEC_ORIG ret diff --git a/lib/libc/arch/i386/sys-minix/_senda.S b/lib/libc/arch/i386/sys-minix/_senda.S index 9a2cb6d81..256d85180 100644 --- a/lib/libc/arch/i386/sys-minix/_senda.S +++ b/lib/libc/arch/i386/sys-minix/_senda.S @@ -4,7 +4,7 @@ MSGTAB = 8 /* message table */ TABCOUNT = 12 /* number of entries in message table */ -ENTRY(_senda) +ENTRY(_senda_orig) push %ebp movl %esp, %ebp push %ebx diff --git a/lib/libc/sys-minix/init.c b/lib/libc/sys-minix/init.c index 7b21078ca..38974f39c 100644 --- a/lib/libc/sys-minix/init.c +++ b/lib/libc/sys-minix/init.c @@ -2,15 +2,29 @@ #include #include +/* Minix kernel info, IPC functions pointers */ struct minix_kerninfo *_minix_kerninfo = NULL; void __minix_init(void) __attribute__((__constructor__, __used__)); +struct minix_ipcvecs _minix_ipcvecs = { + .sendrec_ptr = _sendrec_orig, + .send_ptr = _send_orig, + .notify_ptr = _notify_orig, + .senda_ptr = _senda_orig, + .sendnb_ptr = _sendnb_orig, + .receive_ptr = _receive_orig, + .do_kernel_call_ptr = _do_kernel_call_orig, +}; + void __minix_init(void) { if((_minix_kernel_info_struct(&_minix_kerninfo)) != 0 || _minix_kerninfo->kerninfo_magic != KERNINFO_MAGIC) { _minix_kerninfo = NULL; - } + } else if((_minix_kerninfo->ki_flags & MINIX_KIF_IPCVECS) && + _minix_kerninfo->minix_ipcvecs) { + _minix_ipcvecs = *_minix_kerninfo->minix_ipcvecs; + } } diff --git a/lib/libc/sys-minix/syscall.c b/lib/libc/sys-minix/syscall.c index 522810835..c1b53e31c 100644 --- 
a/lib/libc/sys-minix/syscall.c
+++ b/lib/libc/sys-minix/syscall.c
@@ -11,7 +11,7 @@ int _syscall(endpoint_t who, int syscallnr, message *msgptr)
 	int status;

 	msgptr->m_type = syscallnr;
-	status = _sendrec(who, msgptr);
+	status = sendrec(who, msgptr);
 	if (status != 0) {
 		/* 'sendrec' itself failed. */
 		/* XXX - strerror doesn't know all the codes */
diff --git a/lib/libexec/exec_general.c b/lib/libexec/exec_general.c
index 20d3ca9ee..2ac942df9 100644
--- a/lib/libexec/exec_general.c
+++ b/lib/libexec/exec_general.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/lib/libminlib/i386/_cpufeature.c b/lib/libminlib/i386/_cpufeature.c
index 52d8db2dc..1a7c011d5 100644
--- a/lib/libminlib/i386/_cpufeature.c
+++ b/lib/libminlib/i386/_cpufeature.c
@@ -3,10 +3,14 @@
 #include
 #include
 #include
+#include

 int _cpufeature(int cpufeature)
 {
 	u32_t eax, ebx, ecx, edx;
+	u32_t ef_eax = 0, ef_ebx = 0, ef_ecx = 0, ef_edx = 0;
+	unsigned int family, model, stepping;
+	int is_intel = 0, is_amd = 0;

 	eax = ebx = ecx = edx = 0;

@@ -14,8 +18,34 @@ int _cpufeature(int cpufeature)
 	eax = 0;
 	_cpuid(&eax, &ebx, &ecx, &edx);
 	if(eax > 0) {
+		char vendor[12];
+		memcpy(vendor, &ebx, sizeof(ebx));
+		memcpy(vendor+4, &edx, sizeof(edx));
+		memcpy(vendor+8, &ecx, sizeof(ecx));
+		if(!strncmp(vendor, "GenuineIntel", sizeof(vendor)))
+			is_intel = 1;
+		if(!strncmp(vendor, "AuthenticAMD", sizeof(vendor)))
+			is_amd = 1;
 		eax = 1;
 		_cpuid(&eax, &ebx, &ecx, &edx);
+	} else return 0;
+
+	stepping = eax & 0xf;
+	model = (eax >> 4) & 0xf;
+	family = (eax >> 8) & 0xf;
+
+	/* the extended model bits apply when the base family is 0x6 or 0xf */
+	if(family == 0xf || family == 0x6) {
+		model += ((eax >> 16) & 0xf) << 4;
+	}
+
+	if(family == 0xf) {
+		family += (eax >> 20) & 0xff;
+	}
+
+	if(is_amd) {
+		ef_eax = 0x80000001;
+		_cpuid(&ef_eax, &ef_ebx, &ef_ecx, &ef_edx);
 	}

 	switch(cpufeature) {
@@ -53,6 +83,15 @@ int _cpufeature(int cpufeature)
 			return edx & CPUID1_EDX_HTT;
 		case _CPUF_I386_HTT_MAX_NUM:
 			return (ebx >> 16) & 0xff;
+		case _CPUF_I386_SYSENTER:
+			if(!is_intel) return 0;
+			if(!(edx & CPUID1_EDX_SYSENTER)) return 0;
+			if(family == 6 && model < 3 && stepping < 3) return 0;
+			return 1;
+		case _CPUF_I386_SYSCALL:
+			if(!is_amd) return 0;
+			if(!(ef_edx & CPUID_EF_EDX_SYSENTER)) return 0;
+			return 1;
 	}

 	return 0;
diff --git a/lib/libsys/kernel_call.c b/lib/libsys/kernel_call.c
index 264547325..856e45aa6 100644
--- a/lib/libsys/kernel_call.c
+++ b/lib/libsys/kernel_call.c
@@ -4,6 +4,6 @@ int _kernel_call(int syscallnr, message *msgptr)
 {
 	msgptr->m_type = syscallnr;
-	_do_kernel_call(msgptr);
+	do_kernel_call(msgptr);
 	return(msgptr->m_type);
 }
diff --git a/lib/libsys/taskcall.c b/lib/libsys/taskcall.c
index f6778699a..b547471b7 100644
--- a/lib/libsys/taskcall.c
+++ b/lib/libsys/taskcall.c
@@ -14,7 +14,7 @@ register message *msgptr;
 	int status;

 	msgptr->m_type = syscallnr;
-	status = _sendrec(who, msgptr);
+	status = sendrec(who, msgptr);
 	if (status != 0) return(status);
 	return(msgptr->m_type);
 }
diff --git a/servers/vm/main.c b/servers/vm/main.c
index 25a0b862d..dbcae012e 100644
--- a/servers/vm/main.c
+++ b/servers/vm/main.c
@@ -294,6 +294,10 @@ void exec_bootproc(struct vmproc *vmp, struct boot_image *ip)
 	if(sys_exec(vmp->vm_endpoint, (char *) execi->stack_high - 12,
 		(char *) ip->proc_name, execi->pc) != OK)
 		panic("vm: boot process exec of %d failed\n", vmp->vm_endpoint);
+
+	/* make it runnable */
+	if(sys_vmctl(vmp->vm_endpoint, VMCTL_BOOTINHIBIT_CLEAR, 0) != OK)
+		panic("VMCTL_BOOTINHIBIT_CLEAR failed");
 }

 void init_vm(void)
@@ -301,6 +305,7 @@ void init_vm(void)
 {
 	int s, i;
static struct memory mem_chunks[NR_MEMS]; static struct boot_image *ip; + extern void __minix_init(void); #if SANITYCHECKS incheck = nocheck = 0; @@ -414,6 +419,11 @@ void init_vm(void) /* Initialize the structures for queryexit */ init_query_exit(); + + /* Acquire kernel ipc vectors that weren't available + * before VM had determined kernel mappings + */ + __minix_init(); } /*===========================================================================* -- 2.44.0
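
The CHOOSETRAP indirection in the patched <minix/ipc.h> is easiest to see from the caller's side. Below is a minimal sketch of the dispatch path, assuming only the declarations above; example_sendrec() is illustrative and not part of the patch.

/* With the patched <minix/ipc.h>, a plain sendrec() call expands via
 * CHOOSETRAP to an indirect call through _minix_ipcvecs. The libc
 * constructor __minix_init() starts from the _orig int-based stubs
 * and, when the kernel advertises MINIX_KIF_IPCVECS, overwrites them
 * with the user-mapped stubs (softint, sysenter or syscall flavour).
 */
#include <minix/ipc.h>

int example_sendrec(endpoint_t dst, message *m)
{
	/* expands to (_minix_ipcvecs.sendrec_ptr)(dst, m) */
	return sendrec(dst, m);
}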
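The detection itself is an ordinary userland CPUID probe through _cpufeature(); a hypothetical caller of the new feature numbers could look like this (the function name and messages are illustrative only).

#include <stdio.h>
#include <minix/cpufeature.h>

/* Hypothetical probe of the new feature numbers. _cpufeature() only
 * reports SYSENTER on GenuineIntel (rejecting the early P6 family-6
 * steppings that set the CPUID bit spuriously) and SYSCALL on
 * AuthenticAMD, via extended CPUID leaf 0x80000001.
 */
static void report_fast_traps(void)
{
	if(_cpufeature(_CPUF_I386_SYSENTER))
		printf("SYSENTER/SYSEXIT available\n");
	if(_cpufeature(_CPUF_I386_SYSCALL))
		printf("SYSCALL/SYSRET available\n");
}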
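Finally, a process can observe which style the kernel selected through the same minix_kerninfo mapping libc uses. A sketch, assuming the libc global _minix_kerninfo defined in lib/libc/sys-minix/init.c above; ipc_style() and its strings are illustrative.

#include <minix/const.h>
#include <minix/ipc.h>
#include <minix/type.h>

extern struct minix_kerninfo *_minix_kerninfo;	/* set by __minix_init() */

/* Sketch: mirror the __minix_init() checks to name the active style. */
static const char *ipc_style(void)
{
	struct minix_kerninfo *ki = _minix_kerninfo;

	if(ki == NULL || !(ki->ki_flags & MINIX_KIF_IPCVECS))
		return "in-libc int-trap fallback";	/* e.g. libc_ipc=1 */
	if(ki->minix_feature_flags & MKF_I386_INTEL_SYSENTER)
		return "user-mapped sysenter";
	if(ki->minix_feature_flags & MKF_I386_AMD_SYSCALL)
		return "user-mapped syscall";
	return "user-mapped soft interrupt";
}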