Zhao Yanbai Git Server - minix.git/commitdiff
SYSENTER/SYSCALL support
authorBen Gras <ben@minix3.org>
Sun, 10 Jun 2012 17:50:17 +0000 (17:50 +0000)
committerBen Gras <ben@minix3.org>
Mon, 24 Sep 2012 13:53:43 +0000 (15:53 +0200)
. add cpufeature detection of both
. use it for both ipc and kernelcall traps, using a register
  for call number
. SYSENTER/SYSCALL does not save any context, therefore userland
  has to save it
. to accommodate multiple kernel entry/exit types, the entry
  type is recorded in the process struct. hitherto all types
  were interrupt (soft int, exception, hard int); now SYSENTER/SYSCALL
  is new, with the difference that context is not fully restored
  from proc struct when running the process again. this can't be
  done as some information is missing.
. complication: cases in which the kernel has to fully change
  process context (i.e. sigreturn). in that case the exit type
  is changed from SYSENTER/SYSEXIT to soft-int (i.e. iret) and
  context is fully restored from the proc struct. this does mean
  the PC and SP must change, as the sysenter/sysexit userland code
  will otherwise try to restore its own context. this is true in the
  sigreturn case.
. override all usage by setting libc_ipc=1

38 files changed:
include/arch/i386/include/archtypes.h
include/arch/i386/include/asm.h
include/arch/i386/include/interrupt.h
include/arch/i386/include/vm.h
include/minix/com.h
include/minix/const.h
include/minix/cpufeature.h
include/minix/ipc.h
include/minix/type.h
kernel/arch/i386/Makefile.inc
kernel/arch/i386/apic.c
kernel/arch/i386/apic_asm.S
kernel/arch/i386/arch_system.c
kernel/arch/i386/arch_watchdog.c
kernel/arch/i386/exception.c
kernel/arch/i386/include/arch_proto.h
kernel/arch/i386/include/archconst.h
kernel/arch/i386/memory.c
kernel/arch/i386/mpx.S
kernel/arch/i386/procoffsets.cf
kernel/arch/i386/protect.c
kernel/arch/i386/sconst.h
kernel/arch/i386/usermapped_data_arch.c [new file with mode: 0644]
kernel/arch/i386/usermapped_glo_ipc.S [new file with mode: 0644]
kernel/main.c
kernel/proc.h
kernel/proto.h
kernel/system/do_sigreturn.c
kernel/system/do_vmctl.c
lib/libc/arch/i386/sys-minix/_ipc.S
lib/libc/arch/i386/sys-minix/_senda.S
lib/libc/sys-minix/init.c
lib/libc/sys-minix/syscall.c
lib/libexec/exec_general.c
lib/libminlib/i386/_cpufeature.c
lib/libsys/kernel_call.c
lib/libsys/taskcall.c
servers/vm/main.c

index cc3a6b478d735a120f6d41bded02512f517a2b82..22a2ed2bc9c275a2676d4eb4cd458e9b8b8c61b2 100644 (file)
@@ -33,6 +33,7 @@ typedef struct segframe {
        reg_t   p_cr3;          /* page table root */
        u32_t   *p_cr3_v;
        char    *fpu_state;
+       int     p_kern_trap_style;
 } segframe_t;
 
 struct cpu_info {
index cb15ecf552ac2a9007194b7608454783344d7ee0..1b4c91de2c0904bb331e84e15b84f0fc450b7fda 100644 (file)
 #ifdef __minix
 #define IMPORT(sym)               \
         .extern _C_LABEL(sym)
+
+#define KERVEC_ORIG 32     /* syscall trap to kernel */
+#define IPCVEC_ORIG 33     /* ipc trap to kernel  */
+
+#define KERVEC_UM 34     /* syscall trap to kernel, user-mapped code */
+#define IPCVEC_UM 35     /* ipc trap to kernel, user-mapped code  */
+
 #endif
 
 #endif /* !_I386_ASM_H_ */
index f7442f89768e0a9780cfe01096d867a4972a3bfd..b725875c63d98eba100edd75c2d233bdba369235 100644 (file)
 #define OVERFLOW_VECTOR    4   /* from INTO */
 
 /* Fixed system call vector. */
-#define KERN_CALL_VECTOR  32   /* system calls are made with int SYSVEC */
-#define IPC_VECTOR        33   /* interrupt vector for ipc */
+#define KERN_CALL_VECTOR_ORIG  32 /* system calls are made with int SYSVEC */
+#define IPC_VECTOR_ORIG        33 /* interrupt vector for ipc */
+#define KERN_CALL_VECTOR_UM    34 /* user-mapped equivalent */
+#define IPC_VECTOR_UM          35 /* user-mapped equivalent */
 
 /* Hardware interrupt numbers. */
 #ifndef USE_APIC
index 71104e0a26ca6e46b9f3760f7353f3424ce155db..5c0c3c109d60556bab7cfa1eb23d879db86b42c3 100644 (file)
@@ -73,6 +73,7 @@ i386/vm.h
 /* CPUID flags */
 #define CPUID1_EDX_FPU         (1L)            /* FPU presence */
 #define CPUID1_EDX_PSE                 (1L <<  3)      /* Page Size Extension */
+#define CPUID1_EDX_SYSENTER    (1L << 11)      /* Intel SYSENTER */
 #define CPUID1_EDX_PGE                 (1L << 13)      /* Page Global (bit) Enable */
 #define CPUID1_EDX_APIC_ON_CHIP (1L << 9)      /* APIC is present on the chip */
 #define CPUID1_EDX_TSC         (1L << 4)       /* Timestamp counter present */
@@ -85,6 +86,8 @@ i386/vm.h
 #define CPUID1_ECX_SSE4_1      (1L << 19)
 #define CPUID1_ECX_SSE4_2      (1L << 20)
 
+#define CPUID_EF_EDX_SYSENTER  (1L << 11)      /* Intel SYSENTER */
+
 #ifndef __ASSEMBLY__
 
 #include <minix/type.h>
index 525e4bbc230eec8b2e6d748205661494a823ed5e..7ff70e30c751913b4cbc260b663566246521026d 100644 (file)
 #define VMCTL_VMINHIBIT_SET    30
 #define VMCTL_VMINHIBIT_CLEAR  31
 #define VMCTL_CLEARMAPCACHE    32
+#define VMCTL_BOOTINHIBIT_CLEAR        33
 
 /* Codes and field names for SYS_SYSCTL. */
 #define SYSCTL_CODE            m1_i1   /* SYSCTL_CODE_* below */
index 2a2d634dc79b4d3ef7eb6e3717506a3ee7a47d47..0afebcf6cfe603d4908b258e068ee51cecd7c1f1 100644 (file)
 /* magic value to put in struct proc entries for sanity checks. */
 #define PMAGIC 0xC0FFEE1
 
+/* MINIX_KERNFLAGS flags */
+#define MKF_I386_INTEL_SYSENTER        (1L << 0) /* SYSENTER available and supported */
+#define MKF_I386_AMD_SYSCALL   (1L << 1) /* SYSCALL available and supported */
+
 #endif /* _MINIX_CONST_H */
index e5f983893b0853e3088bdcee1be0b74d84ae18ca..4ceae56c5fa943b05e48a49e4327404701874c4a 100644 (file)
 #define _CPUF_I386_HTT         13      /* Supports HTT */
 #define _CPUF_I386_HTT_MAX_NUM 14      /* Maximal num of threads */
 
+#define _CPUF_I386_MTRR                15
+#define _CPUF_I386_SYSENTER    16      /* Intel SYSENTER instrs */
+#define _CPUF_I386_SYSCALL     17      /* AMD SYSCALL instrs */
+
 int _cpufeature(int featureno);
 
 #endif
index e07cdefb39f2bf42d5ed0789190ddb7d31c4dff4..48a92da521b47e8d62db9c946623a98557f0b71c 100644 (file)
@@ -5,6 +5,7 @@
 #endif
 #include <minix/ipcconst.h>
 #include <minix/type.h>
+#include <minix/const.h>
 
 /*==========================================================================* 
  * Types relating to messages.                                                     *
@@ -152,24 +153,37 @@ typedef struct asynmsg
 #define AMF_NOTIFY_ERR 020     /* Send a notification when AMF_DONE is set and
                                 * delivery of the message failed */
 
-/* Hide names to avoid name space pollution. */
-#define echo           _echo
-#define notify         _notify
-#define sendrec                _sendrec
-#define receive                _receive
-#define send           _send
-#define sendnb         _sendnb
-#define senda          _senda
-
-int echo(message *m_ptr);
-int notify(endpoint_t dest);
-int sendrec(endpoint_t src_dest, message *m_ptr);
-int receive(endpoint_t src, message *m_ptr, int *status_ptr);
-int send(endpoint_t dest, message *m_ptr);
-int sendnb(endpoint_t dest, message *m_ptr);
-int senda(asynmsg_t *table, size_t count);
+int _send_orig(endpoint_t dest, message *m_ptr);
+int _receive_orig(endpoint_t src, message *m_ptr, int *status_ptr);
+int _sendrec_orig(endpoint_t src_dest, message *m_ptr);
+int _sendnb_orig(endpoint_t dest, message *m_ptr);
+int _notify_orig(endpoint_t dest);
+int _senda_orig(asynmsg_t *table, size_t count);
+int _do_kernel_call_orig(message *m_ptr);
+
 int _minix_kernel_info_struct(struct minix_kerninfo **);
 
-int _do_kernel_call(message *m_ptr);
+struct minix_ipcvecs {
+       int (*send_ptr)(endpoint_t dest, message *m_ptr);
+       int (*receive_ptr)(endpoint_t src, message *m_ptr, int *st);
+       int (*sendrec_ptr)(endpoint_t src_dest, message *m_ptr);
+       int (*sendnb_ptr)(endpoint_t dest, message *m_ptr);
+       int (*notify_ptr)(endpoint_t dest);
+       int (*do_kernel_call_ptr)(message *m_ptr);
+       int (*senda_ptr)(asynmsg_t *table, size_t count);
+};
+
+/* kernel-set IPC vectors retrieved by a constructor in libc/sys-minix/init.c */
+extern struct minix_ipcvecs _minix_ipcvecs;
+
+#define CHOOSETRAP(name) (_minix_ipcvecs. name ## _ptr)
+
+#define send           CHOOSETRAP(send)
+#define receive                CHOOSETRAP(receive)
+#define sendrec                CHOOSETRAP(sendrec)
+#define sendnb         CHOOSETRAP(sendnb)
+#define notify         CHOOSETRAP(notify)
+#define do_kernel_call CHOOSETRAP(do_kernel_call)
+#define senda          CHOOSETRAP(senda)
 
 #endif /* _IPC_H */
index dd14732090cb9895d81fff774643988b2487837e..dbcb23e1f58d31d8e54cc51124344e397b210d03 100644 (file)
@@ -168,8 +168,8 @@ struct minix_kerninfo {
         */
 #define KERNINFO_MAGIC 0xfc3b84bf
        u32_t kerninfo_magic;
-       u32_t minix_feature_flags;
-       u32_t flags_unused1;
+       u32_t minix_feature_flags;      /* features in minix kernel */
+       u32_t ki_flags;                 /* what is present in this struct */
        u32_t flags_unused2;
        u32_t flags_unused3;
        u32_t flags_unused4;
@@ -177,7 +177,10 @@ struct minix_kerninfo {
        struct machine          *machine;
        struct kmessages        *kmessages;
        struct loadinfo         *loadinfo;
+       struct minix_ipcvecs    *minix_ipcvecs;
 } __packed;
 
+#define MINIX_KIF_IPCVECS      (1L << 0)
+
 #endif /* _TYPE_H */
 
index 25f637c538996adfdbcc98122172756a725f7863..c9b9375cc68cefe8aa4af041e6d2a64be676535c 100644 (file)
@@ -36,7 +36,7 @@ SRCS+=        mpx.S arch_clock.c arch_do_vmctl.c arch_system.c \
        do_iopenable.c do_readbios.c do_sdevio.c exception.c i8259.c io_inb.S \
        io_inl.S io_intr.S io_inw.S io_outb.S io_outl.S io_outw.S klib.S klib16.S memory.c \
        oxpcie.c protect.c direct_tty_utils.c arch_reset.c \
-       pg_utils.c
+       pg_utils.c usermapped_glo_ipc.S usermapped_data_arch.c
 
 OBJS.kernel+=  ${UNPAGED_OBJS}
 
index 597650020df509d1e614632173e505be97e3ad50..9ec3190c2e2e3bf6fe33e08b20f1451a821f4719 100644 (file)
@@ -835,8 +835,10 @@ static struct gate_table_s gate_table_ioapic[] = {
 };
 
 static struct gate_table_s gate_table_common[] = {
-       { ipc_entry, IPC_VECTOR, USER_PRIVILEGE },
-       { kernel_call_entry, KERN_CALL_VECTOR, USER_PRIVILEGE },
+       { ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
+       { kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
+       { ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
+       { kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
        { NULL, 0, 0}
 };
 
index 33a379d15324d973a7880ceea40bf738a70e23c2..ae9e0b6baee9a0bb79becab7fd35bc8879e25861 100644 (file)
@@ -18,7 +18,7 @@
 ENTRY(apic_hwint##irq)                                                 \
        TEST_INT_IN_KERNEL(4, 0f)                                       ;\
                                                                        \
-       SAVE_PROCESS_CTX(0)                                             ;\
+       SAVE_PROCESS_CTX(0, KTS_INT_HARD)                               ;\
        push    %ebp                                                    ;\
        call    _C_LABEL(context_stop)                                  ;\
        add     $4, %esp                                                ;\
@@ -47,7 +47,7 @@ ENTRY(apic_hwint##irq)                                                        \
 #define lapic_intr(func) \
        TEST_INT_IN_KERNEL(4, 0f)                                       ;\
                                                                        \
-       SAVE_PROCESS_CTX(0)                                             ;\
+       SAVE_PROCESS_CTX(0, KTS_INT_HARD)                               ;\
        push    %ebp                                                    ;\
        call    _C_LABEL(context_stop)                                  ;\
        add     $4, %esp                                                ;\
index 2296bf835993a018d526c31e11fe7adf39f8c369..4b1812be80411713c028622c55a6daa4928c68da 100644 (file)
@@ -7,6 +7,7 @@
 #include <string.h>
 #include <machine/cmos.h>
 #include <machine/bios.h>
+#include <machine/cpu.h>
 #include <minix/portio.h>
 #include <minix/cpufeature.h>
 #include <assert.h>
@@ -149,6 +150,7 @@ static char fpu_state[NR_PROCS][FPU_XFP_SIZE] __aligned(FPUALIGN);
 void arch_proc_reset(struct proc *pr)
 {
        char *v = NULL;
+       struct stackframe_s reg;
 
        assert(pr->p_nr < NR_PROCS);
 
@@ -161,11 +163,11 @@ void arch_proc_reset(struct proc *pr)
        }
 
        /* Clear process state. */
-        memset(&pr->p_reg, 0, sizeof(pr->p_reg));
+        memset(&reg, 0, sizeof(pr->p_reg));
         if(iskerneln(pr->p_nr))
-               pr->p_reg.psw = INIT_TASK_PSW;
+               reg.psw = INIT_TASK_PSW;
         else
-               pr->p_reg.psw = INIT_PSW;
+               reg.psw = INIT_PSW;
 
        pr->p_seg.fpu_state = v;
 
@@ -178,6 +180,9 @@ void arch_proc_reset(struct proc *pr)
        pr->p_reg.ss = 
        pr->p_reg.es = 
        pr->p_reg.ds = USER_DS_SELECTOR;
+
+       /* set full context and make sure it gets restored */
+       arch_proc_setcontext(pr, &reg, 0);
 }
 
 void arch_set_secondary_ipc_return(struct proc *p, u32_t val)
@@ -512,6 +517,94 @@ struct proc * arch_finish_switch_to_user(void)
        return p;
 }
 
+void arch_proc_setcontext(struct proc *p, struct stackframe_s *state, int isuser)
+{
+       if(isuser) {
+               /* Restore user bits of psw from sc, maintain system bits
+                * from proc.
+                */
+               state->psw  =  (state->psw & X86_FLAGS_USER) |
+                       (p->p_reg.psw & ~X86_FLAGS_USER);
+       }
+
+       /* someone wants to totally re-initialize process state */
+       assert(sizeof(p->p_reg) == sizeof(*state));
+       memcpy(&p->p_reg, state, sizeof(*state));
+
+       /* further code is instructed to not touch the context
+        * any more
+        */
+       p->p_misc_flags |= MF_CONTEXT_SET;
+
+       /* on x86 this requires returning using iret (KTS_INT)
+        * so that the full context is restored instead of relying on
+        * the userspace doing it (as it would do on SYSEXIT).
+        * as ESP and EIP are also reset, userspace won't try to
+        * restore bogus context after returning.
+        *
+        * if the process is not blocked, or the kernel will ignore
+        * our trap style, we needn't panic but things will probably
+        * not go well for the process (restored context will be ignored)
+        * and the situation should be debugged.
+        */
+       if(!(p->p_rts_flags)) {
+               printf("WARNINIG: setting full context of runnable process\n");
+               print_proc(p);
+               util_stacktrace();
+       }
+       if(p->p_seg.p_kern_trap_style == KTS_NONE)
+               printf("WARNINIG: setting full context of out-of-kernel process\n");
+       p->p_seg.p_kern_trap_style = KTS_FULLCONTEXT;
+}
+
+void restore_user_context(struct proc *p)
+{
+       int trap_style = p->p_seg.p_kern_trap_style;
+#if 0
+#define TYPES 10
+       static int restores[TYPES], n = 0;
+
+       p->p_seg.p_kern_trap_style = KTS_NONE;
+
+       if(trap_style >= 0 && trap_style < TYPES)
+               restores[trap_style]++;
+
+       if(!(n++ % 500000)) {
+               int t;
+               for(t = 0; t < TYPES; t++)
+                       if(restores[t])
+                               printf("%d: %d   ", t, restores[t]);
+               printf("\n");
+       }
+#endif
+
+       if(trap_style == KTS_SYSENTER) {
+               restore_user_context_sysenter(p);
+               NOT_REACHABLE;
+        }
+
+       if(trap_style == KTS_SYSCALL) {
+               restore_user_context_syscall(p);
+               NOT_REACHABLE;
+       }
+
+        switch(trap_style) {
+                case KTS_NONE:
+                        panic("no entry trap style known");
+                case KTS_INT_HARD:
+                case KTS_INT_UM:
+                case KTS_FULLCONTEXT:
+                case KTS_INT_ORIG:
+                       restore_user_context_int(p);
+                       NOT_REACHABLE;
+                default:
+                        panic("unknown trap style recorded");
+                        NOT_REACHABLE;
+        }
+
+        NOT_REACHABLE;
+}
+
 void fpu_sigcontext(struct proc *pr, struct sigframe *fr, struct sigcontext *sc)
 {
        int fp_error;
index b6884e4d40887e987b90af34956707b61a4da8d3..80647af0dee542157e3bfb4f8c5ee50e76240c6d 100644 (file)
@@ -9,11 +9,6 @@
 
 #define CPUID_UNHALTED_CORE_CYCLES_AVAILABLE   0
 
-#define INTEL_MSR_PERFMON_CRT0 0xc1
-#define INTEL_MSR_PERFMON_SEL0 0x186
-
-#define INTEL_MSR_PERFMON_SEL0_ENABLE  (1 << 22)
-
 /*
  * Intel architecture performance counters watchdog
  */
index e10d6dd73f89be22ac4511cfcfba72fd87cfd0cc..d885cfcd1c84f91bcf5582f0fde748c16ce7646c 100644 (file)
@@ -248,11 +248,6 @@ void exception_handler(int is_nested, struct exception_frame * frame)
                        frame->vector, (unsigned long)frame->errcode,
                        (unsigned long)frame->eip, frame->cs,
                        (unsigned long)frame->eflags);
-               printseg("cs: ", 1, saved_proc, frame->cs);
-               printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds);
-               if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) {
-                       printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss);
-               }
                proc_stacktrace(saved_proc);
        }
 
index 3693ac46043482aae09b47b7a24b0174528c6707..9748b6c52014bcd75fec3e50f7ab13ceb4202f1c 100644 (file)
@@ -47,10 +47,25 @@ void alignment_check(void);
 void machine_check(void);
 void simd_exception(void);
 
+void restore_user_context_int(struct proc *);
+void restore_user_context_sysenter(struct proc *);
+void restore_user_context_syscall(struct proc *);
+
 /* Software interrupt handlers, in numerical order. */
 void trp(void);
-void ipc_entry(void);
-void kernel_call_entry(void);
+void ipc_entry_softint_orig(void);
+void ipc_entry_softint_um(void);
+void ipc_entry_sysenter(void);
+void ipc_entry_syscall_cpu0(void);
+void ipc_entry_syscall_cpu1(void);
+void ipc_entry_syscall_cpu2(void);
+void ipc_entry_syscall_cpu3(void);
+void ipc_entry_syscall_cpu4(void);
+void ipc_entry_syscall_cpu5(void);
+void ipc_entry_syscall_cpu6(void);
+void ipc_entry_syscall_cpu7(void);
+void kernel_call_entry_orig(void);
+void kernel_call_entry_um(void);
 void level0_call(void);
 
 /* exception.c */
@@ -111,6 +126,30 @@ void x86_load_es(u32_t);
 void x86_load_fs(u32_t);
 void x86_load_gs(u32_t);
 
+/* ipc functions in usermapped_ipc.S */
+int usermapped_send_softint(endpoint_t dest, message *m_ptr);
+int usermapped_receive_softint(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_softint(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_softint(endpoint_t dest, message *m_ptr);
+int usermapped_notify_softint(endpoint_t dest);
+int usermapped_do_kernel_call_softint(message *m_ptr);
+int usermapped_senda_softint(asynmsg_t *table, size_t count);
+
+int usermapped_send_syscall(endpoint_t dest, message *m_ptr);
+int usermapped_receive_syscall(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_syscall(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_syscall(endpoint_t dest, message *m_ptr);
+int usermapped_notify_syscall(endpoint_t dest);
+int usermapped_do_kernel_call_syscall(message *m_ptr);
+int usermapped_senda_syscall(asynmsg_t *table, size_t count);
+
+int usermapped_send_sysenter(endpoint_t dest, message *m_ptr);
+int usermapped_receive_sysenter(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_sysenter(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_sysenter(endpoint_t dest, message *m_ptr);
+int usermapped_notify_sysenter(endpoint_t dest);
+int usermapped_do_kernel_call_sysenter(message *m_ptr);
+int usermapped_senda_sysenter(asynmsg_t *table, size_t count);
 
 void switch_k_stack(void * esp, void (* continuation)(void));
 
index 6ae9de784f44c644e9171c0b58f4360d2a96c1b4..88fff3c59f28d3e7738b48a37de38066552c0e88 100644 (file)
 
 #define PG_ALLOCATEME ((phys_bytes)-1)
 
+/* MSRs */
+#define INTEL_MSR_PERFMON_CRT0         0xc1
+#define INTEL_MSR_SYSENTER_CS         0x174
+#define INTEL_MSR_SYSENTER_ESP        0x175
+#define INTEL_MSR_SYSENTER_EIP        0x176
+#define INTEL_MSR_PERFMON_SEL0        0x186
+
+#define INTEL_MSR_PERFMON_SEL0_ENABLE (1 << 22)
+
+#define AMD_EFER_SCE           (1L << 0) /* SYSCALL/SYSRET enabled */
+#define AMD_MSR_EFER           0xC0000080      /* extended features msr */
+#define AMD_MSR_STAR           0xC0000081      /* SYSCALL params msr */
+
+/* trap styles recorded on kernel entry and exit */
+#define KTS_NONE       1 /* invalid */
+#define KTS_INT_HARD   2 /* exception / hard interrupt */
+#define KTS_INT_ORIG   3 /* soft interrupt from libc */
+#define KTS_INT_UM     4 /* soft interrupt from usermapped code */
+#define KTS_FULLCONTEXT        5 /* must restore full context */
+#define KTS_SYSENTER   6 /* SYSENTER instruction (usermapped) */
+#define KTS_SYSCALL    7 /* SYSCALL instruction (usermapped) */
+
 #endif /* _I386_ACONST_H */
index 542084f5f3554f347326ee8381d051b562648f0f..776fec03a18bd6ac2f63e5a8f281a15d0fe2efbf 100644 (file)
@@ -883,7 +883,10 @@ int arch_phys_map_reply(const int index, const vir_bytes addr)
        }
 #endif
        if(index == first_um_idx) {
-               u32_t usermapped_offset;
+               extern struct minix_ipcvecs minix_ipcvecs_sysenter,
+                       minix_ipcvecs_syscall,
+                       minix_ipcvecs_softint;
+               extern u32_t usermapped_offset;
                assert(addr > (u32_t) &usermapped_start);
                usermapped_offset = addr - (u32_t) &usermapped_start;
                memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
@@ -895,13 +898,44 @@ int arch_phys_map_reply(const int index, const vir_bytes addr)
                ASSIGN(kmessages);
                ASSIGN(loadinfo);
 
+               /* select the right set of IPC routines to map into processes */
+               if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
+                       printf("kernel: selecting intel sysenter ipc style\n");
+                       minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
+               } else  if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
+                       printf("kernel: selecting amd syscall ipc style\n");
+                       minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
+               } else  {
+                       printf("kernel: selecting fallback (int) ipc style\n");
+                       minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
+               }
+
                /* adjust the pointers of the functions and the struct
                 * itself to the user-accessible mapping
                 */
+               FIXPTR(minix_kerninfo.minix_ipcvecs->send_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs->receive_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs->senda_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs->notify_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call_ptr);
+               FIXPTR(minix_kerninfo.minix_ipcvecs);
+
                minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
                minix_kerninfo.minix_feature_flags = minix_feature_flags;
                minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);
 
+               /* if libc_ipc is set, disable usermapped ipc functions
+                * and force binaries to use in-libc fallbacks.
+                */
+               if(env_get("libc_ipc")) {
+                       printf("kernel: forcing in-libc fallback ipc style\n");
+                       minix_kerninfo.minix_ipcvecs = NULL;
+               } else {
+                       minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
+               }
+
                return OK;
        }
 
index 44af169d059dad457d922e4d8a788a7d2120aa4d..75305e7aa73a25fa7de57e7b9a82ec24c51240c9 100644 (file)
@@ -47,6 +47,7 @@
 /* Selected 386 tss offsets. */
 #define TSS3_S_SP0     4
 
+IMPORT(usermapped_offset)
 IMPORT(copr_not_available_handler)
 IMPORT(params_size)
 IMPORT(params_offset)
@@ -72,7 +73,7 @@ IMPORT(multiboot_init)
 #define hwint_master(irq) \
        TEST_INT_IN_KERNEL(4, 0f)                                       ;\
                                                                        \
-       SAVE_PROCESS_CTX(0)                                             ;\
+       SAVE_PROCESS_CTX(0, KTS_INT_HARD)                               ;\
        push    %ebp                                                    ;\
        movl    $0, %ebp        /* for stack trace */                   ;\
        call    _C_LABEL(context_stop)                                  ;\
@@ -132,7 +133,7 @@ ENTRY(hwint07)
 #define hwint_slave(irq)       \
        TEST_INT_IN_KERNEL(4, 0f)                                       ;\
                                                                        \
-       SAVE_PROCESS_CTX(0)                                             ;\
+       SAVE_PROCESS_CTX(0, KTS_INT_HARD)                               ;\
        push    %ebp                                                    ;\
        movl    $0, %ebp        /* for stack trace */                   ;\
        call    _C_LABEL(context_stop)                                  ;\
@@ -187,13 +188,83 @@ ENTRY(hwint15)
 /* Interrupt routine for irq 15 */
        hwint_slave(15)
 
+/* differences with sysenter:
+ *   - we have to find our own per-cpu stack (i.e. post-SYSCALL
+ *     %esp is not configured)
+ *   - we have to save the post-SYSRET %eip, provided by the cpu
+ *     in %ecx
+ *   - the system call parameters are passed in %ecx, so we userland
+ *     code that executes SYSCALL copies %ecx to %edx. So the roles
+ *     of %ecx and %edx are reversed
+ *   - we can use %esi as a scratch register
+ */
+#define ipc_entry_syscall_percpu(cpu)                  ;\
+ENTRY(ipc_entry_syscall_cpu ## cpu)                    ;\
+       xchg    %ecx, %edx                              ;\
+       mov     k_percpu_stacks+4*cpu, %esi             ;\
+       mov     (%esi), %ebp                            ;\
+       movl    $KTS_SYSCALL, P_KERN_TRAP_STYLE(%ebp)   ;\
+       xchg    %esp, %esi                              ;\
+       jmp     syscall_sysenter_common
+
+ipc_entry_syscall_percpu(0)
+ipc_entry_syscall_percpu(1)
+ipc_entry_syscall_percpu(2)
+ipc_entry_syscall_percpu(3)
+ipc_entry_syscall_percpu(4)
+ipc_entry_syscall_percpu(5)
+ipc_entry_syscall_percpu(6)
+ipc_entry_syscall_percpu(7)
+
+ENTRY(ipc_entry_sysenter)
+       /* SYSENTER simply sets kernel segments, EIP to here, and ESP
+        * to tss->sp0 (through MSR). so no automatic context saving is done.
+        * interrupts are disabled.
+        *
+        * register usage:
+        * edi: call type (IPCVEC, KERVEC)
+        * ebx, eax, ecx: syscall params, set by userland
+        * esi, edx: esp, eip to restore, set by userland
+        *
+        * no state is automatically saved; userland does all of that.
+        */
+       mov     (%esp), %ebp /* get proc saved by arch_finish_switch_to_user */
+
+       /* inform kernel we entered by sysenter and should
+        * therefore exit through restore_user_context_sysenter
+        */
+       movl    $KTS_SYSENTER, P_KERN_TRAP_STYLE(%ebp)
+       add     usermapped_offset, %edx /* compensate for mapping difference */
+
+syscall_sysenter_common:
+       mov     %esi, SPREG(%ebp)       /* esi is return esp */
+       mov     %edx, PCREG(%ebp)       /* edx is return eip */
+
+       /* check for call type; do_ipc? */
+       cmp     $IPCVEC_UM, %edi
+       jz      ipc_entry_common
+
+       /* check for kernel trap */
+       cmp     $KERVEC_UM, %edi
+       jz      kernel_call_entry_common
+
+       /* unrecognized call number; restore user with error */
+       movl    $-1, AXREG(%ebp)
+       push    %ebp    
+       call    restore_user_context    /* restore_user_context(%ebp); */
+
 /*
  * IPC is only from a process to kernel
  */
-ENTRY(ipc_entry)
+ENTRY(ipc_entry_softint_orig)
+       SAVE_PROCESS_CTX(0, KTS_INT_ORIG)
+       jmp ipc_entry_common
 
-       SAVE_PROCESS_CTX(0)
+ENTRY(ipc_entry_softint_um)
+       SAVE_PROCESS_CTX(0, KTS_INT_UM)
+       jmp ipc_entry_common
 
+ENTRY(ipc_entry_common)
        /* save the pointer to the current process */
        push    %ebp
 
@@ -226,10 +297,15 @@ ENTRY(ipc_entry)
 /*
  * kernel call is only from a process to kernel
  */
-ENTRY(kernel_call_entry)
+ENTRY(kernel_call_entry_orig)
+       SAVE_PROCESS_CTX(0, KTS_INT_ORIG)
+       jmp     kernel_call_entry_common
 
-       SAVE_PROCESS_CTX(0)
+ENTRY(kernel_call_entry_um)
+       SAVE_PROCESS_CTX(0, KTS_INT_UM)
+       jmp     kernel_call_entry_common
 
+ENTRY(kernel_call_entry_common)
        /* save the pointer to the current process */
        push    %ebp
 
@@ -270,7 +346,7 @@ exception_entry:
        TEST_INT_IN_KERNEL(12, exception_entry_nested)
 
 exception_entry_from_user:
-       SAVE_PROCESS_CTX(8)
+       SAVE_PROCESS_CTX(8, KTS_INT_HARD)
 
        /* stop user process cycles */
        push    %ebp
@@ -306,10 +382,37 @@ exception_entry_nested:
        /* resume execution at the point of exception */
        iret
 
-/*===========================================================================*/
-/*                             restart                                      */
-/*===========================================================================*/
-ENTRY(restore_user_context)
+ENTRY(restore_user_context_sysenter)
+       /* return to userspace using sysexit.
+        * most of the context saving the userspace process is
+        * responsible for, we just have to take care of the right EIP
+        * and ESP restoring here to resume execution, and set EAX and
+        * EBX to the saved status values.
+        */
+       mov     4(%esp), %ebp           /* retrieve proc ptr arg */
+       movw    $USER_DS_SELECTOR, %ax
+       movw    %ax, %ds
+       mov     PCREG(%ebp), %edx       /* sysexit restores EIP using EDX */
+       mov     SPREG(%ebp), %ecx       /* sysexit restores ESP using ECX */
+       mov     AXREG(%ebp), %eax       /* trap return value */
+       mov     BXREG(%ebp), %ebx       /* secondary return value */
+       sti                             /* enable interrupts */
+       sysexit                         /* jump to EIP in user */
+
+ENTRY(restore_user_context_syscall)
+       /* return to userspace using sysret.
+        * the procedure is very similar to sysexit; it requires
+        * manual %esp restoring, new EIP in ECX, does not require
+        * enabling interrupts, and of course sysret instead of sysexit.
+        */
+       mov     4(%esp), %ebp           /* retrieve proc ptr arg */
+       mov     PCREG(%ebp), %ecx       /* sysret restores EIP using ECX */
+       mov     SPREG(%ebp), %esp       /* restore ESP directly */
+       mov     AXREG(%ebp), %eax       /* trap return value */
+       mov     BXREG(%ebp), %ebx       /* secondary return value */
+       sysret                          /* jump to EIP in user */
+
+ENTRY(restore_user_context_int)
        mov     4(%esp), %ebp   /* will assume P_STACKBASE == 0 */
 
        /* reconstruct the stack for iret */
@@ -415,7 +518,7 @@ LABEL(inval_opcode)
 LABEL(copr_not_available)
        TEST_INT_IN_KERNEL(4, copr_not_available_in_kernel)
        cld                     /* set direction flag to a known value */
-       SAVE_PROCESS_CTX(0)
+       SAVE_PROCESS_CTX(0, KTS_INT_HARD)
        /* stop user process cycles */
        push    %ebp
        mov     $0, %ebp
@@ -505,6 +608,7 @@ ENTRY(startup_ap_32)
 
 .data
 .short 0x526F  /* this must be the first data entry (magic #) */
+
 .bss
 k_initial_stack:
 .space K_STACK_SIZE
index d39772157ef8e292ea07e8f582ff91e23a498f30..e460650dcc9e0b6ad02e83b68b7524423ae88d54 100644 (file)
@@ -16,3 +16,5 @@ member CSREG p_reg.cs
 member PSWREG p_reg.psw
 member SPREG p_reg.sp
 member P_CR3 p_seg.p_cr3
+member P_KERN_TRAP_STYLE p_seg.p_kern_trap_style
+member P_MAGIC p_magic
index 23f566107f5a776305037c1904376f81539764f7..8dc050220a3d55fa9a45e04574f3b1dc04153f20 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <string.h>
 #include <assert.h>
+#include <minix/cpufeature.h>
 #include <machine/multiboot.h>
 
 #include "kernel/kernel.h"
@@ -25,6 +26,8 @@ struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
 struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
 struct tss_s tss[CONFIG_MAX_CPUS];
 
+u32_t k_percpu_stacks[CONFIG_MAX_CPUS];
+
 int prot_init_done = 0;
 
 phys_bytes vir2phys(void *vir)
@@ -141,8 +144,10 @@ static struct gate_table_s gate_table_exceptions[] = {
        { alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
        { machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
        { simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
-       { ipc_entry, IPC_VECTOR, USER_PRIVILEGE },
-       { kernel_call_entry, KERN_CALL_VECTOR, USER_PRIVILEGE },
+       { ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
+       { kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
+       { ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
+       { kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
        { NULL, 0, 0}
 };
 
@@ -168,13 +173,45 @@ int tss_init(unsigned cpu, void * kernel_stack)
         * make space for process pointer and cpu id and point to the first
         * usable word
         */
-       t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
+       k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
        /* 
         * set the cpu id at the top of the stack so we know on which cpu is
         * this stak in use when we trap to kernel
         */
        *((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
 
+       /* Set up Intel SYSENTER support if available. */
+       if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
+         ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
+         ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
+         ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
+       }
+
+       /* Set up AMD SYSCALL support if available. */
+       if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
+               u32_t msr_lo, msr_hi;
+
+               /* set SYSCALL ENABLE bit in EFER MSR */
+               ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
+               msr_lo |= AMD_EFER_SCE;
+               ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);
+
+               /* set STAR register value */
+#define set_star_cpu(forcpu) if(cpu == forcpu) {                               \
+               ia32_msr_write(AMD_MSR_STAR,                                    \
+                 ((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR,    \
+                 (u32_t) ipc_entry_syscall_cpu ## forcpu); }
+               set_star_cpu(0);
+               set_star_cpu(1);
+               set_star_cpu(2);
+               set_star_cpu(3);
+               set_star_cpu(4);
+               set_star_cpu(5);
+               set_star_cpu(6);
+               set_star_cpu(7);
+               assert(CONFIG_MAX_CPUS <= 8);
+       }
+
        return SEG_SELECTOR(index);
 }
 
@@ -285,6 +322,11 @@ void prot_init()
 {
   extern char k_boot_stktop;
 
+  if(_cpufeature(_CPUF_I386_SYSENTER))
+       minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
+  if(_cpufeature(_CPUF_I386_SYSCALL))
+       minix_feature_flags |= MKF_I386_AMD_SYSCALL;
+
   memset(gdt, 0, sizeof(gdt));
   memset(idt, 0, sizeof(idt));
 
index 1262f6cd62ffdc05fca368126c469e6358498081..6138e4402313c9638c3042dd1547069a7a4eda74 100644 (file)
@@ -73,7 +73,7 @@
  * displ is the stack displacement. In case of an exception, there are two extra
  * value on the stack - error code and the exception number
  */
-#define SAVE_PROCESS_CTX(displ) \
+#define SAVE_PROCESS_CTX(displ, trapcode) \
                                                                \
        cld /* set the direction flag to a known state */       ;\
                                                                \
@@ -82,6 +82,7 @@
        movl    (CURR_PROC_PTR + 4 + displ)(%esp), %ebp ;\
                                                        \
        SAVE_GP_REGS(%ebp)                              ;\
+        movl   $trapcode, P_KERN_TRAP_STYLE(%ebp)      ;\
        pop     %esi                    /* get the orig %ebp and save it */ ;\
        mov     %esi, BPREG(%ebp)                       ;\
                                                        \
diff --git a/kernel/arch/i386/usermapped_data_arch.c b/kernel/arch/i386/usermapped_data_arch.c
new file mode 100644 (file)
index 0000000..837836a
--- /dev/null
@@ -0,0 +1,33 @@
+#include "kernel.h"
+#include "arch_proto.h"
+
+struct minix_ipcvecs minix_ipcvecs_softint = {
+       .send_ptr = usermapped_send_softint,
+       .receive_ptr = usermapped_receive_softint,
+       .sendrec_ptr = usermapped_sendrec_softint,
+       .sendnb_ptr = usermapped_sendnb_softint,
+       .notify_ptr = usermapped_notify_softint,
+       .do_kernel_call_ptr = usermapped_do_kernel_call_softint,
+       .senda_ptr = usermapped_senda_softint
+};
+
+struct minix_ipcvecs minix_ipcvecs_sysenter = {
+       .send_ptr = usermapped_send_sysenter,
+       .receive_ptr = usermapped_receive_sysenter,
+       .sendrec_ptr = usermapped_sendrec_sysenter,
+       .sendnb_ptr = usermapped_sendnb_sysenter,
+       .notify_ptr = usermapped_notify_sysenter,
+       .do_kernel_call_ptr = usermapped_do_kernel_call_sysenter,
+       .senda_ptr = usermapped_senda_sysenter
+};
+
+struct minix_ipcvecs minix_ipcvecs_syscall = {
+       .send_ptr = usermapped_send_syscall,
+       .receive_ptr = usermapped_receive_syscall,
+       .sendrec_ptr = usermapped_sendrec_syscall,
+       .sendnb_ptr = usermapped_sendnb_syscall,
+       .notify_ptr = usermapped_notify_syscall,
+       .do_kernel_call_ptr = usermapped_do_kernel_call_syscall,
+       .senda_ptr = usermapped_senda_syscall
+};
+
diff --git a/kernel/arch/i386/usermapped_glo_ipc.S b/kernel/arch/i386/usermapped_glo_ipc.S
new file mode 100644 (file)
index 0000000..3df59df
--- /dev/null
@@ -0,0 +1,94 @@
+#include <minix/ipcconst.h>
+#include <machine/asm.h>
+
+/**========================================================================* */
+/*                           IPC assembly routines                       * */
+/**========================================================================* */
+/* all message passing routines save ebx, but destroy eax and ecx. */
+
+#define IPCFUNC(name,SETARGS,VEC,POSTTRAP)                      \
+ENTRY(usermapped_ ## name ## _softint)                                 ;\
+       push    %ebp                                                    ;\
+       movl    %esp, %ebp                                              ;\
+       push    %ebx                                                    ;\
+       SETARGS                                                         ;\
+       int     $VEC    /* trap to the kernel */                        ;\
+       mov     %ebx, %ecx      /* save %ebx */                         ;\
+       POSTTRAP                                                        ;\
+       pop     %ebx                                                    ;\
+       pop     %ebp                                                    ;\
+       ret                                                             ;\
+ENTRY(usermapped_ ## name ## _sysenter)                                        ;\
+       push    %ebp                                                    ;\
+       movl    %esp, %ebp                                              ;\
+       push    %ebp                                                    ;\
+       push    %edx                                                    ;\
+       push    %ebx                                                    ;\
+       push    %esi                                                    ;\
+       push    %edi                                                    ;\
+       movl    %esp, %esi      /* kernel uses %esi for restored %esp */;\
+       movl    $0f, %edx       /* kernel uses %edx for restored %eip */;\
+       movl    $VEC, %edi      /* %edi to distinguish ipc/kerncall */  ;\
+       SETARGS                 /* call-specific register setup */      ;\
+       sysenter                /* disappear into kernel */             ;\
+0:                                                                     ;\
+       mov     %ebx, %ecx      /* return w. state mangled; save %ebx */;\
+       pop     %edi                                                    ;\
+       pop     %esi                                                    ;\
+       pop     %ebx                                                    ;\
+       pop     %edx                                                    ;\
+       pop     %ebp                                                    ;\
+       POSTTRAP                                                        ;\
+       pop     %ebp                                                    ;\
+       ret                                                             ;\
+ENTRY(usermapped_ ## name ## _syscall)                                 ;\
+       push    %ebp                                                    ;\
+       movl    %esp, %ebp                                              ;\
+       push    %ebp                                                    ;\
+       push    %edx                                                    ;\
+       push    %ebx                                                    ;\
+       push    %esi                                                    ;\
+       push    %edi                                                    ;\
+       movl    $VEC, %edi      /* %edi to distinguish ipc/kerncall */  ;\
+       SETARGS                 /* call-specific register setup */      ;\
+       movl    %ecx, %edx      /* %ecx is clobbered by SYSCALL */      ;\
+       syscall                 /* disappear into kernel */             ;\
+       mov     %ebx, %ecx      /* return w. state mangled; save %ebx */;\
+       pop     %edi                                                    ;\
+       pop     %esi                                                    ;\
+       pop     %ebx                                                    ;\
+       pop     %edx                                                    ;\
+       pop     %ebp                                                    ;\
+       POSTTRAP                                                        ;\
+       pop     %ebp                                                    ;\
+       ret
+
+#define IPCARGS(opcode)                                                        \
+       movl    8(%ebp), %eax   /* eax = dest-src */                    ;\
+       movl    12(%ebp), %ebx  /* ebx = message pointer */             ;\
+       movl    $opcode, %ecx                                           ;\
+
+#define SENDA_ARGS             \
+       movl    12(%ebp), %eax  /* eax = count */                       ;\
+       movl    8(%ebp), %ebx   /* ebx = table */                       ;\
+       movl    $SENDA, %ecx                                            ;\
+
+#define GETSTATUS                                                      \
+       push    %eax                                                    ;\
+       movl    16(%ebp), %eax      /* eax = status pointer */          ;\
+       movl    %ecx,  (%eax)                                           ;\
+       pop     %eax
+
+#define KERNARGS mov 8(%ebp), %eax
+
+IPCFUNC(send,IPCARGS(SEND),IPCVEC_UM,)
+IPCFUNC(receive,IPCARGS(RECEIVE),IPCVEC_UM,GETSTATUS)
+IPCFUNC(sendrec,IPCARGS(SENDREC),IPCVEC_UM,)
+IPCFUNC(sendnb,IPCARGS(SENDNB),IPCVEC_UM,)
+IPCFUNC(notify,IPCARGS(NOTIFY),IPCVEC_UM,)
+IPCFUNC(senda,SENDA_ARGS,IPCVEC_UM,)
+IPCFUNC(do_kernel_call,KERNARGS,KERVEC_UM,)
+
+.data
+LABEL(usermapped_offset)
+.space 4
index 9dff7251cfa0ebf74f3d5c57b3fb145b71c478d4..652f68f5b12de61105727d3958ddc4e7d5031011 100644 (file)
@@ -240,8 +240,10 @@ void kmain(kinfo_t *local_cbi)
                get_cpulocal_var(proc_ptr) = rp;
 
        /* Process isn't scheduled until VM has set up a pagetable for it. */
-       if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0)
+       if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0) {
                rp->p_rts_flags |= RTS_VMINHIBIT;
+               rp->p_rts_flags |= RTS_BOOTINHIBIT;
+       }
 
        rp->p_rts_flags |= RTS_PROC_STOP;
        rp->p_rts_flags &= ~RTS_SLOT_FREE;
index 9b792b931ae4522eb716af6111aaeb0c44c5fc18..0a32d022d1c17f3decca8166032f7d0619367412 100644 (file)
@@ -159,6 +159,7 @@ struct proc {
                                   pick a new one. Process was dequeued and
                                   should be enqueued at the end of some run
                                   queue again */
+#define RTS_BOOTINHIBIT        0x10000 /* not ready until VM has made it */
 
 /* A process is runnable iff p_rts_flags == 0. */
 #define rts_f_is_runnable(flg) ((flg) == 0)
index 28dbba4a622a6c145a8ed05efcb8f02d6d320091..aa97024800569f30a38640d32a031191b893a9a6 100644 (file)
@@ -59,6 +59,7 @@ void enqueue(struct proc *rp);
 void dequeue(struct proc *rp);
 void switch_to_user(void);
 void arch_proc_reset(struct proc *rp);
+void arch_proc_setcontext(struct proc *rp, struct stackframe_s *state, int user);
 struct proc * arch_finish_switch_to_user(void);
 struct proc *endpoint_lookup(endpoint_t ep);
 #if DEBUG_ENABLE_IPC_WARNINGS
index 090557fd040586c942f87d66635e99a33ce82e71..f96a727bebd0cebe8f85c5e81d8ee33f1e73b64b 100644 (file)
@@ -49,7 +49,7 @@ int do_sigreturn(struct proc * caller, message * m_ptr)
 #endif
 
   /* Restore the registers. */
-  memcpy(&rp->p_reg, &sc.sc_regs, sizeof(sigregs));
+  arch_proc_setcontext(rp, &sc.sc_regs, 1);
 #if defined(__i386__)
   if(sc.sc_flags & MF_FPU_INITIALIZED)
   {
@@ -60,8 +60,6 @@ int do_sigreturn(struct proc * caller, message * m_ptr)
   }
 #endif
 
-  rp->p_misc_flags |= MF_CONTEXT_SET;
-
   return(OK);
 }
 #endif /* USE_SIGRETURN */
index f3a9b980eacd1bef2880ae723eba857e3adaf1b9..7ecc5e30b783144147ed56c43be94f1c055b3f3b 100644 (file)
@@ -174,6 +174,9 @@ int do_vmctl(struct proc * caller, message * m_ptr)
                /* VM says: forget about old mappings we have cached. */
                mem_clear_mapcache();
                return OK;
+       case VMCTL_BOOTINHIBIT_CLEAR:
+               RTS_UNSET(p, RTS_BOOTINHIBIT);
+               return OK;
   }
 
   /* Try architecture-specific vmctls. */
index 4e828d749d7c096fedc5be6df3b8c101da11e695..bcb70a20c6c31d74144e5558d9e557277418b5c9 100644 (file)
@@ -9,40 +9,40 @@
 /*                           IPC assembly routines                       * */
 /**========================================================================* */
 /* all message passing routines save ebx, but destroy eax and ecx. */
-ENTRY(_send)
+ENTRY(_send_orig)
        push    %ebp
        movl    %esp, %ebp
        push    %ebx
        movl    SRC_DST(%ebp), %eax     /* eax = dest-src */
        movl    MESSAGE(%ebp), %ebx     /* ebx = message pointer */
        movl    $SEND, %ecx     /* _send(dest, ptr) */
-       int     $IPCVEC /* trap to the kernel */
+       int     $IPCVEC_ORIG    /* trap to the kernel */
        pop     %ebx
        pop     %ebp
        ret
 
-ENTRY(_receive)
+ENTRY(_receive_orig)
        push    %ebp
        movl    %esp, %ebp
        push    %ebx
        movl    SRC_DST(%ebp), %eax     /* eax = dest-src */
        movl    MESSAGE(%ebp), %ebx     /* ebx = message pointer */
        movl    $RECEIVE, %ecx  /* _receive(src, ptr) */
-       int     $IPCVEC /* trap to the kernel */
+       int     $IPCVEC_ORIG    /* trap to the kernel */
        movl    STATUS(%ebp), %ecx      /* ecx = status pointer */
        movl    %ebx, (%ecx)
        pop     %ebx
        pop     %ebp
        ret
 
-ENTRY(_sendrec)
+ENTRY(_sendrec_orig)
        push    %ebp
        movl    %esp, %ebp
        push    %ebx
        movl    SRC_DST(%ebp), %eax     /* eax = dest-src */
        movl    MESSAGE(%ebp), %ebx     /* ebx = message pointer */
        movl    $SENDREC, %ecx  /* _sendrec(srcdest, ptr) */
-       int     $IPCVEC /* trap to the kernel */
+       int     $IPCVEC_ORIG    /* trap to the kernel */
        pop     %ebx
        pop     %ebp
        ret
@@ -54,38 +54,38 @@ ENTRY(_minix_kernel_info_struct)
        movl    $0, %eax
        movl    $0, %ebx
        movl    $MINIX_KERNINFO, %ecx
-       int     $IPCVEC /* trap to the kernel */
+       int     $IPCVEC_ORIG    /* trap to the kernel */
        movl    8(%ebp), %ecx   /* ecx = return struct ptr */
        movl    %ebx, (%ecx)
        pop     %ebx
        pop     %ebp
        ret
 
-ENTRY(_notify)
+ENTRY(_notify_orig)
        push    %ebp
        movl    %esp, %ebp
        push    %ebx
        movl    SRC_DST(%ebp), %eax     /* eax = destination  */
        movl    $NOTIFY, %ecx   /* _notify(srcdst) */
-       int     $IPCVEC /* trap to the kernel */
+       int     $IPCVEC_ORIG    /* trap to the kernel */
        pop     %ebx
        pop     %ebp
        ret
 
-ENTRY(_sendnb)
+ENTRY(_sendnb_orig)
        push    %ebp
        movl    %esp, %ebp
        push    %ebx
        movl    SRC_DST(%ebp), %eax     /* eax = dest-src */
        movl    MESSAGE(%ebp), %ebx     /* ebx = message pointer */
        movl    $SENDNB, %ecx   /* _sendnb(dest, ptr) */
-       int     $IPCVEC /* trap to the kernel */
+       int     $IPCVEC_ORIG    /* trap to the kernel */
        pop     %ebx
        pop     %ebp
        ret
 
-ENTRY(_do_kernel_call)
+ENTRY(_do_kernel_call_orig)
        /* pass the message pointer to kernel in the %eax register */
        movl    4(%esp), %eax
-       int     $KERVEC
+       int     $KERVEC_ORIG
        ret
index 9a2cb6d815e5fcfc42c80402017a1c48a838f447..256d851804ffb5c622ca027611b5bdcddd7277c6 100644 (file)
@@ -4,7 +4,7 @@
        MSGTAB = 8      /* message table */
        TABCOUNT = 12   /* number of entries in message table */
 
-ENTRY(_senda)
+ENTRY(_senda_orig)
        push    %ebp
        movl    %esp, %ebp
        push    %ebx
index 7b21078cae7acc3f5646b6ecf08341beec90058c..38974f39c63fcf39f9a5082fda222b29223e111a 100644 (file)
@@ -2,15 +2,29 @@
 #include <stdio.h>
 #include <minix/ipc.h>
 
+/* Minix kernel info, IPC functions pointers */
 struct minix_kerninfo *_minix_kerninfo = NULL;
 
 void    __minix_init(void) __attribute__((__constructor__, __used__));
 
+struct minix_ipcvecs _minix_ipcvecs = {
+       .sendrec_ptr = _sendrec_orig,
+       .send_ptr = _send_orig,
+       .notify_ptr = _notify_orig,
+       .senda_ptr = _senda_orig,
+       .sendnb_ptr = _sendnb_orig,
+       .receive_ptr = _receive_orig,
+       .do_kernel_call_ptr = _do_kernel_call_orig,
+};
+
 void __minix_init(void)
 {
        if((_minix_kernel_info_struct(&_minix_kerninfo)) != 0
          || _minix_kerninfo->kerninfo_magic != KERNINFO_MAGIC) {
                _minix_kerninfo = NULL;
-       }
+         } else if((_minix_kerninfo->ki_flags & MINIX_KIF_IPCVECS) &&
+               _minix_kerninfo->minix_ipcvecs) {
+               _minix_ipcvecs = *_minix_kerninfo->minix_ipcvecs;
+         }
 }
 
index 522810835ba4228bdeca67c17c8a624aec512752..c1b53e31c488ea734f1e815f314eb73cb8a923a7 100644 (file)
@@ -11,7 +11,7 @@ int _syscall(endpoint_t who, int syscallnr, message *msgptr)
   int status;
 
   msgptr->m_type = syscallnr;
-  status = _sendrec(who, msgptr);
+  status = sendrec(who, msgptr);
   if (status != 0) {
        /* 'sendrec' itself failed. */
        /* XXX - strerror doesn't know all the codes */
index 20d3ca9eed364f6b5666e7fac2a7b86582ff50ad..2ac942df958b34ba49d3254ece41c262f4893c13 100644 (file)
@@ -13,6 +13,7 @@
 #include <minix/com.h>
 #include <minix/callnr.h>
 #include <minix/vm.h>
+#include <minix/ipc.h>
 #include <minix/syslib.h>
 #include <sys/mman.h>
 #include <machine/elf.h>
index 52d8db2dc2296eeccfc197eb761fd3ec12769195..1a7c011d585a91d13d9ba74b8716f6bee792d859 100644 (file)
@@ -3,10 +3,14 @@
 #include <minix/minlib.h>
 #include <minix/cpufeature.h>
 #include <machine/vm.h>
+#include <string.h>
 
 int _cpufeature(int cpufeature)
 {
        u32_t eax, ebx, ecx, edx;
+       u32_t ef_eax = 0, ef_ebx = 0, ef_ecx = 0, ef_edx = 0;
+       unsigned int family, model, stepping;
+       int is_intel = 0, is_amd = 0;
 
        eax = ebx = ecx = edx = 0;
 
@@ -14,8 +18,34 @@ int _cpufeature(int cpufeature)
        eax = 0;
        _cpuid(&eax, &ebx, &ecx, &edx);
        if(eax > 0) {
+               char vendor[12];
+               memcpy(vendor,   &ebx, sizeof(ebx));
+               memcpy(vendor+4, &edx, sizeof(edx));
+               memcpy(vendor+8, &ecx, sizeof(ecx));
+               if(!strncmp(vendor, "GenuineIntel", sizeof(vendor)))
+                       is_intel = 1;
+               if(!strncmp(vendor, "AuthenticAMD", sizeof(vendor)))
+                       is_amd = 1;
                eax = 1;
                _cpuid(&eax, &ebx, &ecx, &edx);
+       } else return 0;
+
+       stepping   =  eax        & 0xf;
+       model    = (eax >>  4) & 0xf;
+
+       if(model == 0xf || model == 0x6) {
+               model += ((eax >> 16) & 0xf) << 4;
+       }
+
+       family   = (eax >>  8) & 0xf;
+
+       if(family == 0xf) {
+               family += (eax >> 20) & 0xff;
+       }
+
+       if(is_amd) {
+               ef_eax = 0x80000001;
+               _cpuid(&ef_eax, &ef_ebx, &ef_ecx, &ef_edx);
        }
 
        switch(cpufeature) {
@@ -53,6 +83,15 @@ int _cpufeature(int cpufeature)
                        return edx & CPUID1_EDX_HTT;
                case _CPUF_I386_HTT_MAX_NUM:
                        return (ebx >> 16) & 0xff;
+               case _CPUF_I386_SYSENTER:
+                       if(!is_intel) return 0;
+                       if(!(edx & CPUID1_EDX_SYSENTER)) return 0;
+                       if(family == 6 && model < 3 && stepping < 3) return 0;
+                       return 1;
+               case _CPUF_I386_SYSCALL:
+                       if(!is_amd) return 0;
+                       if(!(ef_edx & CPUID_EF_EDX_SYSENTER)) return 0;
+                       return 1;
        }
 
        return 0;
index 264547325783b91ca7051e3cb654e370b0310e4e..856e45aa663a1c107e0e80629b8661e0d96c6550 100644 (file)
@@ -4,6 +4,6 @@
 int _kernel_call(int syscallnr, message *msgptr)
 {
   msgptr->m_type = syscallnr;
-  _do_kernel_call(msgptr);
+  do_kernel_call(msgptr);
   return(msgptr->m_type);
 }
index f6778699a60be609884f970870889e75a42d37c4..b547471b7097d33ef64e0dba03ece5bafb4f01fe 100644 (file)
@@ -14,7 +14,7 @@ register message *msgptr;
   int status;
 
   msgptr->m_type = syscallnr;
-  status = _sendrec(who, msgptr);
+  status = sendrec(who, msgptr);
   if (status != 0) return(status);
   return(msgptr->m_type);
 }
index 25a0b862dd2b08b06acac06a7a55d8fa222f0259..dbcae012e912e34705cdb9505cd2326e8698608c 100644 (file)
@@ -294,6 +294,10 @@ void exec_bootproc(struct vmproc *vmp, struct boot_image *ip)
         if(sys_exec(vmp->vm_endpoint, (char *) execi->stack_high - 12,
                (char *) ip->proc_name, execi->pc) != OK)
                panic("vm: boot process exec of %d failed\n", vmp->vm_endpoint);
+
+       /* make it runnable */
+       if(sys_vmctl(vmp->vm_endpoint, VMCTL_BOOTINHIBIT_CLEAR, 0) != OK)
+               panic("VMCTL_BOOTINHIBIT_CLEAR failed");
 }
 
 void init_vm(void)
@@ -301,6 +305,7 @@ void init_vm(void)
        int s, i;
        static struct memory mem_chunks[NR_MEMS];
        static struct boot_image *ip;
+       extern void __minix_init(void);
 
 #if SANITYCHECKS
        incheck = nocheck = 0;
@@ -414,6 +419,11 @@ void init_vm(void)
 
        /* Initialize the structures for queryexit */
        init_query_exit();
+
+       /* Acquire kernel ipc vectors that weren't available
+        * before VM had determined kernel mappings
+        */
+       __minix_init();
 }
 
 /*===========================================================================*