reg_t p_cr3; /* page table root */
u32_t *p_cr3_v;
char *fpu_state;
+ int p_kern_trap_style;
} segframe_t;
struct cpu_info {
#ifdef __minix
#define IMPORT(sym) \
.extern _C_LABEL(sym)
+
+#define KERVEC_ORIG 32 /* syscall trap to kernel */
+#define IPCVEC_ORIG 33 /* ipc trap to kernel */
+
+#define KERVEC_UM 34 /* syscall trap to kernel, user-mapped code */
+#define IPCVEC_UM 35 /* ipc trap to kernel, user-mapped code */
+
#endif
#endif /* !_I386_ASM_H_ */
#define OVERFLOW_VECTOR 4 /* from INTO */
/* Fixed system call vector. */
-#define KERN_CALL_VECTOR 32 /* system calls are made with int SYSVEC */
-#define IPC_VECTOR 33 /* interrupt vector for ipc */
+#define KERN_CALL_VECTOR_ORIG 32 /* system calls are made with int SYSVEC */
+#define IPC_VECTOR_ORIG 33 /* interrupt vector for ipc */
+#define KERN_CALL_VECTOR_UM 34 /* user-mapped equivalent */
+#define IPC_VECTOR_UM 35 /* user-mapped equivalent */
/* Hardware interrupt numbers. */
#ifndef USE_APIC
/* CPUID flags */
#define CPUID1_EDX_FPU (1L) /* FPU presence */
#define CPUID1_EDX_PSE (1L << 3) /* Page Size Extension */
+#define CPUID1_EDX_SYSENTER (1L << 11) /* Intel SYSENTER */
#define CPUID1_EDX_PGE (1L << 13) /* Page Global (bit) Enable */
#define CPUID1_EDX_APIC_ON_CHIP (1L << 9) /* APIC is present on the chip */
#define CPUID1_EDX_TSC (1L << 4) /* Timestamp counter present */
#define CPUID1_ECX_SSE4_1 (1L << 19)
#define CPUID1_ECX_SSE4_2 (1L << 20)
+#define CPUID_EF_EDX_SYSENTER (1L << 11) /* SYSCALL/SYSRET (CPUID leaf 0x80000001 %edx) */
+
#ifndef __ASSEMBLY__
#include <minix/type.h>
#define VMCTL_VMINHIBIT_SET 30
#define VMCTL_VMINHIBIT_CLEAR 31
#define VMCTL_CLEARMAPCACHE 32
+#define VMCTL_BOOTINHIBIT_CLEAR 33
/* Codes and field names for SYS_SYSCTL. */
#define SYSCTL_CODE m1_i1 /* SYSCTL_CODE_* below */
/* magic value to put in struct proc entries for sanity checks. */
#define PMAGIC 0xC0FFEE1
+/* MINIX_KERNFLAGS flags */
+#define MKF_I386_INTEL_SYSENTER (1L << 0) /* SYSENTER available and supported */
+#define MKF_I386_AMD_SYSCALL (1L << 1) /* SYSCALL available and supported */
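+/* these bits are set in minix_feature_flags by the kernel and exported to
+ * userland through minix_kerninfo.minix_feature_flags. */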
+
#endif /* _MINIX_CONST_H */
#define _CPUF_I386_HTT 13 /* Supports HTT */
#define _CPUF_I386_HTT_MAX_NUM 14 /* Maximal num of threads */
+#define _CPUF_I386_MTRR 15
+#define _CPUF_I386_SYSENTER 16 /* Intel SYSENTER instrs */
+#define _CPUF_I386_SYSCALL 17 /* AMD SYSCALL instrs */
+
int _cpufeature(int featureno);
#endif
#endif
#include <minix/ipcconst.h>
#include <minix/type.h>
+#include <minix/const.h>
/*==========================================================================*
* Types relating to messages. *
#define AMF_NOTIFY_ERR 020 /* Send a notification when AMF_DONE is set and
* delivery of the message failed */
-/* Hide names to avoid name space pollution. */
-#define echo _echo
-#define notify _notify
-#define sendrec _sendrec
-#define receive _receive
-#define send _send
-#define sendnb _sendnb
-#define senda _senda
-
-int echo(message *m_ptr);
-int notify(endpoint_t dest);
-int sendrec(endpoint_t src_dest, message *m_ptr);
-int receive(endpoint_t src, message *m_ptr, int *status_ptr);
-int send(endpoint_t dest, message *m_ptr);
-int sendnb(endpoint_t dest, message *m_ptr);
-int senda(asynmsg_t *table, size_t count);
+int _send_orig(endpoint_t dest, message *m_ptr);
+int _receive_orig(endpoint_t src, message *m_ptr, int *status_ptr);
+int _sendrec_orig(endpoint_t src_dest, message *m_ptr);
+int _sendnb_orig(endpoint_t dest, message *m_ptr);
+int _notify_orig(endpoint_t dest);
+int _senda_orig(asynmsg_t *table, size_t count);
+int _do_kernel_call_orig(message *m_ptr);
+
int _minix_kernel_info_struct(struct minix_kerninfo **);
-int _do_kernel_call(message *m_ptr);
+struct minix_ipcvecs {
+ int (*send_ptr)(endpoint_t dest, message *m_ptr);
+ int (*receive_ptr)(endpoint_t src, message *m_ptr, int *st);
+ int (*sendrec_ptr)(endpoint_t src_dest, message *m_ptr);
+ int (*sendnb_ptr)(endpoint_t dest, message *m_ptr);
+ int (*notify_ptr)(endpoint_t dest);
+ int (*do_kernel_call_ptr)(message *m_ptr);
+ int (*senda_ptr)(asynmsg_t *table, size_t count);
+};
+
+/* kernel-set IPC vectors retrieved by a constructor in libc/sys-minix/init.c */
+extern struct minix_ipcvecs _minix_ipcvecs;
+
+#define CHOOSETRAP(name) (_minix_ipcvecs. name ## _ptr)
+
+#define send CHOOSETRAP(send)
+#define receive CHOOSETRAP(receive)
+#define sendrec CHOOSETRAP(sendrec)
+#define sendnb CHOOSETRAP(sendnb)
+#define notify CHOOSETRAP(notify)
+#define do_kernel_call CHOOSETRAP(do_kernel_call)
+#define senda CHOOSETRAP(senda)
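+/* with the defines above, an ordinary call such as send(dest, &m) expands to
+ * (_minix_ipcvecs.send_ptr)(dest, &m), so callers transparently use whichever
+ * trap mechanism the kernel selected. */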
#endif /* _IPC_H */
*/
#define KERNINFO_MAGIC 0xfc3b84bf
u32_t kerninfo_magic;
- u32_t minix_feature_flags;
- u32_t flags_unused1;
+ u32_t minix_feature_flags; /* features in minix kernel */
+ u32_t ki_flags; /* what is present in this struct */
u32_t flags_unused2;
u32_t flags_unused3;
u32_t flags_unused4;
struct machine *machine;
struct kmessages *kmessages;
struct loadinfo *loadinfo;
+ struct minix_ipcvecs *minix_ipcvecs;
} __packed;
+#define MINIX_KIF_IPCVECS (1L << 0)
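+/* userland must check this bit before dereferencing minix_ipcvecs; see the
+ * __minix_init() constructor in libc. */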
+
#endif /* _TYPE_H */
do_iopenable.c do_readbios.c do_sdevio.c exception.c i8259.c io_inb.S \
io_inl.S io_intr.S io_inw.S io_outb.S io_outl.S io_outw.S klib.S klib16.S memory.c \
oxpcie.c protect.c direct_tty_utils.c arch_reset.c \
- pg_utils.c
+ pg_utils.c usermapped_glo_ipc.S usermapped_data_arch.c
OBJS.kernel+= ${UNPAGED_OBJS}
};
static struct gate_table_s gate_table_common[] = {
- { ipc_entry, IPC_VECTOR, USER_PRIVILEGE },
- { kernel_call_entry, KERN_CALL_VECTOR, USER_PRIVILEGE },
+ { ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
+ { kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
+ { ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
+ { kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
{ NULL, 0, 0}
};
ENTRY(apic_hwint##irq) \
TEST_INT_IN_KERNEL(4, 0f) ;\
\
- SAVE_PROCESS_CTX(0) ;\
+ SAVE_PROCESS_CTX(0, KTS_INT_HARD) ;\
push %ebp ;\
call _C_LABEL(context_stop) ;\
add $4, %esp ;\
#define lapic_intr(func) \
TEST_INT_IN_KERNEL(4, 0f) ;\
\
- SAVE_PROCESS_CTX(0) ;\
+ SAVE_PROCESS_CTX(0, KTS_INT_HARD) ;\
push %ebp ;\
call _C_LABEL(context_stop) ;\
add $4, %esp ;\
#include <string.h>
#include <machine/cmos.h>
#include <machine/bios.h>
+#include <machine/cpu.h>
#include <minix/portio.h>
#include <minix/cpufeature.h>
#include <assert.h>
void arch_proc_reset(struct proc *pr)
{
char *v = NULL;
+ struct stackframe_s reg;
assert(pr->p_nr < NR_PROCS);
}
/* Clear process state. */
- memset(&pr->p_reg, 0, sizeof(pr->p_reg));
+ memset(&reg, 0, sizeof(pr->p_reg));
if(iskerneln(pr->p_nr))
- pr->p_reg.psw = INIT_TASK_PSW;
+ reg.psw = INIT_TASK_PSW;
else
- pr->p_reg.psw = INIT_PSW;
+ reg.psw = INIT_PSW;
pr->p_seg.fpu_state = v;
pr->p_reg.ss =
pr->p_reg.es =
pr->p_reg.ds = USER_DS_SELECTOR;
+
+ /* set full context and make sure it gets restored */
+ arch_proc_setcontext(pr, &reg, 0);
}
void arch_set_secondary_ipc_return(struct proc *p, u32_t val)
return p;
}
+void arch_proc_setcontext(struct proc *p, struct stackframe_s *state, int isuser)
+{
+ if(isuser) {
+ /* Restore user bits of psw from sc, maintain system bits
+ * from proc.
+ */
+ state->psw = (state->psw & X86_FLAGS_USER) |
+ (p->p_reg.psw & ~X86_FLAGS_USER);
+ }
+
+ /* someone wants to totally re-initialize process state */
+ assert(sizeof(p->p_reg) == sizeof(*state));
+ memcpy(&p->p_reg, state, sizeof(*state));
+
+ /* further code is instructed to not touch the context
+ * any more
+ */
+ p->p_misc_flags |= MF_CONTEXT_SET;
+
+ /* on x86 this requires returning using iret (KTS_INT)
+ * so that the full context is restored instead of relying on
+ * the userspace doing it (as it would do on SYSEXIT).
+ * as ESP and EIP are also reset, userspace won't try to
+ * restore bogus context after returning.
+ *
+ * if the process is not blocked, or if the kernel is going to
+ * ignore our trap style, we needn't panic, but things will
+ * probably not go well for the process (the restored context
+ * will be ignored) and the situation should be debugged.
+ */
+ if(!(p->p_rts_flags)) {
+ printf("WARNINIG: setting full context of runnable process\n");
+ print_proc(p);
+ util_stacktrace();
+ }
+ if(p->p_seg.p_kern_trap_style == KTS_NONE)
+ printf("WARNINIG: setting full context of out-of-kernel process\n");
+ p->p_seg.p_kern_trap_style = KTS_FULLCONTEXT;
+}
+
+void restore_user_context(struct proc *p)
+{
+ int trap_style = p->p_seg.p_kern_trap_style;
+#if 0
+#define TYPES 10
+ static int restores[TYPES], n = 0;
+
+ p->p_seg.p_kern_trap_style = KTS_NONE;
+
+ if(trap_style >= 0 && trap_style < TYPES)
+ restores[trap_style]++;
+
+ if(!(n++ % 500000)) {
+ int t;
+ for(t = 0; t < TYPES; t++)
+ if(restores[t])
+ printf("%d: %d ", t, restores[t]);
+ printf("\n");
+ }
+#endif
+
+ if(trap_style == KTS_SYSENTER) {
+ restore_user_context_sysenter(p);
+ NOT_REACHABLE;
+ }
+
+ if(trap_style == KTS_SYSCALL) {
+ restore_user_context_syscall(p);
+ NOT_REACHABLE;
+ }
+
+ switch(trap_style) {
+ case KTS_NONE:
+ panic("no entry trap style known");
+ case KTS_INT_HARD:
+ case KTS_INT_UM:
+ case KTS_FULLCONTEXT:
+ case KTS_INT_ORIG:
+ restore_user_context_int(p);
+ NOT_REACHABLE;
+ default:
+ panic("unknown trap style recorded");
+ NOT_REACHABLE;
+ }
+
+ NOT_REACHABLE;
+}
+
void fpu_sigcontext(struct proc *pr, struct sigframe *fr, struct sigcontext *sc)
{
int fp_error;
#define CPUID_UNHALTED_CORE_CYCLES_AVAILABLE 0
-#define INTEL_MSR_PERFMON_CRT0 0xc1
-#define INTEL_MSR_PERFMON_SEL0 0x186
-
-#define INTEL_MSR_PERFMON_SEL0_ENABLE (1 << 22)
-
/*
* Intel architecture performance counters watchdog
*/
frame->vector, (unsigned long)frame->errcode,
(unsigned long)frame->eip, frame->cs,
(unsigned long)frame->eflags);
- printseg("cs: ", 1, saved_proc, frame->cs);
- printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds);
- if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) {
- printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss);
- }
proc_stacktrace(saved_proc);
}
void machine_check(void);
void simd_exception(void);
+void restore_user_context_int(struct proc *);
+void restore_user_context_sysenter(struct proc *);
+void restore_user_context_syscall(struct proc *);
+
/* Software interrupt handlers, in numerical order. */
void trp(void);
-void ipc_entry(void);
-void kernel_call_entry(void);
+void ipc_entry_softint_orig(void);
+void ipc_entry_softint_um(void);
+void ipc_entry_sysenter(void);
+void ipc_entry_syscall_cpu0(void);
+void ipc_entry_syscall_cpu1(void);
+void ipc_entry_syscall_cpu2(void);
+void ipc_entry_syscall_cpu3(void);
+void ipc_entry_syscall_cpu4(void);
+void ipc_entry_syscall_cpu5(void);
+void ipc_entry_syscall_cpu6(void);
+void ipc_entry_syscall_cpu7(void);
+void kernel_call_entry_orig(void);
+void kernel_call_entry_um(void);
void level0_call(void);
/* exception.c */
void x86_load_fs(u32_t);
void x86_load_gs(u32_t);
+/* ipc functions in usermapped_ipc.S */
+int usermapped_send_softint(endpoint_t dest, message *m_ptr);
+int usermapped_receive_softint(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_softint(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_softint(endpoint_t dest, message *m_ptr);
+int usermapped_notify_softint(endpoint_t dest);
+int usermapped_do_kernel_call_softint(message *m_ptr);
+int usermapped_senda_softint(asynmsg_t *table, size_t count);
+
+int usermapped_send_syscall(endpoint_t dest, message *m_ptr);
+int usermapped_receive_syscall(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_syscall(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_syscall(endpoint_t dest, message *m_ptr);
+int usermapped_notify_syscall(endpoint_t dest);
+int usermapped_do_kernel_call_syscall(message *m_ptr);
+int usermapped_senda_syscall(asynmsg_t *table, size_t count);
+
+int usermapped_send_sysenter(endpoint_t dest, message *m_ptr);
+int usermapped_receive_sysenter(endpoint_t src, message *m_ptr, int *status_ptr);
+int usermapped_sendrec_sysenter(endpoint_t src_dest, message *m_ptr);
+int usermapped_sendnb_sysenter(endpoint_t dest, message *m_ptr);
+int usermapped_notify_sysenter(endpoint_t dest);
+int usermapped_do_kernel_call_sysenter(message *m_ptr);
+int usermapped_senda_sysenter(asynmsg_t *table, size_t count);
void switch_k_stack(void * esp, void (* continuation)(void));
#define PG_ALLOCATEME ((phys_bytes)-1)
+/* MSRs */
+#define INTEL_MSR_PERFMON_CRT0 0xc1
+#define INTEL_MSR_SYSENTER_CS 0x174
+#define INTEL_MSR_SYSENTER_ESP 0x175
+#define INTEL_MSR_SYSENTER_EIP 0x176
+#define INTEL_MSR_PERFMON_SEL0 0x186
+
+#define INTEL_MSR_PERFMON_SEL0_ENABLE (1 << 22)
+
+#define AMD_EFER_SCE (1L << 0) /* SYSCALL/SYSRET enabled */
+#define AMD_MSR_EFER 0xC0000080 /* extended features msr */
+#define AMD_MSR_STAR 0xC0000081 /* SYSCALL params msr */
+
+/* trap styles recorded on kernel entry and exit */
+#define KTS_NONE 1 /* invalid */
+#define KTS_INT_HARD 2 /* exception / hard interrupt */
+#define KTS_INT_ORIG 3 /* soft interrupt from libc */
+#define KTS_INT_UM 4 /* soft interrupt from usermapped code */
+#define KTS_FULLCONTEXT 5 /* must restore full context */
+#define KTS_SYSENTER 6 /* SYSENTER instruction (usermapped) */
+#define KTS_SYSCALL 7 /* SYSCALL instruction (usermapped) */
+
#endif /* _I386_ACONST_H */
}
#endif
if(index == first_um_idx) {
- u32_t usermapped_offset;
+ extern struct minix_ipcvecs minix_ipcvecs_sysenter,
+ minix_ipcvecs_syscall,
+ minix_ipcvecs_softint;
+ extern u32_t usermapped_offset;
assert(addr > (u32_t) &usermapped_start);
usermapped_offset = addr - (u32_t) &usermapped_start;
memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
ASSIGN(kmessages);
ASSIGN(loadinfo);
+ /* select the right set of IPC routines to map into processes */
+ if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
+ printf("kernel: selecting intel sysenter ipc style\n");
+ minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
+ } else if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
+ printf("kernel: selecting amd syscall ipc style\n");
+ minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
+ } else {
+ printf("kernel: selecting fallback (int) ipc style\n");
+ minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
+ }
+
/* adjust the pointers of the functions and the struct
* itself to the user-accessible mapping
*/
+ FIXPTR(minix_kerninfo.minix_ipcvecs->send_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs->receive_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs->senda_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs->notify_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call_ptr);
+ FIXPTR(minix_kerninfo.minix_ipcvecs);
+
minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
minix_kerninfo.minix_feature_flags = minix_feature_flags;
minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);
+ /* if libc_ipc is set, disable usermapped ipc functions
+ * and force binaries to use in-libc fallbacks.
+ */
+ if(env_get("libc_ipc")) {
+ printf("kernel: forcing in-libc fallback ipc style\n");
+ minix_kerninfo.minix_ipcvecs = NULL;
+ } else {
+ minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
+ }
+
return OK;
}
/* Selected 386 tss offsets. */
#define TSS3_S_SP0 4
+IMPORT(usermapped_offset)
IMPORT(copr_not_available_handler)
IMPORT(params_size)
IMPORT(params_offset)
#define hwint_master(irq) \
TEST_INT_IN_KERNEL(4, 0f) ;\
\
- SAVE_PROCESS_CTX(0) ;\
+ SAVE_PROCESS_CTX(0, KTS_INT_HARD) ;\
push %ebp ;\
movl $0, %ebp /* for stack trace */ ;\
call _C_LABEL(context_stop) ;\
#define hwint_slave(irq) \
TEST_INT_IN_KERNEL(4, 0f) ;\
\
- SAVE_PROCESS_CTX(0) ;\
+ SAVE_PROCESS_CTX(0, KTS_INT_HARD) ;\
push %ebp ;\
movl $0, %ebp /* for stack trace */ ;\
call _C_LABEL(context_stop) ;\
/* Interrupt routine for irq 15 */
hwint_slave(15)
+/* differences with sysenter:
+ * - we have to find our own per-cpu stack (i.e. post-SYSCALL
+ * %esp is not configured)
+ * - we have to save the post-SYSRET %eip, provided by the cpu
+ * in %ecx
+ * - a system call parameter is normally passed in %ecx, but SYSCALL
+ * clobbers %ecx (the cpu stores the return %eip there), so the
+ * userland code that executes SYSCALL copies %ecx to %edx first;
+ * the roles of %ecx and %edx are thus reversed here
+ * - we can use %esi as a scratch register
+ */
+#define ipc_entry_syscall_percpu(cpu) ;\
+ENTRY(ipc_entry_syscall_cpu ## cpu) ;\
+ xchg %ecx, %edx ;\
+ mov k_percpu_stacks+4*cpu, %esi ;\
+ mov (%esi), %ebp ;\
+ movl $KTS_SYSCALL, P_KERN_TRAP_STYLE(%ebp) ;\
+ xchg %esp, %esi ;\
+ jmp syscall_sysenter_common
+
+ipc_entry_syscall_percpu(0)
+ipc_entry_syscall_percpu(1)
+ipc_entry_syscall_percpu(2)
+ipc_entry_syscall_percpu(3)
+ipc_entry_syscall_percpu(4)
+ipc_entry_syscall_percpu(5)
+ipc_entry_syscall_percpu(6)
+ipc_entry_syscall_percpu(7)
+
+ENTRY(ipc_entry_sysenter)
+ /* SYSENTER simply sets kernel segments, EIP to here, and ESP
+ * to tss->sp0 (through MSR). so no automatic context saving is done.
+ * interrupts are disabled.
+ *
+ * register usage:
+ * edi: call type (IPCVEC_UM, KERVEC_UM)
+ * ebx, eax, ecx: syscall params, set by userland
+ * esi, edx: esp, eip to restore, set by userland
+ *
+ * no state is automatically saved; userland does all of that.
+ */
+ mov (%esp), %ebp /* get proc saved by arch_finish_switch_to_user */
+
+ /* inform kernel we entered by sysenter and should
+ * therefore exit through restore_user_context_sysenter
+ */
+ movl $KTS_SYSENTER, P_KERN_TRAP_STYLE(%ebp)
+ add usermapped_offset, %edx /* compensate for mapping difference */
+
+syscall_sysenter_common:
+ mov %esi, SPREG(%ebp) /* esi is return esp */
+ mov %edx, PCREG(%ebp) /* edx is return eip */
+
+ /* check for call type; do_ipc? */
+ cmp $IPCVEC_UM, %edi
+ jz ipc_entry_common
+
+ /* check for kernel trap */
+ cmp $KERVEC_UM, %edi
+ jz kernel_call_entry_common
+
+ /* unrecognized call number; restore user with error */
+ movl $-1, AXREG(%ebp)
+ push %ebp
+ call restore_user_context /* restore_user_context(%ebp); */
+
/*
* IPC is only from a process to kernel
*/
-ENTRY(ipc_entry)
+ENTRY(ipc_entry_softint_orig)
+ SAVE_PROCESS_CTX(0, KTS_INT_ORIG)
+ jmp ipc_entry_common
- SAVE_PROCESS_CTX(0)
+ENTRY(ipc_entry_softint_um)
+ SAVE_PROCESS_CTX(0, KTS_INT_UM)
+ jmp ipc_entry_common
+ENTRY(ipc_entry_common)
/* save the pointer to the current process */
push %ebp
/*
* kernel call is only from a process to kernel
*/
-ENTRY(kernel_call_entry)
+ENTRY(kernel_call_entry_orig)
+ SAVE_PROCESS_CTX(0, KTS_INT_ORIG)
+ jmp kernel_call_entry_common
- SAVE_PROCESS_CTX(0)
+ENTRY(kernel_call_entry_um)
+ SAVE_PROCESS_CTX(0, KTS_INT_UM)
+ jmp kernel_call_entry_common
+ENTRY(kernel_call_entry_common)
/* save the pointer to the current process */
push %ebp
TEST_INT_IN_KERNEL(12, exception_entry_nested)
exception_entry_from_user:
- SAVE_PROCESS_CTX(8)
+ SAVE_PROCESS_CTX(8, KTS_INT_HARD)
/* stop user process cycles */
push %ebp
/* resume execution at the point of exception */
iret
-/*===========================================================================*/
-/* restart */
-/*===========================================================================*/
-ENTRY(restore_user_context)
+ENTRY(restore_user_context_sysenter)
+ /* return to userspace using sysexit.
+ * the userspace process is responsible for most of the context
+ * saving; we just have to restore the right EIP and ESP here to
+ * resume execution, and set EAX and EBX to the saved status
+ * values.
+ */
+ mov 4(%esp), %ebp /* retrieve proc ptr arg */
+ movw $USER_DS_SELECTOR, %ax
+ movw %ax, %ds
+ mov PCREG(%ebp), %edx /* sysexit restores EIP using EDX */
+ mov SPREG(%ebp), %ecx /* sysexit restores ESP using ECX */
+ mov AXREG(%ebp), %eax /* trap return value */
+ mov BXREG(%ebp), %ebx /* secondary return value */
+ sti /* enable interrupts */
+ sysexit /* jump to EIP in user */
+
+ENTRY(restore_user_context_syscall)
+ /* return to userspace using sysret.
+ * the procedure is very similar to sysexit; it requires
+ * manual %esp restoring, new EIP in ECX, does not require
+ * enabling interrupts, and of course sysret instead of sysexit.
+ */
+ mov 4(%esp), %ebp /* retrieve proc ptr arg */
+ mov PCREG(%ebp), %ecx /* sysret restores EIP using ECX */
+ mov SPREG(%ebp), %esp /* restore ESP directly */
+ mov AXREG(%ebp), %eax /* trap return value */
+ mov BXREG(%ebp), %ebx /* secondary return value */
+ sysret /* jump to EIP in user */
+
+ENTRY(restore_user_context_int)
mov 4(%esp), %ebp /* will assume P_STACKBASE == 0 */
/* reconstruct the stack for iret */
LABEL(copr_not_available)
TEST_INT_IN_KERNEL(4, copr_not_available_in_kernel)
cld /* set direction flag to a known value */
- SAVE_PROCESS_CTX(0)
+ SAVE_PROCESS_CTX(0, KTS_INT_HARD)
/* stop user process cycles */
push %ebp
mov $0, %ebp
.data
.short 0x526F /* this must be the first data entry (magic #) */
+
.bss
k_initial_stack:
.space K_STACK_SIZE
member PSWREG p_reg.psw
member SPREG p_reg.sp
member P_CR3 p_seg.p_cr3
+member P_KERN_TRAP_STYLE p_seg.p_kern_trap_style
+member P_MAGIC p_magic
#include <string.h>
#include <assert.h>
+#include <minix/cpufeature.h>
#include <machine/multiboot.h>
#include "kernel/kernel.h"
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
struct tss_s tss[CONFIG_MAX_CPUS];
+u32_t k_percpu_stacks[CONFIG_MAX_CPUS];
+
int prot_init_done = 0;
phys_bytes vir2phys(void *vir)
{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
- { ipc_entry, IPC_VECTOR, USER_PRIVILEGE },
- { kernel_call_entry, KERN_CALL_VECTOR, USER_PRIVILEGE },
+ { ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
+ { kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
+ { ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
+ { kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
{ NULL, 0, 0}
};
* make space for process pointer and cpu id and point to the first
* usable word
*/
- t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
+ k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
/*
* set the cpu id at the top of the stack so we know on which cpu
* this stack is in use when we trap to the kernel
*/
*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
+ /* Set up Intel SYSENTER support if available. */
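+ /* SYSENTER loads CS/SS from the SYSENTER_CS MSR (SS = CS+8) and
+ * EIP/ESP from SYSENTER_EIP/SYSENTER_ESP; no user state is saved by
+ * the cpu, so point ESP at this cpu's kernel stack and EIP at the
+ * sysenter entry stub.
+ */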
+ if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
+ ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
+ ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
+ ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
+ }
+
+ /* Set up AMD SYSCALL support if available. */
+ if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
+ u32_t msr_lo, msr_hi;
+
+ /* set SYSCALL ENABLE bit in EFER MSR */
+ ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
+ msr_lo |= AMD_EFER_SCE;
+ ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);
+
+ /* set STAR register value */
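+ /* STAR layout: bits 63:48 hold the selector base SYSRET uses for the
+ * user CS/SS, bits 47:32 the base SYSCALL uses for the kernel CS/SS
+ * (SS = CS+8), and bits 31:0 the 32-bit SYSCALL entry point, here a
+ * per-cpu stub.
+ */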
+#define set_star_cpu(forcpu) if(cpu == forcpu) { \
+ ia32_msr_write(AMD_MSR_STAR, \
+ ((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR, \
+ (u32_t) ipc_entry_syscall_cpu ## forcpu); }
+ set_star_cpu(0);
+ set_star_cpu(1);
+ set_star_cpu(2);
+ set_star_cpu(3);
+ set_star_cpu(4);
+ set_star_cpu(5);
+ set_star_cpu(6);
+ set_star_cpu(7);
+ assert(CONFIG_MAX_CPUS <= 8);
+ }
+
return SEG_SELECTOR(index);
}
{
extern char k_boot_stktop;
+ if(_cpufeature(_CPUF_I386_SYSENTER))
+ minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
+ if(_cpufeature(_CPUF_I386_SYSCALL))
+ minix_feature_flags |= MKF_I386_AMD_SYSCALL;
+
memset(gdt, 0, sizeof(gdt));
memset(idt, 0, sizeof(idt));
* displ is the stack displacement. In case of an exception, there are two extra
* values on the stack - the error code and the exception number
*/
-#define SAVE_PROCESS_CTX(displ) \
+#define SAVE_PROCESS_CTX(displ, trapcode) \
\
cld /* set the direction flag to a known state */ ;\
\
movl (CURR_PROC_PTR + 4 + displ)(%esp), %ebp ;\
\
SAVE_GP_REGS(%ebp) ;\
+ movl $trapcode, P_KERN_TRAP_STYLE(%ebp) ;\
pop %esi /* get the orig %ebp and save it */ ;\
mov %esi, BPREG(%ebp) ;\
\
--- /dev/null
+#include "kernel.h"
+#include "arch_proto.h"
+
+struct minix_ipcvecs minix_ipcvecs_softint = {
+ .send_ptr = usermapped_send_softint,
+ .receive_ptr = usermapped_receive_softint,
+ .sendrec_ptr = usermapped_sendrec_softint,
+ .sendnb_ptr = usermapped_sendnb_softint,
+ .notify_ptr = usermapped_notify_softint,
+ .do_kernel_call_ptr = usermapped_do_kernel_call_softint,
+ .senda_ptr = usermapped_senda_softint
+};
+
+struct minix_ipcvecs minix_ipcvecs_sysenter = {
+ .send_ptr = usermapped_send_sysenter,
+ .receive_ptr = usermapped_receive_sysenter,
+ .sendrec_ptr = usermapped_sendrec_sysenter,
+ .sendnb_ptr = usermapped_sendnb_sysenter,
+ .notify_ptr = usermapped_notify_sysenter,
+ .do_kernel_call_ptr = usermapped_do_kernel_call_sysenter,
+ .senda_ptr = usermapped_senda_sysenter
+};
+
+struct minix_ipcvecs minix_ipcvecs_syscall = {
+ .send_ptr = usermapped_send_syscall,
+ .receive_ptr = usermapped_receive_syscall,
+ .sendrec_ptr = usermapped_sendrec_syscall,
+ .sendnb_ptr = usermapped_sendnb_syscall,
+ .notify_ptr = usermapped_notify_syscall,
+ .do_kernel_call_ptr = usermapped_do_kernel_call_syscall,
+ .senda_ptr = usermapped_senda_syscall
+};
+
--- /dev/null
+#include <minix/ipcconst.h>
+#include <machine/asm.h>
+
+/**========================================================================* */
+/* IPC assembly routines * */
+/**========================================================================* */
+/* all message passing routines save ebx, but destroy eax and ecx. */
+
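+/* IPCFUNC(name, SETARGS, VEC, POSTTRAP) expands to three user-mapped entry
+ * points per IPC primitive: usermapped_<name>_softint (int $VEC),
+ * usermapped_<name>_sysenter and usermapped_<name>_syscall, matching the
+ * three struct minix_ipcvecs tables in usermapped_data_arch.c. */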
+#define IPCFUNC(name,SETARGS,VEC,POSTTRAP) \
+ENTRY(usermapped_ ## name ## _softint) ;\
+ push %ebp ;\
+ movl %esp, %ebp ;\
+ push %ebx ;\
+ SETARGS ;\
+ int $VEC /* trap to the kernel */ ;\
+ mov %ebx, %ecx /* save %ebx */ ;\
+ POSTTRAP ;\
+ pop %ebx ;\
+ pop %ebp ;\
+ ret ;\
+ENTRY(usermapped_ ## name ## _sysenter) ;\
+ push %ebp ;\
+ movl %esp, %ebp ;\
+ push %ebp ;\
+ push %edx ;\
+ push %ebx ;\
+ push %esi ;\
+ push %edi ;\
+ movl %esp, %esi /* kernel uses %esi for restored %esp */;\
+ movl $0f, %edx /* kernel uses %edx for restored %eip */;\
+ movl $VEC, %edi /* %edi to distinguish ipc/kerncall */ ;\
+ SETARGS /* call-specific register setup */ ;\
+ sysenter /* disappear into kernel */ ;\
+0: ;\
+ mov %ebx, %ecx /* return w. state mangled; save %ebx */;\
+ pop %edi ;\
+ pop %esi ;\
+ pop %ebx ;\
+ pop %edx ;\
+ pop %ebp ;\
+ POSTTRAP ;\
+ pop %ebp ;\
+ ret ;\
+ENTRY(usermapped_ ## name ## _syscall) ;\
+ push %ebp ;\
+ movl %esp, %ebp ;\
+ push %ebp ;\
+ push %edx ;\
+ push %ebx ;\
+ push %esi ;\
+ push %edi ;\
+ movl $VEC, %edi /* %edi to distinguish ipc/kerncall */ ;\
+ SETARGS /* call-specific register setup */ ;\
+ movl %ecx, %edx /* %ecx is clobbered by SYSCALL */ ;\
+ syscall /* disappear into kernel */ ;\
+ mov %ebx, %ecx /* return w. state mangled; save %ebx */;\
+ pop %edi ;\
+ pop %esi ;\
+ pop %ebx ;\
+ pop %edx ;\
+ pop %ebp ;\
+ POSTTRAP ;\
+ pop %ebp ;\
+ ret
+
+#define IPCARGS(opcode) \
+ movl 8(%ebp), %eax /* eax = dest-src */ ;\
+ movl 12(%ebp), %ebx /* ebx = message pointer */ ;\
+ movl $opcode, %ecx ;\
+
+#define SENDA_ARGS \
+ movl 12(%ebp), %eax /* eax = count */ ;\
+ movl 8(%ebp), %ebx /* ebx = table */ ;\
+ movl $SENDA, %ecx ;\
+
+#define GETSTATUS \
+ push %eax ;\
+ movl 16(%ebp), %eax /* %eax = status pointer (3rd arg) */ ;\
+ movl %ecx, (%eax) ;\
+ pop %eax
+
+#define KERNARGS mov 8(%ebp), %eax
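+/* kernel calls pass only the message pointer, in %eax */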
+
+IPCFUNC(send,IPCARGS(SEND),IPCVEC_UM,)
+IPCFUNC(receive,IPCARGS(RECEIVE),IPCVEC_UM,GETSTATUS)
+IPCFUNC(sendrec,IPCARGS(SENDREC),IPCVEC_UM,)
+IPCFUNC(sendnb,IPCARGS(SENDNB),IPCVEC_UM,)
+IPCFUNC(notify,IPCARGS(NOTIFY),IPCVEC_UM,)
+IPCFUNC(senda,SENDA_ARGS,IPCVEC_UM,)
+IPCFUNC(do_kernel_call,KERNARGS,KERVEC_UM,)
+
+.data
+LABEL(usermapped_offset)
+.space 4
get_cpulocal_var(proc_ptr) = rp;
/* Process isn't scheduled until VM has set up a pagetable for it. */
- if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0)
+ if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0) {
rp->p_rts_flags |= RTS_VMINHIBIT;
+ rp->p_rts_flags |= RTS_BOOTINHIBIT;
+ }
rp->p_rts_flags |= RTS_PROC_STOP;
rp->p_rts_flags &= ~RTS_SLOT_FREE;
pick a new one. Process was dequeued and
should be enqueued at the end of some run
queue again */
+#define RTS_BOOTINHIBIT 0x10000 /* not ready until VM has made it runnable */
/* A process is runnable iff p_rts_flags == 0. */
#define rts_f_is_runnable(flg) ((flg) == 0)
void dequeue(struct proc *rp);
void switch_to_user(void);
void arch_proc_reset(struct proc *rp);
+void arch_proc_setcontext(struct proc *rp, struct stackframe_s *state, int user);
struct proc * arch_finish_switch_to_user(void);
struct proc *endpoint_lookup(endpoint_t ep);
#if DEBUG_ENABLE_IPC_WARNINGS
#endif
/* Restore the registers. */
- memcpy(&rp->p_reg, &sc.sc_regs, sizeof(sigregs));
+ arch_proc_setcontext(rp, &sc.sc_regs, 1);
#if defined(__i386__)
if(sc.sc_flags & MF_FPU_INITIALIZED)
{
}
#endif
- rp->p_misc_flags |= MF_CONTEXT_SET;
-
return(OK);
}
#endif /* USE_SIGRETURN */
/* VM says: forget about old mappings we have cached. */
mem_clear_mapcache();
return OK;
+ case VMCTL_BOOTINHIBIT_CLEAR:
+ RTS_UNSET(p, RTS_BOOTINHIBIT);
+ return OK;
}
/* Try architecture-specific vmctls. */
/* IPC assembly routines * */
/**========================================================================* */
/* all message passing routines save ebx, but destroy eax and ecx. */
-ENTRY(_send)
+ENTRY(_send_orig)
push %ebp
movl %esp, %ebp
push %ebx
movl SRC_DST(%ebp), %eax /* eax = dest-src */
movl MESSAGE(%ebp), %ebx /* ebx = message pointer */
movl $SEND, %ecx /* _send(dest, ptr) */
- int $IPCVEC /* trap to the kernel */
+ int $IPCVEC_ORIG /* trap to the kernel */
pop %ebx
pop %ebp
ret
-ENTRY(_receive)
+ENTRY(_receive_orig)
push %ebp
movl %esp, %ebp
push %ebx
movl SRC_DST(%ebp), %eax /* eax = dest-src */
movl MESSAGE(%ebp), %ebx /* ebx = message pointer */
movl $RECEIVE, %ecx /* _receive(src, ptr) */
- int $IPCVEC /* trap to the kernel */
+ int $IPCVEC_ORIG /* trap to the kernel */
movl STATUS(%ebp), %ecx /* ecx = status pointer */
movl %ebx, (%ecx)
pop %ebx
pop %ebp
ret
-ENTRY(_sendrec)
+ENTRY(_sendrec_orig)
push %ebp
movl %esp, %ebp
push %ebx
movl SRC_DST(%ebp), %eax /* eax = dest-src */
movl MESSAGE(%ebp), %ebx /* ebx = message pointer */
movl $SENDREC, %ecx /* _sendrec(srcdest, ptr) */
- int $IPCVEC /* trap to the kernel */
+ int $IPCVEC_ORIG /* trap to the kernel */
pop %ebx
pop %ebp
ret
movl $0, %eax
movl $0, %ebx
movl $MINIX_KERNINFO, %ecx
- int $IPCVEC /* trap to the kernel */
+ int $IPCVEC_ORIG /* trap to the kernel */
movl 8(%ebp), %ecx /* ecx = return struct ptr */
movl %ebx, (%ecx)
pop %ebx
pop %ebp
ret
-ENTRY(_notify)
+ENTRY(_notify_orig)
push %ebp
movl %esp, %ebp
push %ebx
movl SRC_DST(%ebp), %eax /* eax = destination */
movl $NOTIFY, %ecx /* _notify(srcdst) */
- int $IPCVEC /* trap to the kernel */
+ int $IPCVEC_ORIG /* trap to the kernel */
pop %ebx
pop %ebp
ret
-ENTRY(_sendnb)
+ENTRY(_sendnb_orig)
push %ebp
movl %esp, %ebp
push %ebx
movl SRC_DST(%ebp), %eax /* eax = dest-src */
movl MESSAGE(%ebp), %ebx /* ebx = message pointer */
movl $SENDNB, %ecx /* _sendnb(dest, ptr) */
- int $IPCVEC /* trap to the kernel */
+ int $IPCVEC_ORIG /* trap to the kernel */
pop %ebx
pop %ebp
ret
-ENTRY(_do_kernel_call)
+ENTRY(_do_kernel_call_orig)
/* pass the message pointer to kernel in the %eax register */
movl 4(%esp), %eax
- int $KERVEC
+ int $KERVEC_ORIG
ret
MSGTAB = 8 /* message table */
TABCOUNT = 12 /* number of entries in message table */
-ENTRY(_senda)
+ENTRY(_senda_orig)
push %ebp
movl %esp, %ebp
push %ebx
#include <stdio.h>
#include <minix/ipc.h>
+/* Minix kernel info, IPC functions pointers */
struct minix_kerninfo *_minix_kerninfo = NULL;
void __minix_init(void) __attribute__((__constructor__, __used__));
+struct minix_ipcvecs _minix_ipcvecs = {
+ .sendrec_ptr = _sendrec_orig,
+ .send_ptr = _send_orig,
+ .notify_ptr = _notify_orig,
+ .senda_ptr = _senda_orig,
+ .sendnb_ptr = _sendnb_orig,
+ .receive_ptr = _receive_orig,
+ .do_kernel_call_ptr = _do_kernel_call_orig,
+};
+
void __minix_init(void)
{
if((_minix_kernel_info_struct(&_minix_kerninfo)) != 0
|| _minix_kerninfo->kerninfo_magic != KERNINFO_MAGIC) {
_minix_kerninfo = NULL;
- }
+ } else if((_minix_kerninfo->ki_flags & MINIX_KIF_IPCVECS) &&
+ _minix_kerninfo->minix_ipcvecs) {
+ _minix_ipcvecs = *_minix_kerninfo->minix_ipcvecs;
+ }
}
int status;
msgptr->m_type = syscallnr;
- status = _sendrec(who, msgptr);
+ status = sendrec(who, msgptr);
if (status != 0) {
/* 'sendrec' itself failed. */
/* XXX - strerror doesn't know all the codes */
#include <minix/com.h>
#include <minix/callnr.h>
#include <minix/vm.h>
+#include <minix/ipc.h>
#include <minix/syslib.h>
#include <sys/mman.h>
#include <machine/elf.h>
#include <minix/minlib.h>
#include <minix/cpufeature.h>
#include <machine/vm.h>
+#include <string.h>
int _cpufeature(int cpufeature)
{
u32_t eax, ebx, ecx, edx;
+ u32_t ef_eax = 0, ef_ebx = 0, ef_ecx = 0, ef_edx = 0;
+ unsigned int family, model, stepping;
+ int is_intel = 0, is_amd = 0;
eax = ebx = ecx = edx = 0;
eax = 0;
_cpuid(&eax, &ebx, &ecx, &edx);
if(eax > 0) {
+ char vendor[12];
+ memcpy(vendor, &ebx, sizeof(ebx));
+ memcpy(vendor+4, &edx, sizeof(edx));
+ memcpy(vendor+8, &ecx, sizeof(ecx));
+ if(!strncmp(vendor, "GenuineIntel", sizeof(vendor)))
+ is_intel = 1;
+ if(!strncmp(vendor, "AuthenticAMD", sizeof(vendor)))
+ is_amd = 1;
eax = 1;
_cpuid(&eax, &ebx, &ecx, &edx);
+ } else return 0;
+
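+ /* decode family/model/stepping from CPUID leaf 1 EAX: bits 3:0
+ * stepping, 7:4 model, 11:8 family, 19:16 extended model (valid
+ * for family 6/0xf), 27:20 extended family (valid for family 0xf).
+ */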
+ stepping = eax & 0xf;
+ model = (eax >> 4) & 0xf;
+
+ if(model == 0xf || model == 0x6) {
+ model += ((eax >> 16) & 0xf) << 4;
+ }
+
+ family = (eax >> 8) & 0xf;
+
+ if(family == 0xf) {
+ family += (eax >> 20) & 0xff;
+ }
+
+ if(is_amd) {
+ ef_eax = 0x80000001;
+ _cpuid(&ef_eax, &ef_ebx, &ef_ecx, &ef_edx);
}
switch(cpufeature) {
return edx & CPUID1_EDX_HTT;
case _CPUF_I386_HTT_MAX_NUM:
return (ebx >> 16) & 0xff;
+ case _CPUF_I386_SYSENTER:
+ if(!is_intel) return 0;
+ if(!(edx & CPUID1_EDX_SYSENTER)) return 0;
+ if(family == 6 && model < 3 && stepping < 3) return 0;
+ return 1;
+ case _CPUF_I386_SYSCALL:
+ if(!is_amd) return 0;
+ if(!(ef_edx & CPUID_EF_EDX_SYSENTER)) return 0;
+ return 1;
}
return 0;
int _kernel_call(int syscallnr, message *msgptr)
{
msgptr->m_type = syscallnr;
- _do_kernel_call(msgptr);
+ do_kernel_call(msgptr);
return(msgptr->m_type);
}
int status;
msgptr->m_type = syscallnr;
- status = _sendrec(who, msgptr);
+ status = sendrec(who, msgptr);
if (status != 0) return(status);
return(msgptr->m_type);
}
if(sys_exec(vmp->vm_endpoint, (char *) execi->stack_high - 12,
(char *) ip->proc_name, execi->pc) != OK)
panic("vm: boot process exec of %d failed\n", vmp->vm_endpoint);
+
+ /* make it runnable */
+ if(sys_vmctl(vmp->vm_endpoint, VMCTL_BOOTINHIBIT_CLEAR, 0) != OK)
+ panic("VMCTL_BOOTINHIBIT_CLEAR failed");
}
void init_vm(void)
int s, i;
static struct memory mem_chunks[NR_MEMS];
static struct boot_image *ip;
+ extern void __minix_init(void);
#if SANITYCHECKS
incheck = nocheck = 0;
/* Initialize the structures for queryexit */
init_query_exit();
+
+ /* Acquire kernel ipc vectors that weren't available
+ * before VM had determined kernel mappings
+ */
+ __minix_init();
}
/*===========================================================================*