From: Tomas Hruby Date: Wed, 15 Sep 2010 14:09:52 +0000 (+0000) Subject: SMP - We boot APs X-Git-Tag: v3.2.0~866 X-Git-Url: http://zhaoyanbai.com/repos/icons/debian/static/datamaps.china.min.js?a=commitdiff_plain;h=62c666566eb002ec293767af87e8a31dcfffec05;p=minix.git SMP - We boot APs - kernel detects CPUs by searching ACPI tables for local apic nodes - each CPU has its own TSS that points to its own stack. All cpus boot on the same boot stack (in sequence) but each switches to its private stack as soon as it can. - final booting code in main() placed in bsp_finish_booting() which is executed only after the BSP switches to its final stack - apic functions to send startup interrupts - assembler functions to handle CPU features not needed for single cpu mode like memory barriers, HT detection etc. - new files kernel/smp.[ch], kernel/arch/i386/arch_smp.c and kernel/arch/i386/include/arch_smp.h - 16-bit trampoline code for the APs. It is executed by each AP after receiving startup IPIs; it brings the CPUs up to 32-bit mode and lets them spin in an infinite loop so they don't do any damage. - implementation of kernel spinlock - CONFIG_SMP and CONFIG_MAX_CPUS set by the build system --- diff --git a/Makefile b/Makefile index b9856cb58..c16526afe 100644 --- a/Makefile +++ b/Makefile @@ -30,12 +30,6 @@ usage: # 'make install' target. # # etcfiles has to be done first. 
-.if ${COMPILER_TYPE} == "ack" -world: mkfiles includes depend libraries install etcforce -.elif ${COMPILER_TYPE} == "gnu" -world: mkfiles includes depend gnu-libraries install etcforce -.endif - mkfiles: make -C share/mk install diff --git a/include/arch/i386/archtypes.h b/include/arch/i386/archtypes.h index 31f3ba9f3..48626018c 100644 --- a/include/arch/i386/archtypes.h +++ b/include/arch/i386/archtypes.h @@ -41,5 +41,7 @@ struct fpu_state_s { #define INMEMORY(p) (!p->p_seg.p_cr3 || get_cpulocal_var(ptproc) == p) +typedef u32_t atomic_t; /* access to an aligned 32bit value is atomic on i386 */ + #endif /* #ifndef _I386_TYPES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 0f8d295e3..780c414e0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,9 +9,15 @@ SRCS+= start.c table.c main.c proc.c \ system.c clock.c utility.c debug.c profile.c interrupt.c \ watchdog.c cpulocals.c +.ifdef CONFIG_SMP +SRCS += smp.c +.endif + DPADD+= ${LIBTIMERS} ${LIBSYS} LDADD+= -ltimers -lsys +CFLAGS += -D__kernel__ + .if ${COMPILER_TYPE} == "ack" LDFLAGS+= -.o .elif ${COMPILER_TYPE} == "gnu" diff --git a/kernel/arch/i386/Makefile.inc b/kernel/arch/i386/Makefile.inc index c2b7d279b..5e6972e3e 100644 --- a/kernel/arch/i386/Makefile.inc +++ b/kernel/arch/i386/Makefile.inc @@ -33,10 +33,18 @@ SRCS+= arch_do_vmctl.c \ pre_init.c \ acpi.c +.ifdef CONFIG_SMP +SRCS += arch_smp.c trampoline.S +.endif + + .if ${COMPILER_TYPE} == "ack" I86CPPFLAGS = -mi86 I86LDFLAGS = -mi86 CPPFLAGS.klib16.S = ${I86CPPFLAGS} LDFLAGS.klib16.S = ${I86LDFLAGS} + +CPPFLAGS.trampoline.S = ${I86CPPFLAGS} +LDFLAGS.trampoline.S = ${I86LDFLAGS} .endif diff --git a/kernel/arch/i386/acpi.c b/kernel/arch/i386/acpi.c index 159f15b98..c0d319f87 100644 --- a/kernel/arch/i386/acpi.c +++ b/kernel/arch/i386/acpi.c @@ -243,3 +243,34 @@ PUBLIC struct acpi_madt_ioapic * acpi_get_ioapic_next(void) return ret; } + +PUBLIC struct acpi_madt_lapic * acpi_get_lapic_next(void) +{ + static unsigned idx = 0; + static struct 
acpi_madt_hdr * madt_hdr; + + struct acpi_madt_lapic * ret; + + if (idx == 0) { + madt_hdr = (struct acpi_madt_hdr *) + phys2vir(acpi_get_table_base("APIC")); + if (madt_hdr == NULL) + return NULL; + } + + for (;;) { + ret = (struct acpi_madt_lapic *) + acpi_madt_get_typed_item(madt_hdr, + ACPI_MADT_TYPE_LAPIC, idx); + if (!ret) + break; + + idx++; + + /* report only usable CPUs */ + if (ret->flags & 1) + break; + } + + return ret; +} diff --git a/kernel/arch/i386/acpi.h b/kernel/arch/i386/acpi.h index e2cf35217..e1678f4c0 100644 --- a/kernel/arch/i386/acpi.h +++ b/kernel/arch/i386/acpi.h @@ -90,5 +90,7 @@ _PROTOTYPE(void acpi_init, (void)); * this function thus no memory needs to be freed */ _PROTOTYPE(struct acpi_madt_ioapic * acpi_get_ioapic_next, (void)); +/* same as above for local APICs */ +_PROTOTYPE(struct acpi_madt_lapic * acpi_get_lapic_next, (void)); #endif /* __ACPI_H__ */ diff --git a/kernel/arch/i386/apic.c b/kernel/arch/i386/apic.c index 3fd7e68bf..29ad02cee 100644 --- a/kernel/arch/i386/apic.c +++ b/kernel/arch/i386/apic.c @@ -106,7 +106,6 @@ #define SPL0 0x0 #define SPLHI 0xF -#define cpu_is_bsp(x) 1 PUBLIC struct io_apic io_apic[MAX_NR_IOAPICS]; PUBLIC unsigned nioapics; @@ -124,6 +123,22 @@ struct irq { PRIVATE struct irq io_apic_irq[NR_IRQ_VECTORS]; +/* + * to make APIC work if SMP is not configured, we need to set the maximal number + * of CPUS to 1, cpuid to return 0 and the current cpu is always BSP + */ +#ifndef CONFIG_SMP +/* this is always true on an uniprocessor */ +#define cpu_is_bsp(x) 1 + +#else + +#include "kernel/smp.h" + +#endif + +#include "kernel/spinlock.h" + #define lapic_write_icr1(val) lapic_write(LAPIC_ICR1, val) #define lapic_write_icr2(val) lapic_write(LAPIC_ICR2, val) @@ -131,12 +146,15 @@ PRIVATE struct irq io_apic_irq[NR_IRQ_VECTORS]; #define lapic_read_icr1(x) lapic_read(LAPIC_ICR1) #define lapic_read_icr2(x) lapic_read(LAPIC_ICR2) +#define is_boot_apic(apicid) ((apicid) == bsp_lapic_id) + #define VERBOSE_APIC(x) x 
PUBLIC int ioapic_enabled; PUBLIC u32_t lapic_addr_vaddr; PUBLIC vir_bytes lapic_addr; PUBLIC vir_bytes lapic_eoi_addr; +PUBLIC int bsp_lapic_id; PRIVATE volatile unsigned probe_ticks; PRIVATE u64_t tsc0, tsc1; @@ -171,8 +189,8 @@ PRIVATE void ioapic_write(u32_t ioa_base, u8_t reg, u32_t val) *((u32_t *)(ioa_base + IOAPIC_IOWIN)) = val; } -FORWARD _PROTOTYPE(void lapic_microsec_sleep, (unsigned count)); -FORWARD _PROTOTYPE(void apic_idt_init, (const int reset)); +_PROTOTYPE(void lapic_microsec_sleep, (unsigned count)); +_PROTOTYPE(void apic_idt_init, (const int reset)); PRIVATE void ioapic_enable_pin(vir_bytes ioapic_addr, int pin) { @@ -375,6 +393,16 @@ PUBLIC void ioapic_mask_irq(unsigned irq) irq_8259_mask(irq); } +PUBLIC unsigned int apicid(void) +{ + return lapic_read(LAPIC_ID); +} + +PUBLIC void ioapic_set_id(u32_t addr, unsigned int id) +{ + ioapic_write(addr, IOAPIC_ID, id << 24); +} + PRIVATE int calib_clk_handler(irq_hook_t * UNUSED(hook)) { u32_t tcrt; @@ -398,7 +426,7 @@ PRIVATE int calib_clk_handler(irq_hook_t * UNUSED(hook)) return 1; } -PRIVATE void apic_calibrate_clocks(void) +PRIVATE void apic_calibrate_clocks(unsigned cpu) { u32_t lvtt, val, lapic_delta; u64_t tsc_delta; @@ -462,7 +490,7 @@ PRIVATE void apic_calibrate_clocks(void) BOOT_VERBOSE(cpu_print_freq(cpuid)); } -PRIVATE void lapic_set_timer_one_shot(const u32_t value) +PUBLIC void lapic_set_timer_one_shot(const u32_t value) { /* sleep in micro seconds */ u32_t lvtt; @@ -508,10 +536,11 @@ PUBLIC void lapic_stop_timer(void) lapic_write(LAPIC_LVTTR, lvtt | APIC_LVTT_MASK); } -PRIVATE void lapic_microsec_sleep(unsigned count) +PUBLIC void lapic_microsec_sleep(unsigned count) { lapic_set_timer_one_shot(count); - while (lapic_read(LAPIC_TIMER_CCR)); + while (lapic_read(LAPIC_TIMER_CCR)) + arch_pause(); } PRIVATE u32_t lapic_errstatus(void) @@ -541,7 +570,10 @@ PUBLIC void lapic_disable(void) if (!lapic_addr) return; - if (!apic_imcrp) { +#ifdef CONFIG_SMP + if (cpu_is_bsp(cpuid) && !apic_imcrp) 
+#endif + { /* leave it enabled if imcr is not set */ val = lapic_read(LAPIC_LINT0); val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK); @@ -591,14 +623,9 @@ PRIVATE int lapic_enable_in_msr(void) return 1; } -PUBLIC int lapic_enable(void) +PUBLIC int lapic_enable(unsigned cpu) { u32_t val, nlvt; -#if 0 - u32_t timeout = 0xFFFF; - u32_t errstatus = 0; -#endif - unsigned cpu = cpuid; if (!lapic_addr) return 0; @@ -629,8 +656,6 @@ PUBLIC int lapic_enable(void) apic_eoi(); - cpu = cpuid; - /* Program Logical Destination Register. */ val = lapic_read(LAPIC_LDR) & ~0xFF000000; val |= (cpu & 0xFF) << 24; @@ -663,7 +688,7 @@ PUBLIC int lapic_enable(void) (void) lapic_read (LAPIC_SIVR); apic_eoi(); - apic_calibrate_clocks(); + apic_calibrate_clocks(cpu); BOOT_VERBOSE(printf("APIC timer calibrated\n")); return 1; @@ -785,13 +810,14 @@ PRIVATE void lapic_set_dummy_handlers(void) #endif /* Build descriptors for interrupt gates in IDT. */ -PRIVATE void apic_idt_init(const int reset) +PUBLIC void apic_idt_init(const int reset) { u32_t val; /* Set up idt tables for smp mode. 
*/ vir_bytes local_timer_intr_handler; + int is_bsp = is_boot_apic(apicid()); if (reset) { idt_copy_vectors(gate_table_pic); @@ -825,7 +851,7 @@ PRIVATE void apic_idt_init(const int reset) (void) lapic_read(LAPIC_LVTER); /* configure the timer interupt handler */ - if (cpu_is_bsp(cpuid)) { + if (is_bsp) { local_timer_intr_handler = (vir_bytes) lapic_bsp_timer_int_handler; BOOT_VERBOSE(printf("Initiating BSP timer handler\n")); } else { @@ -865,7 +891,7 @@ PRIVATE int acpi_get_ioapics(struct io_apic * ioa, unsigned * nioa, unsigned max return n; } -PRIVATE int detect_ioapics(void) +PUBLIC int detect_ioapics(void) { int status; @@ -874,11 +900,130 @@ PRIVATE int detect_ioapics(void) if (!status) { /* try something different like MPS */ } - - printf("nioapics %d\n", nioapics); return status; } +#ifdef CONFIG_SMP + +PUBLIC int apic_send_startup_ipi(unsigned cpu, phys_bytes trampoline) +{ + int timeout; + u32_t errstatus = 0; + int i; + + /* INIT-SIPI-SIPI sequence */ + + for (i = 0; i < 2; i++) { + u32_t val; + lapic_errstatus(); + + /* set target pe */ + val = lapic_read(LAPIC_ICR2) & 0xFFFFFF; + val |= cpuid2apicid[cpu] << 24; + lapic_write(LAPIC_ICR2, val); + + /* send SIPI */ + val = lapic_read(LAPIC_ICR1) & 0xFFF32000; + val |= APIC_ICR_LEVEL_ASSERT |APIC_ICR_DM_STARTUP; + val |= (((u32_t)trampoline >> 12)&0xff); + lapic_write(LAPIC_ICR1, val); + + timeout = 1000; + + /* wait for 200 micro-seconds*/ + lapic_microsec_sleep (200); + errstatus = 0; + + while ((lapic_read(LAPIC_ICR1) & APIC_ICR_DELIVERY_PENDING) && !errstatus) + { + errstatus = lapic_errstatus(); + timeout--; + if (!timeout) break; + } + + /* skip this one and continue with another cpu */ + if (errstatus) + return -1; + } + + return 0; +} + +PUBLIC int apic_send_init_ipi(unsigned cpu, phys_bytes trampoline) +{ + u32_t ptr, errstatus = 0; + int timeout; + + /* set the warm reset vector */ + ptr = (u32_t)(trampoline & 0xF); + phys_copy(0x467, vir2phys(&ptr), sizeof(u16_t )); + ptr = (u32_t)(trampoline 
>> 4); + phys_copy(0x469, vir2phys(&ptr), sizeof(u16_t )); + + /* set shutdown code */ + outb (RTC_INDEX, 0xF); + outb (RTC_IO, 0xA); + + /* clear error state register. */ + (void) lapic_errstatus(); + + /* assert INIT IPI , No Shorthand, destination mode : physical */ + lapic_write(LAPIC_ICR2, (lapic_read (LAPIC_ICR2) & 0xFFFFFF) | + (cpuid2apicid[cpu] << 24)); + lapic_write(LAPIC_ICR1, (lapic_read (LAPIC_ICR1) & 0xFFF32000) | + APIC_ICR_DM_INIT | APIC_ICR_TM_LEVEL | APIC_ICR_LEVEL_ASSERT); + + timeout = 1000; + + /* sleep for 200 micro-seconds */ + lapic_microsec_sleep(200); + + errstatus = 0; + + while ((lapic_read(LAPIC_ICR1) & APIC_ICR_DELIVERY_PENDING) && !errstatus) { + errstatus = lapic_errstatus(); + timeout--; + if (!timeout) break; + } + + if (errstatus) + return -1; /* to continue with a new processor */ + + /* clear error state register. */ + lapic_errstatus(); + + /* deassert INIT IPI , No Shorthand, destination mode : physical */ + lapic_write(LAPIC_ICR2, (lapic_read (LAPIC_ICR2) & 0xFFFFFF) | + (cpuid2apicid[cpu] << 24)); + lapic_write(LAPIC_ICR1, (lapic_read (LAPIC_ICR1) & 0xFFF32000) | + APIC_ICR_DEST_ALL | APIC_ICR_TM_LEVEL | APIC_ICR_DM_INIT); + + timeout = 1000; + errstatus = 0; + + /* sleep for 200 micro-seconds */ + lapic_microsec_sleep(200); + + while ((lapic_read(LAPIC_ICR1)&APIC_ICR_DELIVERY_PENDING) && !errstatus) { + errstatus = lapic_errstatus(); + timeout--; + if(!timeout) break; + } + + if (errstatus) + return -1; /* with the new processor */ + + /* clear error state register. 
*/ + (void) lapic_errstatus(); + + /* wait 10ms */ + lapic_microsec_sleep (10000); + + return 0; +} +#endif + +#ifndef CONFIG_SMP PUBLIC int apic_single_cpu_init(void) { if (!cpu_feature_apic_on_chip()) @@ -887,7 +1032,7 @@ PUBLIC int apic_single_cpu_init(void) lapic_addr = phys2vir(LOCAL_APIC_DEF_ADDR); ioapic_enabled = 0; - if (!lapic_enable()) { + if (!lapic_enable(0)) { lapic_addr = 0x0; return 0; } @@ -909,6 +1054,7 @@ PUBLIC int apic_single_cpu_init(void) idt_reload(); return 1; } +#endif PRIVATE eoi_method_t set_eoi_method(unsigned irq) { diff --git a/kernel/arch/i386/apic.h b/kernel/arch/i386/apic.h index 49adaa8b9..ca7f6a9c3 100644 --- a/kernel/arch/i386/apic.h +++ b/kernel/arch/i386/apic.h @@ -97,6 +97,8 @@ EXTERN vir_bytes lapic_addr; EXTERN vir_bytes lapic_eoi_addr; +EXTERN int ioapic_enabled; +EXTERN int bsp_lapic_id; #define MAX_NR_IOAPICS 32 #define MAX_IOAPIC_IRQS 64 @@ -118,9 +120,35 @@ EXTERN unsigned nioapics; EXTERN u32_t lapic_addr_vaddr; /* we remember the virtual address here until we switch to paging */ +_PROTOTYPE (int lapic_enable, (unsigned cpu)); + +EXTERN int ioapic_enabled; +EXTERN unsigned nioapics; + +_PROTOTYPE (void lapic_microsec_sleep, (unsigned count)); +_PROTOTYPE (void ioapic_disable_irqs, (u32_t irqs)); +_PROTOTYPE (void ioapic_enable_irqs, (u32_t irqs)); + +_PROTOTYPE (int lapic_enable, (unsigned cpu)); +_PROTOTYPE (void lapic_disable, (void)); + +_PROTOTYPE (void ioapic_disable_all, (void)); +_PROTOTYPE (int ioapic_enable_all, (void)); + +_PROTOTYPE(int detect_ioapics, (void)); +_PROTOTYPE(void apic_idt_init, (int reset)); + +#ifdef CONFIG_SMP +_PROTOTYPE(int apic_send_startup_ipi, (unsigned cpu, phys_bytes trampoline)); +_PROTOTYPE(int apic_send_init_ipi, (unsigned cpu, phys_bytes trampoline)); +_PROTOTYPE(unsigned int apicid, (void)); +_PROTOTYPE(void ioapic_set_id, (u32_t addr, unsigned int id)); +#else _PROTOTYPE(int apic_single_cpu_init, (void)); +#endif -_PROTOTYPE(void lapic_set_timer_periodic, (unsigned freq)); 
+_PROTOTYPE(void lapic_set_timer_periodic, (const unsigned freq)); +_PROTOTYPE(void lapic_set_timer_one_shot, (const u32_t value)); _PROTOTYPE(void lapic_stop_timer, (void)); _PROTOTYPE(void ioapic_set_irq, (unsigned irq)); @@ -141,7 +169,7 @@ _PROTOTYPE(void dump_apic_irq_state, (void)); #define lapic_read(what) (*((volatile u32_t *)((what)))) #define lapic_write(what, data) do { \ - (*((volatile u32_t *)((what)))) = data; \ + (*((volatile u32_t *)((what)))) = data; \ } while(0) #endif /* __ASSEMBLY__ */ diff --git a/kernel/arch/i386/apic_asm.S b/kernel/arch/i386/apic_asm.S index 8a9f7b87c..8faa33963 100644 --- a/kernel/arch/i386/apic_asm.S +++ b/kernel/arch/i386/apic_asm.S @@ -70,6 +70,30 @@ ENTRY(lapic_bsp_timer_int_handler) ENTRY(lapic_ap_timer_int_handler) lapic_intr(_C_LABEL(ap_timer_int_handler)) +#ifdef CONFIG_SMP +#include "arch_smp.h" + +/* FIXME dummy stubs */ +ENTRY(smp_ipi_sched) +1: jmp 1b + +ENTRY(smp_ipi_dequeue) +1: jmp 1b + +ENTRY(smp_ipi_stop) +1: jmp 1b + +ENTRY(smp_ipi_reboot) +1: jmp 1b + +ENTRY(smp_ipi_err_int) +1: jmp 1b + +ENTRY(smp_ipi_spv_int) +1: jmp 1b + +#endif /* CONFIG_SMP */ + #ifdef CONFIG_APIC_DEBUG .data @@ -86,7 +110,7 @@ lapic_intr_dummy_handler_msg: #define LAPIC_INTR_DUMMY_HANDLER(vect) \ .balign LAPIC_INTR_DUMMY_HANDLER_SIZE; \ - lapic_intr_dummy_handler_##vect: lapic_intr_dummy_handler(vect) + _lapic_intr_dummy_handler_##vect: lapic_intr_dummy_handler(vect) apic_hwint(0) apic_hwint(1) @@ -414,4 +438,3 @@ LABEL(lapic_intr_dummy_handles_end) #endif /* CONFIG_APIC_DEBUG */ - diff --git a/kernel/arch/i386/arch_clock.c b/kernel/arch/i386/arch_clock.c index 8646a32cc..e66f4c30a 100644 --- a/kernel/arch/i386/arch_clock.c +++ b/kernel/arch/i386/arch_clock.c @@ -9,6 +9,7 @@ #include "kernel/clock.h" #include "kernel/proc.h" #include +#include "glo.h" #ifdef CONFIG_APIC diff --git a/kernel/arch/i386/arch_smp.c b/kernel/arch/i386/arch_smp.c new file mode 100644 index 000000000..13daca166 --- /dev/null +++ b/kernel/arch/i386/arch_smp.c 
@@ -0,0 +1,239 @@ +/* This file contains essentially the MP handling code of the Minix kernel. + * + * Changes: + * Apr 1, 2008 Added SMP support. + */ + +#define _SMP + +#include "kernel/kernel.h" +#include "kernel/proc.h" +#include "arch_proto.h" +#include "kernel/glo.h" +#include +#include +#include +#include + +#include "kernel/spinlock.h" +#include "kernel/smp.h" +#include "apic.h" +#include "acpi.h" + +#include "glo.h" + +_PROTOTYPE(void trampoline, (void)); + +/* + * arguments for trampoline. We need to pass the logical cpu id, gdt and idt. + * They have to be in location which is reachable using absolute addressing in + * 16-bit mode + */ +extern volatile u32_t __ap_id; +extern volatile struct segdesc_s __ap_gdt, __ap_idt; + +extern u32_t busclock[CONFIG_MAX_CPUS]; +extern int panicking; + +static int ap_cpu_ready; + +/* there can be at most 255 local APIC ids, each fits in 8 bits */ +PRIVATE unsigned char apicid2cpuid[255]; +PUBLIC unsigned char cpuid2apicid[CONFIG_MAX_CPUS]; + +SPINLOCK_DEFINE(smp_cpu_lock) +SPINLOCK_DEFINE(dispq_lock) + +FORWARD _PROTOTYPE(void smp_init_vars, (void)); +FORWARD _PROTOTYPE(void smp_reinit_vars, (void)); + +PRIVATE void smp_start_aps(void) +{ + /* + * Find an address and align it to a 4k boundary. + */ + unsigned cpu; + u32_t biosresetvector; + phys_bytes trampoline_base = vir2phys(trampoline); + + /* TODO hack around the alignment problem */ + + phys_copy (0x467, vir2phys(&biosresetvector), sizeof(u32_t)); + + /* set the bios shutdown code to 0xA */ + outb(RTC_INDEX, 0xF); + outb(RTC_IO, 0xA); + + /* setup the warm reset vector */ + phys_copy(vir2phys(&trampoline_base), 0x467, sizeof(u32_t)); + + /* prepare gdt and idt for the new cpus */ + __ap_gdt = gdt[GDT_INDEX]; + __ap_idt = gdt[IDT_INDEX]; + + /* okay, we're ready to go. boot all of the ap's now. we loop through + * using the processor's apic id values. 
+ */ + for (cpu = 0; cpu < ncpus; cpu++) { + printf("Booting cpu %d\n", cpu); + ap_cpu_ready = -1; + /* Don't send INIT/SIPI to boot cpu. */ + if((apicid() == cpuid2apicid[cpu]) && + (apicid() == bsp_lapic_id)) { + cpu_set_flag(cpu, CPU_IS_READY); + printf("Skiping bsp\n"); + continue; + } + + __ap_id = cpu; + if (apic_send_init_ipi(cpu, trampoline_base) || + apic_send_startup_ipi(cpu, trampoline_base)) { + printf("WARNING cannot boot cpu %d\n", cpu); + continue; + } + + /* wait for 5 secs for the processors to boot */ + lapic_set_timer_one_shot(5000000); + + while (lapic_read(LAPIC_TIMER_CCR)) { + if (ap_cpu_ready == cpu) { + printf("CPU %d is up\n", cpu); + cpu_set_flag(cpu, CPU_IS_READY); + break; + } + } + if (ap_cpu_ready == -1) { + printf("WARNING : CPU %d didn't boot\n", cpu); + } + } + + phys_copy(vir2phys(&biosresetvector),(phys_bytes)0x467,sizeof(u32_t)); + + outb(RTC_INDEX, 0xF); + outb(RTC_IO, 0); + + bsp_finish_booting(); + NOT_REACHABLE; +} + +PUBLIC void smp_halt_cpu (void) +{ + NOT_IMPLEMENTED; +} + +PUBLIC void smp_shutdown_aps (void) +{ + NOT_IMPLEMENTED; +} + +PRIVATE void ap_finish_booting(void) +{ + unsigned cpu = cpuid; + + printf("CPU %d says hello world!\n", cpu); + /* inform the world of our presence. */ + ap_cpu_ready = cpu; + + while(!i386_paging_enabled) + arch_pause(); + for(;;); + + /* finish processor initialisation. 
*/ + lapic_enable(cpu); + + switch_to_user(); + NOT_REACHABLE; +} + +PUBLIC void smp_ap_boot(void) +{ + switch_k_stack((char *)get_k_stack_top(__ap_id) - + X86_STACK_TOP_RESERVED, ap_finish_booting); +} + +PRIVATE void smp_reinit_vars(void) +{ + int i; + lapic_addr = lapic_eoi_addr = 0; + ioapic_enabled = 0; + + ncpus = 1; +} + +PRIVATE void tss_init_all(void) +{ + unsigned cpu; + + for(cpu = 0; cpu < ncpus ; cpu++) + tss_init(cpu, get_k_stack_top(cpu)); +} + +PRIVATE int discover_cpus(void) +{ + struct acpi_madt_lapic * cpu; + + while (ncpus < CONFIG_MAX_CPUS && (cpu = acpi_get_lapic_next())) { + apicid2cpuid[cpu->apic_id] = ncpus; + cpuid2apicid[ncpus] = cpu->apic_id; + printf("CPU %3d local APIC id %3d\n", ncpus, cpu->apic_id); + ncpus++; + } + + return ncpus; +} + +PUBLIC void smp_init (void) +{ + /* read the MP configuration */ + if (!discover_cpus()) { + ncpus = 1; + goto uniproc_fallback; + } + + lapic_addr = phys2vir(LOCAL_APIC_DEF_ADDR); + ioapic_enabled = 0; + + tss_init_all(); + + /* + * we still run on the boot stack and we cannot use cpuid as its value + * wasn't set yet. apicid2cpuid initialized in mps_init() + */ + bsp_cpu_id = apicid2cpuid[apicid()]; + + if (!lapic_enable(bsp_cpu_id)) { + printf("ERROR : failed to initialize BSP Local APIC\n"); + goto uniproc_fallback; + } + + acpi_init(); + + if (!detect_ioapics()) { + lapic_disable(); + lapic_addr = 0x0; + goto uniproc_fallback; + } + + ioapic_enable_all(); + + if (ioapic_enabled) + machine.apic_enabled = 1; + + /* set smp idt entries. */ + apic_idt_init(0); /* Not a reset ! */ + idt_reload(); + + BOOT_VERBOSE(printf("SMP initialized\n")); + + switch_k_stack((char *)get_k_stack_top(bsp_cpu_id) - + X86_STACK_TOP_RESERVED, smp_start_aps); + + return; + +uniproc_fallback: + apic_idt_init(1); /* Reset to PIC idt ! */ + idt_reload(); + smp_reinit_vars (); /* revert to a single proc system. 
*/ + intr_init (INTS_MINIX, 0); /* no auto eoi */ + printf("WARNING : SMP initialization failed\n"); +} diff --git a/kernel/arch/i386/arch_system.c b/kernel/arch/i386/arch_system.c index bb9daa654..bf0f39934 100644 --- a/kernel/arch/i386/arch_system.c +++ b/kernel/arch/i386/arch_system.c @@ -24,6 +24,8 @@ #include "kernel/debug.h" #include "multiboot.h" +#include "glo.h" + #ifdef CONFIG_APIC #include "apic.h" #endif @@ -43,6 +45,8 @@ extern void poweroff16_end(); /* set OSXMMEXCPT[bit 10] if we provide #XM handler. */ #define CR4_OSXMMEXCPT (1L<<10) +PUBLIC void * k_stacks; + FORWARD _PROTOTYPE( void ser_debug, (int c)); PUBLIC __dead void arch_monitor(void) @@ -198,23 +202,6 @@ PUBLIC void arch_get_aout_headers(const int i, struct exec *h) phys_copy(aout + i * A_MINHDR, vir2phys(h), (phys_bytes) A_MINHDR); } -PRIVATE void tss_init(struct tss_s * tss, void * kernel_stack, - const unsigned cpu) -{ - /* - * make space for process pointer and cpu id and point to the first - * usable word - */ - tss->sp0 = ((unsigned) kernel_stack) - 2 * sizeof(void *); - tss->ss0 = DS_SELECTOR; - - /* - * set the cpu id at the top of the stack so we know on which cpu is - * this stak in use when we trap to kernel - */ - *((reg_t *)(tss->sp0 + 1 * sizeof(reg_t))) = cpu; -} - PRIVATE void fpu_init(void) { unsigned short cw, sw; @@ -313,7 +300,20 @@ PUBLIC void arch_init(void) idt_init(); - tss_init(&tss, &k_boot_stktop, 0); + /* FIXME stupid a.out + * align the stacks in the stack are to the K_STACK_SIZE which is a + * power of 2 + */ + k_stacks = (void*) (((vir_bytes)&k_stacks_start + K_STACK_SIZE - 1) & + ~(K_STACK_SIZE - 1)); + +#ifndef CONFIG_SMP + /* + * use stack 0 and cpu id 0 on a single processor machine, SMP + * configuration does this in smp_init() for all cpus at once + */ + tss_init(0, get_k_stack_top(0)); +#endif acpi_init(); @@ -547,7 +547,11 @@ PUBLIC struct proc * arch_finish_switch_to_user(void) char * stk; struct proc * p; - stk = (char *)tss.sp0; +#ifdef CONFIG_SMP + 
stk = (char *)tss[cpuid].sp0; +#else + stk = (char *)tss[0].sp0; +#endif /* set pointer to the process to run on the stack */ p = get_cpulocal_var(proc_ptr); *((reg_t *)stk) = (reg_t) p; diff --git a/kernel/arch/i386/arch_watchdog.c b/kernel/arch/i386/arch_watchdog.c index 6becd4f69..8fce5c2dc 100644 --- a/kernel/arch/i386/arch_watchdog.c +++ b/kernel/arch/i386/arch_watchdog.c @@ -1,6 +1,7 @@ #include "kernel/kernel.h" #include "kernel/watchdog.h" #include "arch_proto.h" +#include "glo.h" #include #include diff --git a/kernel/arch/i386/glo.h b/kernel/arch/i386/glo.h index d056d5c20..aaa910899 100644 --- a/kernel/arch/i386/glo.h +++ b/kernel/arch/i386/glo.h @@ -1,7 +1,14 @@ #ifndef __GLO_X86_H__ #define __GLO_X86_H__ +#include "kernel/kernel.h" +#include "proto.h" + EXTERN int cpu_has_tsc; /* signal whether this cpu has time stamp register. This feature was introduced by Pentium */ +EXTERN struct tss_s tss[CONFIG_MAX_CPUS]; + +EXTERN int i386_paging_enabled; + #endif /* __GLO_X86_H__ */ diff --git a/kernel/arch/i386/include/arch_proto.h b/kernel/arch/i386/include/arch_proto.h index a669981d2..54658754c 100644 --- a/kernel/arch/i386/include/arch_proto.h +++ b/kernel/arch/i386/include/arch_proto.h @@ -2,6 +2,12 @@ #ifndef _I386_PROTO_H #define _I386_PROTO_H +#include + +#define K_STACK_SIZE I386_PAGE_SIZE + +#ifndef __ASSEMBLY__ + /* Hardware interrupt handlers. 
*/ _PROTOTYPE( void hwint00, (void) ); _PROTOTYPE( void hwint01, (void) ); @@ -95,6 +101,8 @@ _PROTOTYPE( void frstor, (void *)); _PROTOTYPE( unsigned short fnstsw, (void)); _PROTOTYPE( void fnstcw, (unsigned short* cw)); +_PROTOTYPE( void switch_k_stack, (void * esp, void (* continuation)(void))); + _PROTOTYPE(void __switch_address_space, (struct proc * p, struct proc ** __ptproc)); #define switch_address_space(proc) \ @@ -132,8 +140,7 @@ struct tss_s { /* u8_t iomap[0]; */ }; -EXTERN struct tss_s tss; - +_PROTOTYPE( void prot_init, (void) ); _PROTOTYPE( void idt_init, (void) ); _PROTOTYPE( void init_dataseg, (struct segdesc_s *segdp, phys_bytes base, vir_bytes size, int privilege) ); @@ -151,13 +158,32 @@ struct gate_table_s { unsigned char privilege; }; -EXTERN struct gate_table_s gate_table_pic[]; +extern struct gate_table_s gate_table_pic[]; /* copies an array of vectors to the IDT. The last vector must be zero filled */ _PROTOTYPE(void idt_copy_vectors, (struct gate_table_s * first)); _PROTOTYPE(void idt_reload,(void)); EXTERN void * k_boot_stktop; +EXTERN void * k_stacks_start; +extern void * k_stacks; + +#define get_k_stack_top(cpu) ((void *)(((char*)(k_stacks)) \ + + 2 * ((cpu) + 1) * K_STACK_SIZE)) + +#ifndef __GNUC__ +/* call a function to read the stack fram pointer (%ebp) */ +_PROTOTYPE(reg_t read_ebp, (void)); +#define get_stack_frame(__X) ((reg_t)read_ebp()) +#else +/* read %ebp directly */ +#define get_stack_frame(__X) ((reg_t)__builtin_frame_address(0)) +#endif + +/* + * sets up TSS for a cpu and assigns kernel stack and cpu id + */ +_PROTOTYPE(void tss_init, (unsigned cpu, void * kernel_stack)); _PROTOTYPE( void int_gate, (unsigned vec_nr, vir_bytes offset, unsigned dpl_type) ); @@ -193,4 +219,6 @@ _PROTOTYPE(int platform_tbl_ptr, (phys_bytes start, /* functions defined in architecture-independent kernel source. 
*/ #include "kernel/proto.h" +#endif /* __ASSEMBLY__ */ + #endif diff --git a/kernel/arch/i386/include/arch_smp.h b/kernel/arch/i386/include/arch_smp.h new file mode 100644 index 000000000..90e70223d --- /dev/null +++ b/kernel/arch/i386/include/arch_smp.h @@ -0,0 +1,42 @@ +#ifndef __SMP_X86_H__ +#define __SMP_X86_H__ + +#include "arch_proto.h" /* K_STACK_SIZE */ + +#define MAX_NR_INTERRUPT_ENTRIES 128 + +#define SMP_SCHED_PROC 0xF0 +#define SMP_DEQUEUE_PROC 0xF1 +#define SMP_CPU_REBOOT 0xF2 +#define SMP_CPU_HALT 0xF3 +#define SMP_ERROR_INT 0xF4 + +/* currently only 2 interrupt priority levels are used */ +#define SPL0 0x0 +#define SPLHI 0xF + +#define SMP_IPI_DEST 0 +#define SMP_IPI_SELF 1 +#define SMP_IPI_TO_ALL 2 +#define SMP_IPI_TO_ALL_BUT_SELF 3 + +#ifndef __ASSEMBLY__ + +/* returns the current cpu id */ +#define cpuid (((u32_t *)(((u32_t)get_stack_frame() + (K_STACK_SIZE - 1)) \ + & ~(K_STACK_SIZE - 1)))[-1]) +/* + * in case apic or smp is disabled in boot monitor, we need to finish single cpu + * boot using the legacy PIC + */ +#define smp_single_cpu_fallback() do { \ + tss_init(0, get_k_stack_top(0)); \ + bsp_finish_booting(); \ +} while(0) + +extern unsigned char cpuid2apicid[CONFIG_MAX_CPUS]; + +#endif + +#endif /* __SMP_X86_H__ */ + diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h index ad38d37b3..d66f19275 100644 --- a/kernel/arch/i386/include/archconst.h +++ b/kernel/arch/i386/include/archconst.h @@ -22,8 +22,10 @@ #define SS_INDEX 5 /* kernel SS (386: monitor SS at startup) */ #define CS_INDEX 6 /* kernel CS */ #define MON_CS_INDEX 7 /* temp for BIOS (386: monitor CS at startup) */ -#define TSS_INDEX 8 /* kernel TSS */ -#define FIRST_LDT_INDEX 9 /* rest of descriptors are LDT's */ +#define TSS_INDEX_FIRST 8 /* first kernel TSS */ +#define TSS_INDEX_BOOT TSS_INDEX_FIRST +#define TSS_INDEX(cpu) (TSS_INDEX_FIRST + (cpu)) /* per cpu kernel tss */ +#define FIRST_LDT_INDEX TSS_INDEX(CONFIG_MAX_CPUS) /* rest of 
descriptors are LDT's */ /* Descriptor structure offsets. */ #define DESC_BASE 2 /* to base_low */ @@ -44,7 +46,8 @@ #define SS_SELECTOR SS_INDEX * DESC_SIZE #define CS_SELECTOR CS_INDEX * DESC_SIZE #define MON_CS_SELECTOR MON_CS_INDEX * DESC_SIZE -#define TSS_SELECTOR TSS_INDEX * DESC_SIZE +#define TSS_SELECTOR(cpu) (TSS_INDEX(cpu) * DESC_SIZE) +#define TSS_SELECTOR_BOOT (TSS_INDEX_BOOT * DESC_SIZE) /* Privileges. */ #define INTR_PRIVILEGE 0 /* kernel and interrupt handlers */ @@ -156,4 +159,11 @@ /* Poweroff 16-bit code address */ #define BIOS_POWEROFF_ENTRY 0x1000 + +/* + * defines how many bytes are reserved at the top of the kernel stack for global + * information like currently scheduled process or current cpu id + */ +#define X86_STACK_TOP_RESERVED (2 * sizeof(reg_t)) + #endif /* _I386_ACONST_H */ diff --git a/kernel/arch/i386/klib.S b/kernel/arch/i386/klib.S index e6d9cd911..bf1091bbe 100644 --- a/kernel/arch/i386/klib.S +++ b/kernel/arch/i386/klib.S @@ -115,37 +115,6 @@ csinit: xchgl _C_LABEL(mon_sp), %esp /* unswitch stacks */ lidt _C_LABEL(gdt)+IDT_SELECTOR /* reload interrupt descriptor table */ -#ifdef CONFIG_APIC - cmpl $0x0, lapic_addr - jne 3f - mov $0, %ebx - jmp 4f - -3: - mov $FLAT_DS_SELECTOR, %ebx - mov %bx, %fs - movl lapic_addr, %eax - add $0x20, %eax - .byte 0x64; mov (%eax), %ebx - and $0xFF000000, %ebx - shr $24, %ebx - movzb %bl, %ebx - -4: - add $apicid2cpuid, %ebx - movzb (%ebx), %eax - shl $3, %eax - mov %eax, %ebx - add $TSS_SELECTOR, %eax - addl _C_LABEL(gdt)+DESC_ACCESS, %eax - and $~0x02, %eax - ltr %bx /* set TSS register */ - - mov $DS_SELECTOR, %eax - mov %ax, %fs - -#endif /* CONFIG_APIC */ - pop %eax outb $INT_CTLMASK /* restore interrupt masks */ movb %ah, %al @@ -908,3 +877,171 @@ ENTRY(eoi_8259_slave) idt_ptr: .short 0x3ff .long 0x0 +#ifdef CONFIG_SMP + +/*===========================================================================*/ +/* smp_get_htt */ 
+/*===========================================================================*/ +/* PUBLIC int smp_get_htt(void); */ +/* return true if the processor is hyper-threaded. */ +ENTRY(smp_get_htt) + push %ebp + mov %esp, %ebp + pushf + pop %eax + mov %eax, %ebx + and $0x200000, %eax + je 0f + mov $0x1, %eax +/* FIXME don't use the byte code */ +.byte 0x0f, 0xa2 /* opcode for cpuid */ + mov %edx, %eax + pop %ebp + ret +0: + xor %eax, %eax + pop %ebp + ret + +/*===========================================================================*/ +/* smp_get_num_htt */ +/*===========================================================================*/ +/* PUBLIC int smp_get_num_htt(void); */ +/* Get the number of hyper-threaded processor cores */ +ENTRY(smp_get_num_htt) + push %ebp + mov %esp, %ebp + pushf + pop %eax + mov %eax, %ebx + and $0x200000, %eax + je 0f + mov $0x1, %eax +/* FIXME don't use the byte code */ +.byte 0x0f, 0xa2 /* opcode for cpuid */ + mov %ebx, %eax + pop %ebp + ret +0: + xor %eax, %eax + pop %ebp + ret + +/*===========================================================================*/ +/* smp_get_cores */ +/*===========================================================================*/ +/* PUBLIC int smp_get_cores(void); */ +/* Get the number of cores. */ +ENTRY(smp_get_cores) + push %ebp + mov %esp, %ebp + pushf + pop %eax + mov %eax, %ebx + and $0x200000, %eax + je 0f + push %ecx + xor %ecx, %ecx + mov $0x4, %eax +/* FIXME don't use the byte code */ +.byte 0x0f, 0xa2 /* opcode for cpuid */ + pop %ebp + ret +0: + xor %eax, %eax + pop %ebp + ret + +/*===========================================================================*/ +/* arch_spinlock_lock */ +/*===========================================================================*/ +/* void arch_spinlock_lock (u32_t *lock_data) + * { + * while (test_and_set(lock_data) == 1) + * while (*lock_data == 1) + * ; + * } + * eax register is clobbered. 
+ */ +ENTRY(arch_spinlock_lock) + push %ebp + mov %esp, %ebp + push %ebx + mov 8(%ebp), %eax + mov $1, %ebx +/* FIXME use exponential backoff */ +2: + xchg %ebx, (%eax) + test %ebx, %ebx + je 0f +1: +/* FIXME don't use the byte code */ +.byte 0xf3, 0x90 /* pause */ + cmp $0, (%eax) + jne 1b + jmp 2b +0: + mfence + pop %ebx + pop %ebp + ret + +/*===========================================================================*/ +/* arch_spinlock_unlock */ +/*===========================================================================*/ +/* * void arch_spinlock_unlock (unsigned int *lockp) */ +/* spin lock release routine. */ +ENTRY(arch_spinlock_unlock) + mov 4(%esp), %eax + push %ebx + mov $0, %ebx + xchg %ebx, (%eax) + mfence + pop %ebx + ret + +/*===========================================================================*/ +/* mfence */ +/*===========================================================================*/ +/* PUBLIC void mfence (void); */ +/* architecture specific memory barrier routine. */ +ENTRY(mfence) + mfence + ret + +#endif /* CONFIG_SMP */ + +/*===========================================================================*/ +/* arch_pause */ +/*===========================================================================*/ +/* PUBLIC void arch_pause (void); */ +/* architecture specific pause routine. 
*/ +ENTRY(arch_pause) + pause + ret + +/*===========================================================================*/ +/* read_ebp */ +/*===========================================================================*/ +/* PUBLIC u16_t cpuid(void) */ +ENTRY(read_ebp) + mov %ebp, %eax + ret + + +/* + * void switch_k_stack(void * esp, void (* continuation)(void)); + * + * sets the current stack pointer to the given value and continues execution at + * the given address + */ +ENTRY(switch_k_stack) + /* get the arguments from the stack */ + mov 8(%esp), %eax + mov 4(%esp), %ecx + mov $0, %ebp /* reset %ebp for stack trace */ + mov %ecx, %esp /* set the new stack */ + jmp *%eax /* and jump to the continuation */ + + /* NOT_REACHABLE */ +0: jmp 0b diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index 24ca0db62..10c432126 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -27,6 +27,8 @@ #endif #endif +PUBLIC int i386_paging_enabled = 0; + PRIVATE int psok = 0; #define MAX_FREEPDES (3 * CONFIG_MAX_CPUS) @@ -935,8 +937,10 @@ void i386_freepde(const int pde) PRIVATE int oxpcie_mapping_index = -1; -PUBLIC int arch_phys_map(const int index, phys_bytes *addr, - phys_bytes *len, int *flags) +PUBLIC int arch_phys_map(const int index, + phys_bytes *addr, + phys_bytes *len, + int *flags) { static int first = 1; int freeidx = 0; @@ -1079,7 +1083,12 @@ PUBLIC int arch_enable_paging(struct proc * caller, const message * m_ptr) io_apic[i].addr = io_apic[i].vaddr; } } + + /* TODO APs are still waiting, release them */ #endif + + i386_paging_enabled = 1; + #ifdef CONFIG_WATCHDOG /* * We make sure that we don't enable the watchdog until paging is turned diff --git a/kernel/arch/i386/mpx.S b/kernel/arch/i386/mpx.S index 42d25cd73..65d8855b6 100644 --- a/kernel/arch/i386/mpx.S +++ b/kernel/arch/i386/mpx.S @@ -43,7 +43,7 @@ begdata: begbss: #endif - +#include "../../kernel.h" #include #include #include @@ -55,6 +55,12 @@ begbss: #include "sconst.h" 
#include "multiboot.h" +#include "arch_proto.h" /* K_STACK_SIZE */ + +#ifdef CONFIG_SMP +#include "kernel/smp.h" +#endif + /* Selected 386 tss offsets. */ #define TSS3_S_SP0 4 @@ -72,7 +78,7 @@ IMPORT(switch_to_user) /*===========================================================================*/ /* MINIX */ /*===========================================================================*/ -.globl MINIX +.global MINIX MINIX: /* this is the entry point for the MINIX kernel */ jmp over_flags /* skip over the next few bytes */ @@ -164,7 +170,7 @@ copygdt: mov %ax, %fs mov %ax, %gs mov %ax, %ss - mov $_C_LABEL(k_boot_stktop), %esp /* set sp to point to the top of kernel stack */ + mov $_C_LABEL(k_boot_stktop) - 4, %esp /* set sp to point to the top of kernel stack */ /* Save boot parameters into these global variables for i386 code */ movl %edx, _C_LABEL(params_size) @@ -194,7 +200,7 @@ csinit: mov %ax, %fs mov %ax, %gs mov %ax, %ss - movw $TSS_SELECTOR, %ax /* no other TSS is used */ + movw $TSS_SELECTOR_BOOT, %ax /* no other TSS is used */ ltr %ax push $0 /* set flags to known good state */ popf /* esp, clear nested task and int enable */ @@ -615,6 +621,36 @@ ENTRY(reload_cr3) pop %ebp ret +#ifdef CONFIG_SMP +ENTRY(startup_ap_32) + /* + * we are in protected mode now, %cs is correct and we need to set the + * data descriptors before we can touch anything + */ + movw $DS_SELECTOR, %ax + mov %ax, %ds + mov %ax, %ss + mov %ax, %es + movw $0, %ax + mov %ax, %fs + mov %ax, %gs + + /* load TSS for this cpu which was prepared by BSP */ + movl _C_LABEL(__ap_id), %ecx + shl $3, %cx + mov $TSS_SELECTOR(0), %eax + add %cx, %ax + ltr %ax + + /* + * use the boot stack for now. 
The running CPUs are already using their + * own stack, the rest is still waiting to be booted + */ + mov $_C_LABEL(k_boot_stktop) - 4, %esp + jmp _C_LABEL(smp_ap_boot) + hlt +#endif + /*===========================================================================*/ /* data */ /*===========================================================================*/ @@ -630,5 +666,15 @@ ENTRY(reload_cr3) * the kernel stack */ k_boot_stack: -.space 4096 /* kernel stack */ /* FIXME use macro here */ +.space K_STACK_SIZE /* kernel stack */ /* FIXME use macro here */ LABEL(k_boot_stktop) /* top of kernel stack */ + +.balign K_STACK_SIZE +LABEL(k_stacks_start) + +/* two pages for each stack, one for data, other as a sandbox */ +.space 2 * (K_STACK_SIZE * (CONFIG_MAX_CPUS + 1)) + +LABEL(k_stacks_end) + +/* top of kernel stack */ diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index c752e6a3b..1dd4018b5 100644 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -36,7 +36,7 @@ PUBLIC struct segdesc_s gdt[GDT_SIZE]= /* used in klib.s and mpx.s */ {0xffff,0,0,0x9a,0x0f,0}, /* temp for BIOS (386: monitor CS at startup) */ }; PRIVATE struct gatedesc_s idt[IDT_SIZE]; /* zero-init so none present */ -PUBLIC struct tss_s tss; /* zero init */ +PUBLIC struct tss_s tss[CONFIG_MAX_CPUS]; /* zero init */ FORWARD _PROTOTYPE( void sdesc, (struct segdesc_s *segdp, phys_bytes base, vir_bytes size) ); @@ -130,6 +130,31 @@ PUBLIC struct gate_table_s gate_table_pic[] = { { NULL, 0, 0} }; +PUBLIC void tss_init(unsigned cpu, void * kernel_stack) +{ + struct tss_s * t = &tss[cpu]; + + t->ss0 = DS_SELECTOR; + init_dataseg(&gdt[TSS_INDEX(cpu)], vir2phys(t), + sizeof(struct tss_s), INTR_PRIVILEGE); + gdt[TSS_INDEX(cpu)].access = PRESENT | + (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; + + /* Complete building of main TSS. 
*/ + t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */ + + /* + * make space for process pointer and cpu id and point to the first + * usable word + */ + t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED; + /* + * set the cpu id at the top of the stack so we know on which cpu + * this stack is in use when we trap to kernel + */ + *((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu; +} + /*===========================================================================* * prot_init * *===========================================================================*/ @@ -175,13 +200,8 @@ PUBLIC void prot_init(void) rp->p_seg.p_ldt_sel = ldt_index * DESC_SIZE; } - /* Build main TSS */ - tss.ss0 = DS_SELECTOR; - init_dataseg(&gdt[TSS_INDEX], vir2phys(&tss), sizeof(tss), INTR_PRIVILEGE); - gdt[TSS_INDEX].access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; - - /* Complete building of main TSS. */ - tss.iobase = sizeof tss; /* empty i/o permissions map */ + /* Build boot TSS */ + tss_init(0, &k_boot_stktop); } PUBLIC void idt_copy_vectors(struct gate_table_s * first) diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h index 50e92f97b..5f2bd6e09 100644 --- a/kernel/arch/i386/sconst.h +++ b/kernel/arch/i386/sconst.h @@ -128,7 +128,7 @@ push %ebp ;\ ;\ movl (CURR_PROC_PTR + 4 + displ)(%esp), %ebp ;\ - ;\ + \ /* save the segment registers */ \ SAVE_SEGS(%ebp) ;\ \ diff --git a/kernel/arch/i386/trampoline.S b/kernel/arch/i386/trampoline.S new file mode 100644 index 000000000..1ec8919b6 --- /dev/null +++ b/kernel/arch/i386/trampoline.S @@ -0,0 +1,31 @@ +#include +#include "archconst.h" + +.balign 4096 +.text +.code16 +ENTRY(trampoline) + cli + + /* %cs has some value and we must use the same for data */ + mov %cs, %ax + mov %ax, %ds + + /* load gdt and idt prepared by bsp */ + lgdtl _C_LABEL(__ap_gdt) - _C_LABEL(trampoline) + lidtl _C_LABEL(__ap_idt) - _C_LABEL(trampoline) + + /* switch to protected mode */ + mov %cr0, %eax + orb $1, %al + mov
%eax, %cr0 + + ljmp $CS_SELECTOR, $_C_LABEL(startup_ap_32) + +.balign 4 +LABEL(__ap_id) +.space 4 +LABEL(__ap_gdt) +.space 8 +LABEL(__ap_idt) +.space 8 diff --git a/kernel/config.h b/kernel/config.h index 31dbd3792..ff7fda94c 100644 --- a/kernel/config.h +++ b/kernel/config.h @@ -64,8 +64,7 @@ #endif #define VDEVIO_BUF_SIZE 64 /* max elements per VDEVIO request */ -/* How many bytes for the kernel stack. Space allocated in mpx.s. */ -#define K_STACK_BYTES 1024 +#define K_PARAM_SIZE 512 #endif /* CONFIG_H */ diff --git a/kernel/glo.h b/kernel/glo.h index cae5332c9..3ac06075a 100644 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -62,6 +62,10 @@ EXTERN u64_t cpu_hz[CONFIG_MAX_CPUS]; #define cpu_set_freq(cpu, freq) do {cpu_hz[cpu] = freq;} while (0) #define cpu_get_freq(cpu) cpu_hz[cpu] +#ifdef CONFIG_SMP +EXTERN int config_no_smp; /* optionally turn off SMP */ +#endif + /* VM */ EXTERN int vm_running; EXTERN int catch_pagefaults; diff --git a/kernel/kernel.h b/kernel/kernel.h index a8d571cea..6247f2c29 100644 --- a/kernel/kernel.h +++ b/kernel/kernel.h @@ -12,9 +12,10 @@ * (non-zero) is set in monitor */ #define CONFIG_WATCHDOG -/* We only support 1 cpu now */ + +#ifndef CONFIG_MAX_CPUS #define CONFIG_MAX_CPUS 1 -#define cpuid 0 +#endif /* OXPCIe952 PCIe with 2 UARTs in-kernel support */ #define CONFIG_OXPCIE 0 @@ -56,6 +57,17 @@ #include "debug.h" /* debugging, MUST be last kernel header */ #include "cpulocals.h" +#ifndef CONFIG_SMP +/* We only support 1 cpu now */ +#define CONFIG_MAX_CPUS 1 +#define cpuid 0 + +#else + +#include "smp.h" + +#endif + #endif /* __ASSEMBLY__ */ #endif /* KERNEL_H */ diff --git a/kernel/main.c b/kernel/main.c index 02194d166..da5532c39 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -21,9 +21,62 @@ #include "clock.h" #include "hw_intr.h" +#ifdef CONFIG_SMP +#include "smp.h" +#endif + /* Prototype declarations for PRIVATE functions.
*/ FORWARD _PROTOTYPE( void announce, (void)); +PUBLIC void bsp_finish_booting(void) +{ +#if SPROFILE + sprofiling = 0; /* we're not profiling until instructed to */ +#endif /* SPROFILE */ + cprof_procs_no = 0; /* init nr of hash table slots used */ + + vm_running = 0; + krandom.random_sources = RANDOM_SOURCES; + krandom.random_elements = RANDOM_ELEMENTS; + + /* MINIX is now ready. All boot image processes are on the ready queue. + * Return to the assembly code to start running the current process. + */ + get_cpulocal_var(bill_ptr) = proc_addr(IDLE); /* it has to point somewhere */ + announce(); /* print MINIX startup banner */ + + /* + * enable timer interrupts and clock task on the boot CPU + */ + if (boot_cpu_init_timer(system_hz)) { + panic("FATAL : failed to initialize timer interrupts, " + "cannot continue without any clock source!"); + } + +/* Warnings for sanity checks that take time. These warnings are printed + * so it's a clear warning no full release should be done with them + * enabled. + */ +#if DEBUG_SCHED_CHECK + FIXME("DEBUG_SCHED_CHECK enabled"); +#endif +#if DEBUG_VMASSERT + FIXME("DEBUG_VMASSERT enabled"); +#endif +#if DEBUG_PROC_CHECK + FIXME("PROC check enabled"); +#endif + + DEBUGEXTRA(("cycles_accounting_init()... ")); + cycles_accounting_init(); + DEBUGEXTRA(("done\n")); + + assert(runqueues_ok()); + + switch_to_user(); + NOT_REACHABLE; +} + /*===========================================================================* * main * *===========================================================================*/ @@ -197,52 +250,6 @@ PUBLIC int main(void) DEBUGEXTRA(("done\n")); } - /* Architecture-dependent initialization. */ - DEBUGEXTRA(("arch_init()... ")); - arch_init(); - DEBUGEXTRA(("done\n")); - - /* System and processes initialization */ - DEBUGEXTRA(("system_init()... 
")); - system_init(); - DEBUGEXTRA(("done\n")); - -#if SPROFILE - sprofiling = 0; /* we're not profiling until instructed to */ -#endif /* SPROFILE */ - cprof_procs_no = 0; /* init nr of hash table slots used */ - - vm_running = 0; - krandom.random_sources = RANDOM_SOURCES; - krandom.random_elements = RANDOM_ELEMENTS; - - /* MINIX is now ready. All boot image processes are on the ready queue. - * Return to the assembly code to start running the current process. - */ - get_cpulocal_var(bill_ptr) = proc_addr(IDLE); /* it has to point somewhere */ - announce(); /* print MINIX startup banner */ - - /* - * enable timer interrupts and clock task on the boot CPU - */ - - if (boot_cpu_init_timer(system_hz)) { - panic( "FATAL : failed to initialize timer interrupts; " - "cannot continue without any clock source!"); - } - -/* Warnings for sanity checks that take time. These warnings are printed - * so it's a clear warning no full release should be done with them - * enabled. - */ -#if DEBUG_PROC_CHECK - FIXME("PROC check enabled"); -#endif - - DEBUGEXTRA(("cycles_accounting_init()... ")); - cycles_accounting_init(); - DEBUGEXTRA(("done\n")); - #define IPCNAME(n) { \ assert((n) >= 0 && (n) <= IPCNO_HIGHEST); \ assert(!ipc_call_names[n]); \ @@ -256,9 +263,34 @@ PUBLIC int main(void) IPCNAME(SENDNB); IPCNAME(SENDA); - assert(runqueues_ok()); + /* Architecture-dependent initialization. */ + DEBUGEXTRA(("arch_init()... ")); + arch_init(); + DEBUGEXTRA(("done\n")); + + /* System and processes initialization */ + DEBUGEXTRA(("system_init()... 
")); + system_init(); + DEBUGEXTRA(("done\n")); + +#ifdef CONFIG_SMP + if (config_no_apic) { + BOOT_VERBOSE(printf("APIC disabled, disables SMP, using legacy PIC\n")); + smp_single_cpu_fallback(); + } else if (config_no_smp) { + BOOT_VERBOSE(printf("SMP disabled, using legacy PIC\n")); + smp_single_cpu_fallback(); + } else + smp_init(); +#else + /* + * if configured for a single CPU, we are already on the kernel stack which we + * are going to use everytime we execute kernel code. We finish booting and we + * never return here + */ + bsp_finish_booting(); +#endif - switch_to_user(); NOT_REACHABLE; return 1; } @@ -304,6 +336,17 @@ PUBLIC void minix_shutdown(timer_t *tp) * down MINIX. How to shutdown is in the argument: RBT_HALT (return to the * monitor), RBT_MONITOR (execute given code), RBT_RESET (hard reset). */ +#ifdef CONFIG_SMP + /* + * FIXME + * + * we will need to stop timers on all cpus if SMP is enabled and put them in + * such a state that we can perform the whole boot process once restarted from + * monitor again + */ + if (ncpus > 1) + NOT_IMPLEMENTED; +#endif arch_stop_local_timer(); hw_intr_disable_all(); intr_init(INTS_ORIG, 0); diff --git a/kernel/proc.c b/kernel/proc.c index c562c9f11..70938a9d9 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -42,6 +42,7 @@ #include "proc.h" #include "vm.h" #include "clock.h" +#include "spinlock.h" #include "arch_proto.h" diff --git a/kernel/proto.h b/kernel/proto.h index bdc7a8a18..f0923ee5e 100644 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -1,5 +1,8 @@ /* Function prototypes. 
*/ +/* FIXME this is a hack to avoid inclusion conflicts */ +#ifdef __kernel__ + #ifndef PROTO_H #define PROTO_H @@ -37,6 +40,7 @@ _PROTOTYPE( void fpu_sigcontext, (struct proc *, struct sigframe *fr, struct sig _PROTOTYPE( int main, (void) ); _PROTOTYPE( void prepare_shutdown, (int how) ); _PROTOTYPE( __dead void minix_shutdown, (struct timer *tp) ); +_PROTOTYPE( void bsp_finish_booting, (void) ); /* proc.c */ @@ -207,4 +211,6 @@ _PROTOTYPE(void release_fpu, (void)); /* utility.c */ _PROTOTYPE( void cpu_print_freq, (unsigned cpu)); +#endif /* __kernel__ */ + #endif /* PROTO_H */ diff --git a/kernel/smp.c b/kernel/smp.c new file mode 100644 index 000000000..61745cdc1 --- /dev/null +++ b/kernel/smp.c @@ -0,0 +1,7 @@ +#include "smp.h" + +unsigned ncpus; +unsigned ht_per_core; +unsigned bsp_cpu_id; + +struct cpu cpus[CONFIG_MAX_CPUS]; diff --git a/kernel/smp.h b/kernel/smp.h new file mode 100644 index 000000000..d68c7ee05 --- /dev/null +++ b/kernel/smp.h @@ -0,0 +1,55 @@ +#ifndef __SMP_H__ +#define __SMP_H__ + +#ifdef CONFIG_SMP + +#ifndef __ASSEMBLY__ + +#include "kernel.h" +#include "arch_smp.h" + +/* number of CPUs (execution strands) in the system */ +EXTERN unsigned ncpus; +/* Number of virtual strands per physical core */ +EXTERN unsigned ht_per_core; +/* which cpu is bootstrapping */ +EXTERN unsigned bsp_cpu_id; + +#define cpu_is_bsp(cpu) (bsp_cpu_id == cpu) + +/* + * SMP initialization is largely architecture dependent and each architecture + * must provide a method how to do it. If initiating SMP fails the function does + * not report it. However it must put the system in such a state that it falls + * back to a uniprocessor system.
Although the uniprocessor configuration may be + * suboptimal, the system must be able to run on the bootstrap processor as if + * it was the only processor in the system + */ +_PROTOTYPE(void smp_init, (void)); + +_PROTOTYPE(void smp_ipi_err_int, (void)); +_PROTOTYPE(void smp_ipi_spv_int, (void)); +_PROTOTYPE(void smp_ipi_sched, (void)); +_PROTOTYPE(void smp_ipi_dequeue, (void)); +_PROTOTYPE(void smp_ipi_stop, (void)); +_PROTOTYPE(void smp_ipi_reboot, (void)); + +#define CPU_IS_BSP 1 +#define CPU_IS_READY 2 + +struct cpu { + u32_t flags; +}; + +EXTERN struct cpu cpus[CONFIG_MAX_CPUS]; + +#define cpu_set_flag(cpu, flag) do { cpus[cpu].flags |= (flag); } while(0) +#define cpu_clear_flag(cpu, flag) do { cpus[cpu].flags &= ~(flag); } while(0) +#define cpu_test_flag(cpu, flag) (cpus[cpu].flags & (flag)) +#define cpu_is_ready(cpu) cpu_test_flag(cpu, CPU_IS_READY) + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_SMP */ + +#endif /* __SMP_H__ */ diff --git a/kernel/spinlock.h b/kernel/spinlock.h new file mode 100644 index 000000000..0a2bdffa5 --- /dev/null +++ b/kernel/spinlock.h @@ -0,0 +1,37 @@ +#ifndef __SPINLOCK_H__ +#define __SPINLOCK_H__ + +#include "kernel.h" + +typedef struct spinlock { + atomic_t val; +} spinlock_t; + +#ifndef CONFIG_SMP + +#define SPINLOCK_DEFINE(name) +#define PRIVATE_SPINLOCK_DEFINE(name) +#define SPINLOCK_DECLARE(name) +#define spinlock_init(sl) +#define spinlock_lock(sl) +#define spinlock_unlock(sl) + +#else + +/* SMP */ +#define SPINLOCK_DEFINE(name) spinlock_t name; +#define PRIVATE_SPINLOCK_DEFINE(name) PRIVATE SPINLOCK_DEFINE(name) +#define SPINLOCK_DECLARE(name) extern SPINLOCK_DEFINE(name) +#define spinlock_init(sl) do { (sl)->val = 0; } while (0) +#if CONFIG_MAX_CPUS == 1 +#define spinlock_lock(sl) +#define spinlock_unlock(sl) +#else +#define spinlock_lock(sl) arch_spinlock_lock((atomic_t*) sl) +#define spinlock_unlock(sl) arch_spinlock_unlock((atomic_t*) sl) +#endif + + +#endif + +#endif /* __SPINLOCK_H__ */ diff --git 
a/kernel/start.c b/kernel/start.c index 3b4786309..1c178914b 100644 --- a/kernel/start.c +++ b/kernel/start.c @@ -101,6 +101,16 @@ PUBLIC void cstart( watchdog_enabled = atoi(value); #endif +#ifdef CONFIG_SMP + if (config_no_apic) + config_no_smp = 1; + value = env_get("no_smp"); + if(value) + config_no_smp = atoi(value); + else + config_no_smp = 0; +#endif + /* Return to assembler code to switch to protected mode (if 286), * reload selectors and call main(). */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 489532e36..b4c19c8b6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -5,6 +5,7 @@ */ #include "watchdog.h" +#include "arch/i386/glo.h" unsigned watchdog_local_timer_ticks = 0U; struct arch_watchdog *watchdog; diff --git a/share/mk/bsd.own.mk b/share/mk/bsd.own.mk index 60f8ad8b3..f3c64e1bc 100644 --- a/share/mk/bsd.own.mk +++ b/share/mk/bsd.own.mk @@ -3,6 +3,16 @@ .if !defined(_MINIX_OWN_MK_) _MINIX_OWN_MK_=1 +.ifdef CONFIG_SMP +SMP_FLAGS += -DCONFIG_SMP +.ifdef CONFIG_MAX_CPUS +SMP_FLAGS += -DCONFIG_MAX_CPUS=${CONFIG_MAX_CPUS} +.endif +.endif + +CFLAGS += ${SMP_FLAGS} +AFLAGS += ${SMP_FLAGS} + MAKECONF?= /etc/make.conf .-include "${MAKECONF}"