From: Tomas Hruby Date: Wed, 15 Sep 2010 14:10:03 +0000 (+0000) Subject: SMP - Big kernel lock (BKL) X-Git-Tag: v3.2.0~864 X-Git-Url: http://zhaoyanbai.com/repos/icons/debian/static/man.7.txt?a=commitdiff_plain;h=6aa26565e6ced67c7d1f955432ec0675734fdab3;p=minix.git SMP - Big kernel lock (BKL) - to isolate execution inside kernel we use a big kernel lock implemented as a spinlock - the lock is acquired asap after entering kernel mode and released as late as possible. Only one CPU at a time can execute the core kernel code - measurements on real hw show that the overhead of this lock is close to 0% of kernel time for the current system - the overhead of this lock may be as high as 45% of kernel time in virtual machines depending on the ratio between physical CPUs available and emulated CPUs. The performance degradation is significant --- diff --git a/kernel/arch/i386/apic.c b/kernel/arch/i386/apic.c index 29ad02cee..8bc855b2a 100644 --- a/kernel/arch/i386/apic.c +++ b/kernel/arch/i386/apic.c @@ -423,6 +423,7 @@ PRIVATE int calib_clk_handler(irq_hook_t * UNUSED(hook)) stop_8253A_timer(); } + BKL_UNLOCK(); return 1; } @@ -467,6 +468,14 @@ PRIVATE void apic_calibrate_clocks(unsigned cpu) /* set the PIC timer to get some time */ init_8253A_timer(system_hz); + + /* + * We must unlock BKL here as the in-kernel interrupt will lock it + * again. The handler will unlock it after it is done. This is + * absolutely safe as only the BSP is running. 
It is just a workaround for a + * corner case for APIC timer calibration + */ + BKL_UNLOCK(); intr_enable(); /* loop for some time to get a sample */ diff --git a/kernel/arch/i386/arch_clock.c b/kernel/arch/i386/arch_clock.c index e66f4c30a..7c2309573 100644 --- a/kernel/arch/i386/arch_clock.c +++ b/kernel/arch/i386/arch_clock.c @@ -15,6 +15,7 @@ #ifdef CONFIG_APIC #include "apic.h" #endif +#include "spinlock.h" #define CLOCK_ACK_BIT 0x80 /* PS/2 clock interrupt acknowledge bit */ @@ -79,6 +80,8 @@ PRIVATE int calib_cpu_handler(irq_hook_t * UNUSED(hook)) tsc1 = tsc; } + /* just in case we are in an SMP single cpu fallback mode */ + BKL_UNLOCK(); return 1; } @@ -92,6 +95,8 @@ PRIVATE void estimate_cpu_freq(void) /* set the probe, we use the legacy timer, IRQ 0 */ put_irq_handler(&calib_cpu, CLOCK_IRQ, calib_cpu_handler); + /* just in case we are in an SMP single cpu fallback mode */ + BKL_UNLOCK(); /* set the PIC timer to get some time */ intr_enable(); @@ -101,6 +106,8 @@ PRIVATE void estimate_cpu_freq(void) } intr_disable(); + /* just in case we are in an SMP single cpu fallback mode */ + BKL_LOCK(); /* remove the probe */ rm_irq_handler(&calib_cpu); @@ -199,6 +206,19 @@ PUBLIC void context_stop(struct proc * p) } #endif } + + /* + * This function is called only if we switch from kernel to user or idle + * or back. Therefore this is a perfect location to place the big kernel + * lock which will hopefully disappear soon. + * + * If we stop accounting for KERNEL we must unlock the BKL. 
If we account + * for IDLE we must not hold the lock + */ + if (p == proc_addr(KERNEL)) + BKL_UNLOCK(); + else + BKL_LOCK(); } PUBLIC void context_stop_idle(void) diff --git a/kernel/arch/i386/arch_smp.c b/kernel/arch/i386/arch_smp.c index 5ef8d5d16..ce6009aaa 100644 --- a/kernel/arch/i386/arch_smp.c +++ b/kernel/arch/i386/arch_smp.c @@ -189,6 +189,10 @@ PRIVATE void ap_finish_booting(void) while(!i386_paging_enabled) arch_pause(); + + BKL_LOCK(); + printf("CPU %d is running\n", cpu); + BKL_UNLOCK(); for(;;); /* finish processor initialisation. */ diff --git a/kernel/arch/i386/klib.S b/kernel/arch/i386/klib.S index bf1091bbe..4687f782a 100644 --- a/kernel/arch/i386/klib.S +++ b/kernel/arch/i386/klib.S @@ -964,26 +964,27 @@ ENTRY(smp_get_cores) * eax register is clobbered. */ ENTRY(arch_spinlock_lock) - push %ebp - mov %esp, %ebp - push %ebx - mov 8(%ebp), %eax - mov $1, %ebx -/* FIXME use exponential backoff */ + mov 4(%esp), %eax + mov $1, %edx 2: - xchg %ebx, (%eax) - test %ebx, %ebx + mov $1, %ecx + xchg %ecx, (%eax) + test %ecx, %ecx je 0f + + cmp $(1<< 16), %edx + je 1f + shl %edx 1: -/* FIXME don't use the byte code */ -.byte 0xf3, 0x90 /* pause */ - cmp $0, (%eax) - jne 1b - jmp 2b + mov %edx, %ecx +3: + pause + sub $1, %ecx + test %ecx, %ecx + jz 2b + jmp 3b 0: mfence - pop %ebx - pop %ebp ret /*===========================================================================*/ @@ -993,11 +994,9 @@ ENTRY(arch_spinlock_lock) /* spin lock release routine. 
*/ ENTRY(arch_spinlock_unlock) mov 4(%esp), %eax - push %ebx - mov $0, %ebx - xchg %ebx, (%eax) + mov $0, %ecx + xchg %ecx, (%eax) mfence - pop %ebx ret /*===========================================================================*/ diff --git a/kernel/main.c b/kernel/main.c index da5532c39..a870e4466 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -24,6 +24,10 @@ #ifdef CONFIG_SMP #include "smp.h" #endif +#ifdef CONFIG_WATCHDOG +#include "watchdog.h" +#endif +#include "spinlock.h" /* Prototype declarations for PRIVATE functions. */ FORWARD _PROTOTYPE( void announce, (void)); @@ -93,6 +97,7 @@ PUBLIC int main(void) struct exec e_hdr; /* for a copy of an a.out header */ size_t argsz; /* size of arguments passed to crtso on stack */ + BKL_LOCK(); /* Global value to test segment sanity. */ magictest = MAGICTEST; diff --git a/kernel/smp.c b/kernel/smp.c index 61745cdc1..b0fafc266 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -5,3 +5,5 @@ unsigned ht_per_core; unsigned bsp_cpu_id; struct cpu cpus[CONFIG_MAX_CPUS]; + +SPINLOCK_DEFINE(big_kernel_lock) diff --git a/kernel/smp.h b/kernel/smp.h index d68c7ee05..405db60e8 100644 --- a/kernel/smp.h +++ b/kernel/smp.h @@ -7,6 +7,7 @@ #include "kernel.h" #include "arch_smp.h" +#include "spinlock.h" /* number of CPUs (execution strands in the system */ EXTERN unsigned ncpus; @@ -48,6 +49,8 @@ EXTERN struct cpu cpus[CONFIG_MAX_CPUS]; #define cpu_test_flag(cpu, flag) (cpus[cpu].flags & (flag)) #define cpu_is_ready(cpu) cpu_test_flag(cpu, CPU_IS_READY) +SPINLOCK_DECLARE(big_kernel_lock) + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_SMP */ diff --git a/kernel/spinlock.h b/kernel/spinlock.h index 0a2bdffa5..946a57a55 100644 --- a/kernel/spinlock.h +++ b/kernel/spinlock.h @@ -23,6 +23,7 @@ typedef struct spinlock { #define PRIVATE_SPINLOCK_DEFINE(name) PRIVATE SPINLOCK_DEFINE(name) #define SPINLOCK_DECLARE(name) extern SPINLOCK_DEFINE(name) #define spinlock_init(sl) do { (sl)->val = 0; } while (0) + #if CONFIG_MAX_CPUS == 1 #define 
spinlock_lock(sl) #define spinlock_unlock(sl) @@ -32,6 +33,9 @@ typedef struct spinlock { #endif -#endif +#endif /* CONFIG_SMP */ + +#define BKL_LOCK() spinlock_lock(&big_kernel_lock) +#define BKL_UNLOCK() spinlock_unlock(&big_kernel_lock) #endif /* __SPINLOCK_H__ */