From: David van Moolenbroek Date: Sat, 7 Nov 2015 14:42:40 +0000 (+0000) Subject: Kernel: per-state CPU accounting X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/Bv9ARM.ch13.html?a=commitdiff_plain;h=366d18b2b85b51f93b2a90700336b4a150a9149d;p=minix.git Kernel: per-state CPU accounting This functionality is required for BSD top(1), as exposed through the CTL_KERN KERN_CP_TIME sysctl(2) call. The idea is that the overall time spent in the system is divided into five categories. While NetBSD uses a separate category for the kernel ("system") and interrupts, we redefine "system" to mean userspace system services and "interrupts" to mean time spent in the kernel, thereby providing the same categories as MINIX3's own top(1), while adding the "nice" category which, like on NetBSD, is used for time spent by processes with a priority lowered by the system administrator. Change-Id: I2114148d1e07d9635055ceca7b163f337c53c43a --- diff --git a/minix/include/minix/com.h b/minix/include/minix/com.h index 76621a517..4f611224e 100644 --- a/minix/include/minix/com.h +++ b/minix/include/minix/com.h @@ -334,6 +334,7 @@ # define GET_IDLETSC 21 /* get cumulative idle time stamp counter */ # define GET_CPUINFO 23 /* get information about cpus */ # define GET_REGS 24 /* get general process registers */ +# define GET_CPUTICKS 25 /* get per-state ticks for a cpu */ /* Subfunctions for SYS_PRIVCTL */ #define SYS_PRIV_ALLOW 1 /* Allow process to run */ diff --git a/minix/include/minix/const.h b/minix/include/minix/const.h index c69d76b71..43b3fbc13 100644 --- a/minix/include/minix/const.h +++ b/minix/include/minix/const.h @@ -167,4 +167,12 @@ #define MKF_I386_INTEL_SYSENTER (1L << 0) /* SYSENTER available and supported */ #define MKF_I386_AMD_SYSCALL (1L << 1) /* SYSCALL available and supported */ +/* + * Number of per-CPU states for which time will be accounted. This *must* be + * the same value as NetBSD's CPUSTATES, which is defined in a rather + * unfortunate location (sys/sched.h). If the NetBSD value changes, our kernel + * must be adapted accordingly. + */ +#define MINIX_CPUSTATES 5 + #endif /* _MINIX_CONST_H */ diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h index f88ab4665..0934614c7 100644 --- a/minix/include/minix/ipc.h +++ b/minix/include/minix/ipc.h @@ -987,8 +987,9 @@ typedef struct { int quantum; int priority; int cpu; + int niced; - uint8_t padding[40]; + uint8_t padding[36]; } mess_lsys_krn_schedule; _ASSERT_MSG_SIZE(mess_lsys_krn_schedule); diff --git a/minix/include/minix/syslib.h b/minix/include/minix/syslib.h index 082118d3d..c0de856f5 100644 --- a/minix/include/minix/syslib.h +++ b/minix/include/minix/syslib.h @@ -37,8 +37,8 @@ int sys_clear(endpoint_t proc_ep); int sys_exit(void); int sys_trace(int req, endpoint_t proc_ep, long addr, long *data_p); -int sys_schedule(endpoint_t proc_ep, int priority, int quantum, int - cpu); +int sys_schedule(endpoint_t proc_ep, int priority, int quantum, int cpu, + int niced); int sys_schedctl(unsigned flags, endpoint_t proc_ep, int priority, int quantum, int cpu); @@ -190,6 +190,7 @@ int sys_diagctl(int ctl, char *arg1, int arg2); #define sys_getpriv(dst, nr) sys_getinfo(GET_PRIV, dst, 0,0, nr) #define sys_getidletsc(dst) sys_getinfo(GET_IDLETSC, dst, 0,0,0) #define sys_getregs(dst,nr) sys_getinfo(GET_REGS, dst, 0,0, nr) +#define sys_getcputicks(dst,nr) sys_getinfo(GET_CPUTICKS, dst, 0,0, nr) int sys_getinfo(int request, void *val_ptr, int val_len, void *val_ptr2, int val_len2); int sys_whoami(endpoint_t *ep, char *name, int namelen, int diff --git a/minix/kernel/arch/earm/arch_clock.c b/minix/kernel/arch/earm/arch_clock.c index 8990890c4..e6225d39b 100644 --- a/minix/kernel/arch/earm/arch_clock.c +++ b/minix/kernel/arch/earm/arch_clock.c @@ -12,6 +12,11 @@ #include +#include /* for CP_*, CPUSTATES */ +#if CPUSTATES != MINIX_CPUSTATES +/* If this breaks, the code in this file may have to be adapted accordingly. */ +#error "MINIX_CPUSTATES value is out of sync with NetBSD's!" +#endif #include "kernel/spinlock.h" @@ -23,6 +28,7 @@ #include "bsp_intr.h" static unsigned tsc_per_ms[CONFIG_MAX_CPUS]; +static uint64_t tsc_per_state[CONFIG_MAX_CPUS][CPUSTATES]; int init_local_timer(unsigned freq) { @@ -61,6 +67,7 @@ void context_stop(struct proc * p) { u64_t tsc; u32_t tsc_delta; + unsigned int counter; u64_t * __tsc_ctr_switch = get_cpulocal_var_ptr(tsc_ctr_switch); read_tsc_64(&tsc); @@ -81,9 +88,17 @@ void context_stop(struct proc * p) /* * deduct the just consumed cpu cycles from the cpu time left for this * process during its current quantum. Skip IDLE and other pseudo kernel - * tasks + * tasks, except for accounting purposes. */ if (p->p_endpoint >= 0) { + /* On MINIX3, the "system" counter covers system processes. */ + if (p->p_priv != priv_addr(USER_PRIV_ID)) + counter = CP_SYS; + else if (p->p_misc_flags & MF_NICED) + counter = CP_NICE; + else + counter = CP_USER; + #if DEBUG_RACE p->p_cpu_time_left = 0; #else @@ -91,6 +106,13 @@ void context_stop(struct proc * p) p->p_cpu_time_left -= tsc_delta; } else p->p_cpu_time_left = 0; #endif + } else { + /* On MINIX3, the "interrupts" counter covers the kernel. */ + if (p->p_endpoint == IDLE) + counter = CP_IDLE; + else + counter = CP_INTR; + } *__tsc_ctr_switch = tsc; @@ -139,3 +161,19 @@ short cpu_load(void) { return 0; } + +/* + * Return the number of clock ticks spent in each of a predefined number of + * CPU states. + */ +void +get_cpu_ticks(unsigned int cpu, uint64_t ticks[CPUSTATES]) +{ + unsigned int tsc_per_tick; + int i; + + tsc_per_tick = tsc_per_ms[0] * 1000 / system_hz; + + for (i = 0; i < CPUSTATES; i++) + ticks[i] = tsc_per_state[0][i] / tsc_per_tick; +} diff --git a/minix/kernel/arch/i386/arch_clock.c b/minix/kernel/arch/i386/arch_clock.c index 6c5fffdc5..a4ff11683 100644 --- a/minix/kernel/arch/i386/arch_clock.c +++ b/minix/kernel/arch/i386/arch_clock.c @@ -12,6 +12,11 @@ #include "glo.h" #include "kernel/profile.h" +#include /* for CP_*, CPUSTATES */ +#if CPUSTATES != MINIX_CPUSTATES +/* If this breaks, the code in this file may have to be adapted accordingly. */ +#error "MINIX_CPUSTATES value is out of sync with NetBSD's!" +#endif #ifdef USE_APIC #include "apic.h" @@ -40,6 +45,8 @@ static u64_t tsc0, tsc1; #define PROBE_TICKS (system_hz / 10) static unsigned tsc_per_ms[CONFIG_MAX_CPUS]; +static unsigned tsc_per_tick[CONFIG_MAX_CPUS]; +static uint64_t tsc_per_state[CONFIG_MAX_CPUS][CPUSTATES]; /*===========================================================================* * init_8235A_timer * @@ -133,7 +140,8 @@ int init_local_timer(unsigned freq) /* if we know the address, lapic is enabled and we should use it */ if (lapic_addr) { unsigned cpu = cpuid; - tsc_per_ms[cpu] = (unsigned long)(cpu_get_freq(cpu) / 1000); + tsc_per_ms[cpu] = (unsigned)(cpu_get_freq(cpu) / 1000); + tsc_per_tick[cpu] = (unsigned)(cpu_get_freq(cpu) / system_hz); lapic_set_timer_one_shot(1000000 / system_hz); } else { DEBUGBASIC(("Initiating legacy i8253 timer\n")); @@ -144,6 +152,7 @@ int init_local_timer(unsigned freq) estimate_cpu_freq(); /* always only 1 cpu in the system */ tsc_per_ms[0] = (unsigned long)(cpu_get_freq(0) / 1000); + tsc_per_tick[0] = (unsigned)(cpu_get_freq(0) / system_hz); } return 0; @@ -206,10 +215,12 @@ void context_stop(struct proc * p) { u64_t tsc, tsc_delta; u64_t * __tsc_ctr_switch = get_cpulocal_var_ptr(tsc_ctr_switch); + unsigned int cpu, counter; #ifdef CONFIG_SMP - unsigned cpu = cpuid; int must_bkl_unlock = 0; + cpu = cpuid; + /* * This function is called only if we switch from kernel to user or idle * or back. Therefore this is a perfect location to place the big kernel @@ -261,6 +272,7 @@ void context_stop(struct proc * p) #else read_tsc_64(&tsc); p->p_cycles = p->p_cycles + tsc - *__tsc_ctr_switch; + cpu = 0; #endif tsc_delta = tsc - *__tsc_ctr_switch; @@ -280,9 +292,17 @@ void context_stop(struct proc * p) /* * deduct the just consumed cpu cycles from the cpu time left for this * process during its current quantum. Skip IDLE and other pseudo kernel - * tasks + * tasks, except for global accounting purposes. */ if (p->p_endpoint >= 0) { + /* On MINIX3, the "system" counter covers system processes. */ + if (p->p_priv != priv_addr(USER_PRIV_ID)) + counter = CP_SYS; + else if (p->p_misc_flags & MF_NICED) + counter = CP_NICE; + else + counter = CP_USER; + #if DEBUG_RACE p->p_cpu_time_left = 0; #else @@ -295,8 +315,16 @@ void context_stop(struct proc * p) p->p_cpu_time_left = 0; } #endif + } else { + /* On MINIX3, the "interrupts" counter covers the kernel. */ + if (p->p_endpoint == IDLE) + counter = CP_IDLE; + else + counter = CP_INTR; } + tsc_per_state[cpu][counter] += tsc_delta; + *__tsc_ctr_switch = tsc; #ifdef CONFIG_SMP @@ -383,3 +411,16 @@ void busy_delay_ms(int ms) return; } +/* + * Return the number of clock ticks spent in each of a predefined number of + * CPU states. + */ +void +get_cpu_ticks(unsigned int cpu, uint64_t ticks[CPUSTATES]) +{ + int i; + + /* TODO: make this inter-CPU safe! */ + for (i = 0; i < CPUSTATES; i++) + ticks[i] = tsc_per_state[cpu][i] / tsc_per_tick[cpu]; +} diff --git a/minix/kernel/main.c b/minix/kernel/main.c index 691b2a6f5..a877fd604 100644 --- a/minix/kernel/main.c +++ b/minix/kernel/main.c @@ -68,8 +68,11 @@ void bsp_finish_booting(void) RTS_UNSET(proc_addr(i), RTS_PROC_STOP); } /* - * enable timer interrupts and clock task on the boot CPU + * Enable timer interrupts and clock task on the boot CPU. First reset the + * CPU accounting values, as the timer initialization (indirectly) uses them. */ + cycles_accounting_init(); + if (boot_cpu_init_timer(system_hz)) { panic("FATAL : failed to initialize timer interrupts, " "cannot continue without any clock source!"); @@ -91,10 +94,6 @@ void bsp_finish_booting(void) FIXME("PROC check enabled"); #endif - DEBUGEXTRA(("cycles_accounting_init()... ")); - cycles_accounting_init(); - DEBUGEXTRA(("done\n")); - #ifdef CONFIG_SMP cpu_set_flag(bsp_cpu_id, CPU_IS_READY); machine.processors_count = ncpus; diff --git a/minix/kernel/proc.h b/minix/kernel/proc.h index 75cb0517c..fda442fb0 100644 --- a/minix/kernel/proc.h +++ b/minix/kernel/proc.h @@ -254,6 +254,7 @@ struct proc { space*/ #define MF_STEP 0x40000 /* Single-step process */ #define MF_MSGFAILED 0x80000 +#define MF_NICED 0x100000 /* user has lowered max process priority */ /* Magic process table addresses. */ #define BEG_PROC_ADDR (&proc[0]) diff --git a/minix/kernel/proto.h b/minix/kernel/proto.h index a0b8dee0b..219790ed6 100644 --- a/minix/kernel/proto.h +++ b/minix/kernel/proto.h @@ -38,6 +38,7 @@ void cycles_accounting_init(void); void context_stop(struct proc * p); /* this is a wrapper to make calling it from assembly easier */ void context_stop_idle(void); +void get_cpu_ticks(unsigned int cpu, uint64_t ticks[MINIX_CPUSTATES]); int restore_fpu(struct proc *); void save_fpu(struct proc *); void save_local_fpu(struct proc *, int retain); @@ -105,7 +106,7 @@ void system_init(void); void clear_endpoint(struct proc *rc); void clear_ipc_refs(struct proc *rc, int caller_ret); void kernel_call_resume(struct proc *p); -int sched_proc(struct proc *rp, int priority, int quantum, int cpu); +int sched_proc(struct proc *rp, int priority, int quantum, int cpu, int niced); int add_ipc_filter(struct proc *rp, int type, vir_bytes address, size_t length); void clear_ipc_filters(struct proc *rp); diff --git a/minix/kernel/system.c b/minix/kernel/system.c index bdb22be47..a6bc979d1 100644 --- a/minix/kernel/system.c +++ b/minix/kernel/system.c @@ -639,10 +639,7 @@ void kernel_call_resume(struct proc *caller) /*===========================================================================* * sched_proc * *===========================================================================*/ -int sched_proc(struct proc *p, - int priority, - int quantum, - int cpu) +int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced) { /* Make sure the values given are within the allowed range.*/ if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES) @@ -691,6 +688,11 @@ int sched_proc(struct proc *p, p->p_cpu = cpu; #endif + if (niced) + p->p_misc_flags |= MF_NICED; + else + p->p_misc_flags &= ~MF_NICED; + /* Clear the scheduling bit and enqueue the process */ RTS_UNSET(p, RTS_NO_QUANTUM); diff --git a/minix/kernel/system/do_getinfo.c b/minix/kernel/system/do_getinfo.c index 1955790ec..98b5db40c 100644 --- a/minix/kernel/system/do_getinfo.c +++ b/minix/kernel/system/do_getinfo.c @@ -189,6 +189,17 @@ int do_getinfo(struct proc * caller, message * m_ptr) src_vir = (vir_bytes) &idl->p_cycles; break; } + case GET_CPUTICKS: { + uint64_t ticks[MINIX_CPUSTATES]; + unsigned int cpu; + cpu = (unsigned int)m_ptr->m_lsys_krn_sys_getinfo.val_len2_e; + if (cpu >= CONFIG_MAX_CPUS) + return EINVAL; + get_cpu_ticks(cpu, ticks); + length = sizeof(ticks); + src_vir = (vir_bytes)ticks; + break; + } default: printf("do_getinfo: invalid request %d\n", m_ptr->m_lsys_krn_sys_getinfo.request); diff --git a/minix/kernel/system/do_schedctl.c b/minix/kernel/system/do_schedctl.c index f8b95b435..85c5b9652 100644 --- a/minix/kernel/system/do_schedctl.c +++ b/minix/kernel/system/do_schedctl.c @@ -34,7 +34,7 @@ int do_schedctl(struct proc * caller, message * m_ptr) cpu = m_ptr->m_lsys_krn_schedctl.cpu; /* Try to schedule the process. */ - if((r = sched_proc(p, priority, quantum, cpu) != OK)) + if((r = sched_proc(p, priority, quantum, cpu, FALSE) != OK)) return r; p->p_scheduler = NULL; } else { diff --git a/minix/kernel/system/do_schedule.c b/minix/kernel/system/do_schedule.c index 272cc29af..c7d37ae14 100644 --- a/minix/kernel/system/do_schedule.c +++ b/minix/kernel/system/do_schedule.c @@ -9,7 +9,7 @@ int do_schedule(struct proc * caller, message * m_ptr) { struct proc *p; int proc_nr; - int priority, quantum, cpu; + int priority, quantum, cpu, niced; if (!isokendpt(m_ptr->m_lsys_krn_schedule.endpoint, &proc_nr)) return EINVAL; @@ -24,6 +24,7 @@ int do_schedule(struct proc * caller, message * m_ptr) priority = m_ptr->m_lsys_krn_schedule.priority; quantum = m_ptr->m_lsys_krn_schedule.quantum; cpu = m_ptr->m_lsys_krn_schedule.cpu; + niced = !!(m_ptr->m_lsys_krn_schedule.niced); - return sched_proc(p, priority, quantum, cpu); + return sched_proc(p, priority, quantum, cpu, niced); } diff --git a/minix/lib/libsys/sys_schedule.c b/minix/lib/libsys/sys_schedule.c index 1c25c7e58..d890651a1 100644 --- a/minix/lib/libsys/sys_schedule.c +++ b/minix/lib/libsys/sys_schedule.c @@ -1,9 +1,7 @@ #include "syslib.h" -int sys_schedule(endpoint_t proc_ep, - int priority, - int quantum, - int cpu) +int +sys_schedule(endpoint_t proc_ep, int priority, int quantum, int cpu, int niced) { message m; @@ -11,5 +9,6 @@ int sys_schedule(endpoint_t proc_ep, m.m_lsys_krn_schedule.priority = priority; m.m_lsys_krn_schedule.quantum = quantum; m.m_lsys_krn_schedule.cpu = cpu; + m.m_lsys_krn_schedule.niced = niced; return(_kernel_call(SYS_SCHEDULE, &m)); } diff --git a/minix/servers/sched/schedule.c b/minix/servers/sched/schedule.c index 9d1c503a2..76f2e3f19 100644 --- a/minix/servers/sched/schedule.c +++ b/minix/servers/sched/schedule.c @@ -297,7 +297,7 @@ int do_nice(message *m_ptr) static int schedule_process(struct schedproc * rmp, unsigned flags) { int err; - int new_prio, new_quantum, new_cpu; + int new_prio, new_quantum, new_cpu, niced; pick_cpu(rmp); @@ -316,8 +316,10 @@ static int schedule_process(struct schedproc * rmp, unsigned flags) else new_cpu = -1; + niced = (rmp->max_priority > USER_Q); + if ((err = sys_schedule(rmp->endpoint, new_prio, - new_quantum, new_cpu)) != OK) { + new_quantum, new_cpu, niced)) != OK) { printf("PM: An error occurred when trying to schedule %d: %d\n", rmp->endpoint, err); }