From: Tomas Hruby Date: Tue, 18 May 2010 13:39:04 +0000 (+0000) Subject: Scheduling server (by Bjorn Swift) X-Git-Tag: v3.1.7~51 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/CHANGES?a=commitdiff_plain;h=b09bcf67790d031019d7f4fc357fec69a45ab655;p=minix.git Scheduling server (by Bjorn Swift) In this second phase, scheduling is moved from PM to its own scheduler (see r6557 for phase one). In the next phase we hope to a) include useful information in the "out of quantum" message and b) create some simple scheduling policy that makes use of that information. When the system starts up, PM will iterate over its process table and ask SCHED to take over scheduling unprivileged processes. This is done by sending a SCHEDULING_START message to SCHED. This message includes the process's endpoint, the parent's endpoint and its nice level. The scheduler adds this process to its schedproc table, issues a schedctl, and returns its own endpoint to PM - as the endpoint of the effective scheduler. When a process terminates, a SCHEDULING_STOP message is sent to the scheduler. The reason for returning an effective endpoint is future compatibility. Some day, we may have a scheduler that, instead of scheduling the process itself, forwards the SCHEDULING_START message on to another scheduler. PM has information on who schedules whom. As such, scheduling messages from user-land are sent through PM. An example is when processes change their priority, using nice(). In that case, a getsetpriority message is sent to PM, which then sends a SCHEDULING_SET_NICE to the process's effective scheduler. When a process is forked through PM, it inherits its parent's scheduler, but is spawned with an empty quantum. As before, a request to fork a process flows through VM before returning to PM, which then wakes up the child process. This flow has been modified slightly so that PM notifies the scheduler of the new process, before waking up the child process. 
If the scheduler fails to take over scheduling, the child process is torn down and the fork fails with an error value. Process priority is decided entirely by nice levels. PM stores a copy of each process's nice level and when a child is forked, its parent's nice level is sent in the SCHEDULING_START message. How this level is mapped to a priority queue is up to the scheduler. It should be noted that the nice level is used to determine the max_priority and the parent could have been in a lower priority when it was spawned. To prevent a CPU-intensive process from hogging the CPU by continuously forking children that get scheduled in the max_priority, the scheduler should determine in which queue the parent is currently scheduled, and schedule the child in that same queue. Other fixes: The USER_Q in kernel/proc.h was incorrectly defined as NR_SCHED_QUEUES/2. That results in an "off by one" error when converting priority->nice->priority for nice=0. This also had the side effect that if someone were to set the MAX_USER_Q to something other than 0, then USER_Q would be off. 
--- diff --git a/include/minix/com.h b/include/minix/com.h index ab87b66ed..fcd295624 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -73,8 +73,9 @@ #define DS_PROC_NR ((endpoint_t) 6) /* data store server */ #define MFS_PROC_NR ((endpoint_t) 7) /* minix root filesystem */ #define VM_PROC_NR ((endpoint_t) 8) /* memory server */ -#define PFS_PROC_NR ((endpoint_t) 9) /* pipe filesystem */ -#define LAST_SPECIAL_PROC_NR 10 /* An untyped version for +#define PFS_PROC_NR ((endpoint_t) 9) /* pipe filesystem */ +#define SCHED_PROC_NR ((endpoint_t) 10) /* scheduler */ +#define LAST_SPECIAL_PROC_NR 11 /* An untyped version for computation in macros.*/ #define INIT_PROC_NR ((endpoint_t) LAST_SPECIAL_PROC_NR) /* init -- goes multiuser */ @@ -1079,9 +1080,21 @@ #define SCHEDULING_BASE 0xF00 #define SCHEDULING_NO_QUANTUM (SCHEDULING_BASE+1) -# define SCHEDULING_ENDPOINT m1_i1 -# define SCHEDULING_PRIORITY m1_i2 -# define SCHEDULING_QUANTUM m1_i3 +# define SCHEDULING_ENDPOINT m9_l1 +# define SCHEDULING_QUANTUM m9_l2 +# define SCHEDULING_PRIORITY m9_s1 + +/* SCHEDULING_START uses _ENDPOINT, _PRIORITY and _QUANTUM from + * SCHEDULING_NO_QUANTUM */ +#define SCHEDULING_START (SCHEDULING_BASE+2) +# define SCHEDULING_SCHEDULER m9_l1 /* Overrides _ENDPOINT on return*/ +# define SCHEDULING_PARENT m9_l3 +# define SCHEDULING_NICE m9_l4 + +#define SCHEDULING_STOP (SCHEDULING_BASE+3) + +#define SCHEDULING_SET_NICE (SCHEDULING_BASE+4) + #endif /* _MINIX_COM_H */ diff --git a/kernel/proc.h b/kernel/proc.h index 673882cb7..915277f91 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -231,8 +231,8 @@ struct proc { #define NR_SCHED_QUEUES 16 /* MUST equal minimum priority + 1 */ #define TASK_Q 0 /* highest, used for kernel tasks */ #define MAX_USER_Q 0 /* highest priority for user processes */ -#define USER_Q (NR_SCHED_QUEUES / 2) /* default (should correspond to - nice 0) */ +#define USER_Q ((MIN_USER_Q - MAX_USER_Q) / 2 + MAX_USER_Q) /* default + (should correspond to nice 0) */ 
#define MIN_USER_Q (NR_SCHED_QUEUES - 1) /* minimum priority for user processes */ diff --git a/kernel/system/do_schedule.c b/kernel/system/do_schedule.c index 7e75bf8da..e647dceb2 100644 --- a/kernel/system/do_schedule.c +++ b/kernel/system/do_schedule.c @@ -29,8 +29,9 @@ PUBLIC int do_schedule(struct proc * caller, message * m_ptr) RTS_SET(p, RTS_NO_QUANTUM); /* Clear the scheduling bit and enqueue the process */ - p->p_priority = m_ptr->SCHEDULING_PRIORITY; - p->p_ticks_left = m_ptr->SCHEDULING_QUANTUM; + p->p_priority = m_ptr->SCHEDULING_PRIORITY; + p->p_quantum_size = m_ptr->SCHEDULING_QUANTUM; + p->p_ticks_left = m_ptr->SCHEDULING_QUANTUM; RTS_UNSET(p, RTS_NO_QUANTUM); diff --git a/kernel/table.c b/kernel/table.c index 24b3adfbe..97a319c36 100644 --- a/kernel/table.c +++ b/kernel/table.c @@ -75,6 +75,7 @@ PUBLIC struct boot_image image[] = { {RS_PROC_NR, 0, 0, 4, 4, 0, "rs" }, {PM_PROC_NR, 0, 0, 32, 4, 0, "pm" }, +{SCHED_PROC_NR, 0, 0, 32, 4, 0, "sched" }, {FS_PROC_NR, 0, 0, 32, 5, 0, "vfs" }, {MEM_PROC_NR, 0, BVM_F, 4, 3, 0, "memory"}, {LOG_PROC_NR, 0, BVM_F, 4, 2, 0, "log" }, diff --git a/lib/libsys/sys_schedctl.c b/lib/libsys/sys_schedctl.c index eb7af8833..f552b885a 100644 --- a/lib/libsys/sys_schedctl.c +++ b/lib/libsys/sys_schedctl.c @@ -7,4 +7,3 @@ PUBLIC int sys_schedctl(endpoint_t proc_ep) m.SCHEDULING_ENDPOINT = proc_ep; return(_kernel_call(SYS_SCHEDCTL, &m)); } - diff --git a/servers/Makefile b/servers/Makefile index 1e251c75b..3efb92e36 100644 --- a/servers/Makefile +++ b/servers/Makefile @@ -3,8 +3,8 @@ .include -SUBDIR= ds hgfs inet init ipc is iso9660fs mfs pfs pm rs vfs vm +SUBDIR= ds hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm -IMAGE_SUBDIR= ds init mfs pfs pm rs vfs vm +IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm .include diff --git a/servers/pm/const.h b/servers/pm/const.h index 9ed579048..6e1c7bbe1 100644 --- a/servers/pm/const.h +++ b/servers/pm/const.h @@ -15,3 +15,6 @@ #define MAX_SECS 
(((1<<(sizeof(clock_t)*8-1))-1)/system_hz) /* max.secs for setitimer() ((2^31-1)/HZ) */ #define NR_ITIMERS 3 /* number of supported interval timers */ + +#define SEND_PRIORITY 1 /* send current priority queue to scheduler */ +#define SEND_TIME_SLICE 2 /* send current time slice to scheduler */ diff --git a/servers/pm/forkexit.c b/servers/pm/forkexit.c index 17a8c9f8d..3406296c2 100644 --- a/servers/pm/forkexit.c +++ b/servers/pm/forkexit.c @@ -92,7 +92,7 @@ PUBLIC int do_fork() sigemptyset(&rmc->mp_sigtrace); } /* Inherit only these flags. In normal fork(), PRIV_PROC is not inherited. */ - rmc->mp_flags &= (IN_USE|DELAY_CALL|PM_SCHEDULED); + rmc->mp_flags &= (IN_USE|DELAY_CALL); rmc->mp_child_utime = 0; /* reset administration */ rmc->mp_child_stime = 0; /* reset administration */ rmc->mp_exitstatus = 0; diff --git a/servers/pm/main.c b/servers/pm/main.c index 31e7b14ee..6047e382c 100644 --- a/servers/pm/main.c +++ b/servers/pm/main.c @@ -65,7 +65,7 @@ PUBLIC int main() /* SEF local startup. */ sef_local_startup(); - takeover_scheduling(); /* takeover all running processes */ + sched_init(); /* initialize user-space scheduling */ /* This is PM's main loop- get work and do it, forever and forever. */ while (TRUE) { @@ -131,16 +131,6 @@ PUBLIC int main() else result= ENOSYS; break; - case SCHEDULING_NO_QUANTUM: - /* This message was sent from the kernel, don't reply */ - if (IPC_STATUS_FLAGS_TEST(ipc_status, IPC_FLG_MSG_FROM_KERNEL)) { - do_noquantum(); - } else { - printf("PM: process %s/%d faked SCHEDULING_NO_QUANTUM " - "message!\n", - mp->mp_name, mp->mp_endpoint); - } - continue; default: /* Else, if the system call number is valid, perform the * call. @@ -282,10 +272,8 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) /* Get kernel endpoint identifier. 
*/ rmp->mp_endpoint = ip->endpoint; - /* Get scheduling info */ - rmp->mp_max_priority = ip->priority; - rmp->mp_priority = ip->priority; - rmp->mp_time_slice = ip->quantum; + /* Set scheduling info */ + rmp->mp_scheduler = KERNEL; /* Tell FS about this system process. */ mess.m_type = PM_INIT; @@ -491,6 +479,17 @@ PRIVATE void handle_fs_reply() break; case PM_EXIT_REPLY: + if((r = sched_stop(rmp)) != OK) { + /* If the scheduler refuses to give up scheduling, there is + * little we can do, except report it. This may cause problems + * later on, if this scheduler is asked to schedule another proc + * that has an endpoint->schedproc mapping identical to the proc + * we just tried to stop scheduling. + */ + printf("PM: The scheduler did not want to give up " + "scheduling %s, ret=%d.\n", rmp->mp_name, r); + } + exit_restart(rmp, FALSE /*dump_core*/); break; @@ -505,13 +504,29 @@ PRIVATE void handle_fs_reply() case PM_FORK_REPLY: /* Schedule the newly created process ... */ - if (rmp->mp_flags & PM_SCHEDULED) - schedule_process(rmp); - /* ... 
and wake it up */ - setreply(proc_n, OK); + r = (OK); + if (rmp->mp_scheduler != KERNEL && rmp->mp_scheduler != NONE) { + r = sched_start(rmp->mp_scheduler, rmp, 0); + } + + /* If scheduling the process failed, we want to tear down the process + * and fail the fork */ + if (r != (OK)) { + /* Tear down the newly created process */ + rmp->mp_scheduler = NONE; /* don't try to stop scheduling */ + exit_proc(rmp, -1, FALSE /*dump_core*/); + + /* Wake up the parent with a failed fork */ + setreply(rmp->mp_parent, -1); - /* Wake up the parent */ - setreply(rmp->mp_parent, rmp->mp_pid); + } + else { + /* Wake up the child */ + setreply(proc_n, OK); + + /* Wake up the parent */ + setreply(rmp->mp_parent, rmp->mp_pid); + } break; diff --git a/servers/pm/misc.c b/servers/pm/misc.c index 18e3ebac6..bfce13cc7 100644 --- a/servers/pm/misc.c +++ b/servers/pm/misc.c @@ -443,20 +443,11 @@ PUBLIC int do_getsetpriority() * The value passed in is currently between PRIO_MIN and PRIO_MAX. * We have to scale this between MIN_USER_Q and MAX_USER_Q to match * the kernel's scheduling queues. - * - * TODO: This assumes that we are the scheduler, this will be changed - * once the scheduler gets factored out of PM to its own server */ - if (arg_pri < PRIO_MIN || arg_pri > PRIO_MAX) return(EINVAL); - - new_q = MAX_USER_Q + (arg_pri-PRIO_MIN) * (MIN_USER_Q-MAX_USER_Q+1) / - (PRIO_MAX-PRIO_MIN+1); - if (new_q < MAX_USER_Q) new_q = MAX_USER_Q; /* shouldn't happen */ - if (new_q > MIN_USER_Q) new_q = MIN_USER_Q; /* shouldn't happen */ - rmp->mp_max_priority = rmp->mp_priority = new_q; - if ((r = schedule_process(rmp))) - return(r); + if ((r = sched_nice(rmp, arg_pri)) != OK) { + return r; + } rmp->mp_nice = arg_pri; return(OK); diff --git a/servers/pm/mproc.h b/servers/pm/mproc.h index e6d07bbe6..65cb183f7 100644 --- a/servers/pm/mproc.h +++ b/servers/pm/mproc.h @@ -56,9 +56,7 @@ EXTERN struct mproc { signed int mp_nice; /* nice is PRIO_MIN..PRIO_MAX, standard 0. 
*/ /* User space scheduling */ - int mp_max_priority; /* this process' highest allowed priority */ - int mp_priority; /* the process' current priority */ - int mp_time_slice; /* this process's scheduling queue */ + endpoint_t mp_scheduler; /* scheduler endpoint id */ char mp_name[PROC_NAME_LEN]; /* process name */ } mproc[NR_PROCS]; diff --git a/servers/pm/proto.h b/servers/pm/proto.h index 501f24fe5..c44f0d392 100644 --- a/servers/pm/proto.h +++ b/servers/pm/proto.h @@ -61,10 +61,10 @@ _PROTOTYPE( int do_svrctl, (void) ); _PROTOTYPE( int do_getsetpriority, (void) ); /* schedule.c */ -_PROTOTYPE( int schedule_process, (struct mproc * rmp) ); -_PROTOTYPE( void do_noquantum, (void) ); -_PROTOTYPE( void takeover_scheduling, (void) ); -_PROTOTYPE( void balance_queues, (struct timer *tp) ); +_PROTOTYPE( void sched_init, (void) ); +_PROTOTYPE( int sched_start, (endpoint_t ep, struct mproc *rmp, int flags) ); +_PROTOTYPE( int sched_stop, (struct mproc *rmp) ); +_PROTOTYPE( int sched_nice, (struct mproc *rmp, int nice) ); /* profile.c */ _PROTOTYPE( int do_sprofile, (void) ); diff --git a/servers/pm/schedule.c b/servers/pm/schedule.c index 6378eb565..0ddd56bf4 100644 --- a/servers/pm/schedule.c +++ b/servers/pm/schedule.c @@ -7,94 +7,101 @@ #include #include #include "mproc.h" -#include "kernel/proc.h" /* for MIN_USER_Q */ -PRIVATE timer_t sched_timer; - -/* - * makes a kernel call that schedules the process using the actuall scheduling - * parameters set for this process - */ -PUBLIC int schedule_process(struct mproc * rmp) +/*===========================================================================* + * init_scheduling * + *===========================================================================*/ +PUBLIC void sched_init(void) { - int err; + struct mproc *trmp; + int proc_nr; - if ((err = sys_schedule(rmp->mp_endpoint, rmp->mp_priority, - rmp->mp_time_slice)) != OK) { - printf("PM: An error occurred when trying to schedule %s: %d\n", - rmp->mp_name, err); + for 
(proc_nr=0, trmp=mproc; proc_nr < NR_PROCS; proc_nr++, trmp++) { + /* Don't take over system processes. When the system starts, + * this will typically only take over init, from which other + * user space processes will inherit. */ + if (trmp->mp_flags & IN_USE && !(trmp->mp_flags & PRIV_PROC)) { + if (sched_start(SCHED_PROC_NR, trmp, + (SEND_PRIORITY | SEND_TIME_SLICE))) { + printf("PM: SCHED denied taking over scheduling of %s\n", + trmp->mp_name); + } + } } - - return err; } /*===========================================================================* - * do_noquantum * + * sched_start * *===========================================================================*/ - -PUBLIC void do_noquantum(void) +PUBLIC int sched_start(endpoint_t ep, struct mproc *rmp, int flags) { - int rv, proc_nr_n; - register struct mproc *rmp; + int rv; + message m; - if (pm_isokendpt(m_in.m_source, &proc_nr_n) != OK) { - printf("PM: WARNING: got an invalid endpoint in OOQ msg %u.\n", - m_in.m_source); - return; - } + m.SCHEDULING_ENDPOINT = rmp->mp_endpoint; + m.SCHEDULING_PARENT = mproc[rmp->mp_parent].mp_endpoint; + m.SCHEDULING_NICE = rmp->mp_nice; - rmp = &mproc[proc_nr_n]; - if (rmp->mp_priority < MIN_USER_Q) { - rmp->mp_priority += 1; /* lower priority */ + /* Send the request to the scheduler */ + if ((rv = _taskcall(ep, SCHEDULING_START, &m))) { + return rv; } - schedule_process(rmp); + /* Store the process' scheduler. Note that this might not be the + * scheduler we sent the SCHEDULING_START message to. That scheduler + * might have forwarded the scheduling message on to another scheduler + * before returning the message. 
+ */ + rmp->mp_scheduler = m.SCHEDULING_SCHEDULER; + return (OK); } /*===========================================================================* - * takeover_scheduling * + * sched_stop * *===========================================================================*/ -PUBLIC void takeover_scheduling(void) +PUBLIC int sched_stop(struct mproc *rmp) { - struct mproc *trmp; - int proc_nr; + int rv; + message m; - tmr_inittimer(&sched_timer); + /* If the kernel is the scheduler, it will implicitly stop scheduling + * once another process takes over or the process terminates */ + if (rmp->mp_scheduler == KERNEL || rmp->mp_scheduler == NONE) + return(OK); - for (proc_nr=0, trmp=mproc; proc_nr < NR_PROCS; proc_nr++, trmp++) { - /* Don't takeover system processes. When the system starts, - * this will typically only takeover init, from which other - * user space processes will inherit. */ - if (trmp->mp_flags & IN_USE && !(trmp->mp_flags & PRIV_PROC)) { - if (sys_schedctl(trmp->mp_endpoint)) - printf("PM: Error while taking over scheduling for %s\n", - trmp->mp_name); - trmp->mp_flags |= PM_SCHEDULED; - } + m.SCHEDULING_ENDPOINT = rmp->mp_endpoint; + if ((rv = _taskcall(rmp->mp_scheduler, SCHEDULING_STOP, &m))) { + return rv; } - pm_set_timer(&sched_timer, 100, balance_queues, 0); + /* sched_stop is either called when the process is exiting or it is + * being moved between schedulers. If it is being moved between + * schedulers, we need to set the mp_scheduler to NONE so that PM + * doesn't forward messages to the process' scheduler while being moved + * (such as sched_nice). 
*/ + rmp->mp_scheduler = NONE; + return (OK); } /*===========================================================================* - * balance_queues * + * sched_nice * *===========================================================================*/ - -PUBLIC void balance_queues(tp) -struct timer *tp; +PUBLIC int sched_nice(struct mproc *rmp, int nice) { - struct mproc *rmp; - int proc_nr; int rv; - - for (proc_nr=0, rmp=mproc; proc_nr < NR_PROCS; proc_nr++, rmp++) { - if (rmp->mp_flags & IN_USE) { - if (rmp->mp_priority > rmp->mp_max_priority) { - rmp->mp_priority -= 1; /* increase priority */ - schedule_process(rmp); - } - } + message m; + + /* If the kernel is the scheduler, we don't allow messing with the + * priority. If you want to control process priority, assign the process + * to a user-space scheduler */ + if (rmp->mp_scheduler == KERNEL || rmp->mp_scheduler == NONE) + return (EINVAL); + + m.SCHEDULING_ENDPOINT = rmp->mp_endpoint; + m.SCHEDULING_NICE = nice; + if ((rv = _taskcall(rmp->mp_scheduler, SCHEDULING_SET_NICE, &m))) { + return rv; } - pm_set_timer(&sched_timer, 100, balance_queues, 0); + return (OK); } diff --git a/servers/rs/table.c b/servers/rs/table.c index b66b89f8a..25f7dfa07 100644 --- a/servers/rs/table.c +++ b/servers/rs/table.c @@ -19,6 +19,7 @@ PRIVATE int pm_kc[] = { SYS_ALL_C, SYS_NULL_C }, + sched_kc[] ={ SYS_ALL_C, SYS_NULL_C }, vfs_kc[] = { FS_KC, SYS_NULL_C }, rs_kc[] = { SYS_ALL_C, SYS_NULL_C }, ds_kc[] = { SYS_ALL_C, SYS_NULL_C }, @@ -41,6 +42,7 @@ PRIVATE int pm_vmc[] = { VM_BASIC_CALLS, VM_EXIT, VM_FORK, VM_BRK, VM_EXEC_NEWMEM, VM_PUSH_SIG, VM_WILLEXIT, VM_ADDDMA, VM_DELDMA, VM_GETDMA, VM_NOTIFY_SIG, SYS_NULL_C }, + sched_vmc[] ={ VM_BASIC_CALLS, SYS_NULL_C }, vfs_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C }, rs_vmc[] = { VM_BASIC_CALLS, VM_RS_SET_PRIV, VM_RS_UPDATE, SYS_NULL_C }, ds_vmc[] = { VM_BASIC_CALLS, SYS_NULL_C }, @@ -62,6 +64,7 @@ PUBLIC struct boot_image_priv boot_image_priv_table[] = { {RS_PROC_NR, "rs", RSYS_F, RSYS_T, 
RSYS_M, RSYS_SM, rs_kc, rs_vmc, 0 }, {VM_PROC_NR, "vm", VM_F, SRV_T, SRV_M, SRV_SM, vm_kc, vm_vmc, 0 }, {PM_PROC_NR, "pm", SRV_F, SRV_T, SRV_M, SRV_SM, pm_kc, pm_vmc, 0 }, +{SCHED_PROC_NR,"sched", SRV_F, SRV_T, SRV_M, SRV_SM, sched_kc, sched_vmc, 0 }, {VFS_PROC_NR, "vfs", SRV_F, SRV_T, SRV_M, SRV_SM, vfs_kc, vfs_vmc, 0 }, {DS_PROC_NR, "ds", SRV_F, SRV_T, SRV_M, SRV_SM, ds_kc, ds_vmc, 0 }, {TTY_PROC_NR, "tty", SRV_F, SRV_T, SRV_M, SRV_SM, tty_kc, tty_vmc, 0 }, diff --git a/servers/sched/Makefile b/servers/sched/Makefile new file mode 100644 index 000000000..a749527a4 --- /dev/null +++ b/servers/sched/Makefile @@ -0,0 +1,17 @@ +# Makefile for Scheduler (SCHED) +PROG= sched +SRCS= main.c schedule.c utility.c timers.c + +DPADD+= ${LIBSYS} ${LIBTIMERS} +LDADD+= -lsys -ltimers + +MAN= + +BINDIR?= /usr/sbin +INSTALLFLAGS+= -S 32k + +CPPFLAGS.main.c+= -I${MINIXSRCDIR} +CPPFLAGS.schedule.c+= -I${MINIXSRCDIR} +CPPFLAGS.utility.c+= -I${MINIXSRCDIR} + +.include diff --git a/servers/sched/main.c b/servers/sched/main.c new file mode 100644 index 000000000..fd9604036 --- /dev/null +++ b/servers/sched/main.c @@ -0,0 +1,119 @@ +/* This file contains the main program of the SCHED scheduler. It will sit idle + * until asked, by PM, to take over scheduling a particular process. + */ + +/* The _MAIN def indicates that we want the schedproc structs to be created + * here. Used from within schedproc.h */ +#define _MAIN + +#include "sched.h" +#include "schedproc.h" + +/* Declare some local functions. */ +FORWARD _PROTOTYPE( void reply, (endpoint_t whom, message *m_ptr) ); +FORWARD _PROTOTYPE( void sef_local_startup, (void) ); + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(void) +{ + /* Main routine of the scheduler. */ + message m_in; /* the incoming message itself is kept here. 
*/ + int call_nr; /* system call number */ + int who_e; /* caller's endpoint */ + int result; /* result to system call */ + int rv; + + /* SEF local startup. */ + sef_local_startup(); + + /* Initialize scheduling timers, used for running balance_queues */ + init_scheduling(); + + /* This is SCHED's main loop - get work and do it, forever and forever. */ + while (TRUE) { + int ipc_status; + + /* Wait for the next message and extract useful information from it. */ + if (sef_receive_status(ANY, &m_in, &ipc_status) != OK) + panic("SCHED sef_receive error"); + who_e = m_in.m_source; /* who sent the message */ + call_nr = m_in.m_type; /* system call number */ + + /* Check for system notifications first. Special cases. */ + if (is_ipc_notify(ipc_status)) { + switch(who_e) { + case CLOCK: + sched_expire_timers(m_in.NOTIFY_TIMESTAMP); + continue; /* don't reply */ + default : + result = ENOSYS; + } + + goto sendreply; + } + + switch(call_nr) { + case SCHEDULING_START: + result = do_start_scheduling(&m_in); + break; + case SCHEDULING_STOP: + result = do_stop_scheduling(&m_in); + break; + case SCHEDULING_SET_NICE: + result = do_nice(&m_in); + break; + case SCHEDULING_NO_QUANTUM: + /* This message was sent from the kernel, don't reply */ + if (IPC_STATUS_FLAGS_TEST(ipc_status, + IPC_FLG_MSG_FROM_KERNEL)) { + if ((rv = do_noquantum(&m_in)) != (OK)) { + printf("SCHED: Warning, do_noquantum " + "failed with %d\n", rv); + } + continue; /* Don't reply */ + } + else { + printf("SCHED: process %d faked " + "SCHEDULING_NO_QUANTUM message!\n", + who_e); + result = EPERM; + } + break; + default: + result = no_sys(who_e, call_nr); + } + +sendreply: + /* Send reply. 
*/ + if (result != SUSPEND) { + m_in.m_type = result; /* build reply message */ + reply(who_e, &m_in); /* send it away */ + } + } + return(OK); +} + +/*===========================================================================* + * reply * + *===========================================================================*/ +PRIVATE void reply(endpoint_t who_e, message *m_ptr) +{ + int s = send(who_e, m_ptr); /* send the message */ + if (OK != s) + printf("SCHED: unable to send reply to %d: %d\n", who_e, s); +} + +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +PRIVATE void sef_local_startup(void) +{ + /* No init callbacks for now. */ + /* No live update support for now. */ + /* No signal callbacks for now. */ + + /* Let SEF perform startup. */ + sef_startup(); +} diff --git a/servers/sched/proto.h b/servers/sched/proto.h new file mode 100644 index 000000000..8c1bb3893 --- /dev/null +++ b/servers/sched/proto.h @@ -0,0 +1,28 @@ +/* Function prototypes. 
*/ + +struct schedproc; +#include + +/* main.c */ +_PROTOTYPE( int main, (void) ); +_PROTOTYPE( void setreply, (int proc_nr, int result) ); + +/* schedule.c */ +_PROTOTYPE( int do_noquantum, (message *m_ptr) ); +_PROTOTYPE( int do_start_scheduling, (message *m_ptr) ); +_PROTOTYPE( int do_stop_scheduling, (message *m_ptr) ); +_PROTOTYPE( int do_nice, (message *m_ptr) ); +/*_PROTOTYPE( void balance_queues, (struct timer *tp) );*/ +_PROTOTYPE( void init_scheduling, (void) ); + +/* utility.c */ +_PROTOTYPE( int no_sys, (int who_e, int call_nr) ); +_PROTOTYPE( int sched_isokendpt, (int ep, int *proc) ); +_PROTOTYPE( int sched_isemtyendpt, (int ep, int *proc) ); +_PROTOTYPE( int is_from_pm, (message *m_ptr) ); +_PROTOTYPE( int nice_to_priority, (int nice, unsigned *new_q) ); + +/* timers.c */ +_PROTOTYPE( void sched_set_timer, (timer_t *tp, int delta, + tmr_func_t watchdog, int arg) ); +_PROTOTYPE( void sched_expire_timers, (clock_t now) ); diff --git a/servers/sched/sched.h b/servers/sched/sched.h new file mode 100644 index 000000000..24cfeb36d --- /dev/null +++ b/servers/sched/sched.h @@ -0,0 +1,19 @@ +/* This is the master header for the Scheduler. It includes some other files + * and defines the principal constants. + */ +#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */ +#define _MINIX 1 /* tell headers to include MINIX stuff */ +#define _SYSTEM 1 /* tell headers that this is the kernel */ + +/* The following are so basic, all the *.c files get them automatically. */ +#include /* MUST be first */ +#include /* MUST be second */ +#include +#include + +#include +#include + +#include + +#include "proto.h" diff --git a/servers/sched/schedproc.h b/servers/sched/schedproc.h new file mode 100644 index 000000000..d2d131a05 --- /dev/null +++ b/servers/sched/schedproc.h @@ -0,0 +1,32 @@ +/* This table has one slot per process. It contains scheduling information + * for each process. 
+ */ +#include + +/* EXTERN should be extern except in main.c, where we want to keep the struct */ +#ifdef _MAIN +#undef EXTERN +#define EXTERN +#endif + +/** + * We might later want to add more information to this table, such as the + * process owner, process group or cpumask. + */ + +EXTERN struct schedproc { + endpoint_t endpoint; /* process endpoint id */ + endpoint_t parent; /* parent endpoint id */ + unsigned flags; /* flag bits */ + + /* Scheduling priority. */ + signed int nice; /* nice is PRIO_MIN..PRIO_MAX, standard 0. */ + + /* User space scheduling */ + unsigned max_priority; /* this process' highest allowed priority */ + unsigned priority; /* the process' current priority */ + unsigned time_slice; /* this process's time slice */ +} schedproc[NR_PROCS]; + +/* Flag values */ +#define IN_USE 0x00001 /* set when 'schedproc' slot in use */ diff --git a/servers/sched/schedule.c b/servers/sched/schedule.c new file mode 100644 index 000000000..4cfc9a266 --- /dev/null +++ b/servers/sched/schedule.c @@ -0,0 +1,253 @@ +/* This file contains the scheduling policy for SCHED + * + * The entry points are: + * do_noquantum: Called on behalf of process' that run out of quantum + * do_start_scheduling Request from PM to start scheduling a proc + * do_stop_scheduling Request from PM to stop scheduling a proc + * do_nice Request from PM to change the nice level on a proc + * init_scheduling Called from main.c to set up/prepare scheduling + */ +#include "sched.h" +#include "schedproc.h" +#include +#include +#include "kernel/proc.h" /* for queue constants */ + +PRIVATE timer_t sched_timer; +PRIVATE unsigned balance_timeout; + +#define BALANCE_TIMEOUT 5 /* how often to balance queues in seconds */ + +FORWARD _PROTOTYPE( int schedule_process, (struct schedproc * rmp) ); +FORWARD _PROTOTYPE( void balance_queues, (struct timer *tp) ); + +#define DEFAULT_USER_TIME_SLICE 8 + +/*===========================================================================* + * do_noquantum * + 
*===========================================================================*/ + +PUBLIC int do_noquantum(message *m_ptr) +{ + register struct schedproc *rmp; + int rv, proc_nr_n; + + if (sched_isokendpt(m_ptr->m_source, &proc_nr_n) != OK) { + printf("SCHED: WARNING: got an invalid endpoint in OOQ msg %u.\n", + m_ptr->m_source); + return EBADSRCDST; + } + + rmp = &schedproc[proc_nr_n]; + if (rmp->priority < MIN_USER_Q) { + rmp->priority += 1; /* lower priority */ + } + + if ((rv = schedule_process(rmp)) != OK) { + return rv; + } + return OK; +} + +/*===========================================================================* + * do_stop_scheduling * + *===========================================================================*/ +PUBLIC int do_stop_scheduling(message *m_ptr) +{ + register struct schedproc *rmp; + int rv, proc_nr_n; + + /* Only accept stop messages from PM */ + if (!is_from_pm(m_ptr)) + return EPERM; + + if (sched_isokendpt(m_ptr->SCHEDULING_ENDPOINT, &proc_nr_n) != OK) { + printf("SCHED: WARNING: got an invalid endpoint in OOQ msg %u.\n", + m_ptr->SCHEDULING_ENDPOINT); + return EBADSRCDST; + } + + rmp = &schedproc[proc_nr_n]; + rmp->flags = 0; /*&= ~IN_USE;*/ + + return OK; +} + +/*===========================================================================* + * do_start_scheduling * + *===========================================================================*/ +PUBLIC int do_start_scheduling(message *m_ptr) +{ + register struct schedproc *rmp; + int rv, proc_nr_n, parent_nr_n; + + /* Only accept start messages from PM */ + if (!is_from_pm(m_ptr)) + return EPERM; + + /* Resolve endpoint to proc slot. 
*/ + if ((rv = sched_isemtyendpt(m_ptr->SCHEDULING_ENDPOINT, &proc_nr_n)) + != OK) { + return rv; + } + rmp = &schedproc[proc_nr_n]; + + /* Populate process slot */ + rmp->endpoint = m_ptr->SCHEDULING_ENDPOINT; + rmp->parent = m_ptr->SCHEDULING_PARENT; + rmp->nice = m_ptr->SCHEDULING_NICE; + + /* Find maximum priority from nice value */ + rv = nice_to_priority(rmp->nice, &rmp->max_priority); + if (rv != OK) + return rv; + + /* Inherit current priority and time slice from parent. Since there + * is currently only one scheduler scheduling the whole system, this + * value is local and we assert that the parent endpoint is valid */ + if (rmp->endpoint == rmp->parent) { + /* We have a special case here for init, which is the first + process scheduled, and the parent of itself. */ + rmp->priority = USER_Q; + rmp->time_slice = DEFAULT_USER_TIME_SLICE; + + } + else { + if ((rv = sched_isokendpt(m_ptr->SCHEDULING_PARENT, + &parent_nr_n)) != OK) + return rv; + + rmp->priority = schedproc[parent_nr_n].priority; + rmp->time_slice = schedproc[parent_nr_n].time_slice; + } + + /* Take over scheduling the process. The kernel reply message populates + * the processes current priority and its time slice */ + if ((rv = sys_schedctl(rmp->endpoint)) != OK) { + printf("Sched: Error overtaking scheduling for %d, kernel said %d\n", + rmp->endpoint, rv); + return rv; + } + rmp->flags = IN_USE; + + /* Schedule the process, giving it some quantum */ + if ((rv = schedule_process(rmp)) != OK) { + printf("Sched: Error while scheduling process, kernel replied %d\n", + rv); + return rv; + } + + /* Mark ourselves as the new scheduler. + * By default, processes are scheduled by the parents scheduler. 
In case + * this scheduler would want to delegate scheduling to another + * scheduler, it could do so and then write the endpoint of that + * scheduler into SCHEDULING_SCHEDULER + */ + + m_ptr->SCHEDULING_SCHEDULER = SCHED_PROC_NR; + + return OK; +} + +/*===========================================================================* + * do_nice * + *===========================================================================*/ +PUBLIC int do_nice(message *m_ptr) +{ + struct schedproc *rmp; + int rv; + int proc_nr_n; + int nice; + unsigned new_q, old_q, old_max_q; + int old_nice; + + /* Only accept nice messages from PM */ + if (!is_from_pm(m_ptr)) + return EPERM; + + if (sched_isokendpt(m_ptr->SCHEDULING_ENDPOINT, &proc_nr_n) != OK) { + printf("SCHED: WARNING: got an invalid endpoint in OOQ msg %u.\n", + m_ptr->SCHEDULING_ENDPOINT); + return EBADSRCDST; + } + + rmp = &schedproc[proc_nr_n]; + nice = m_ptr->SCHEDULING_NICE; + + if ((rv = nice_to_priority(nice, &new_q)) != OK) + return rv; + + /* Store old values, in case we need to roll back the changes */ + old_q = rmp->priority; + old_max_q = rmp->max_priority; + old_nice = rmp->nice; + + /* Update the proc entry and reschedule the process */ + rmp->max_priority = rmp->priority = new_q; + rmp->nice = nice; + + if ((rv = schedule_process(rmp)) != OK) { + /* Something went wrong when rescheduling the process, roll + * back the changes to proc struct */ + rmp->priority = old_q; + rmp->max_priority = old_max_q; + rmp->nice = old_nice; + } + + return rv; +} + +/*===========================================================================* + * schedule_process * + *===========================================================================*/ +PRIVATE int schedule_process(struct schedproc * rmp) +{ + int rv; + + if ((rv = sys_schedule(rmp->endpoint, rmp->priority, + rmp->time_slice)) != OK) { + printf("SCHED: An error occurred when trying to schedule %d: %d\n", + rmp->endpoint, rv); + } + + return rv; +} + + 
+/*===========================================================================* + * init_scheduling * + *===========================================================================*/ + +PUBLIC void init_scheduling(void) +{ + balance_timeout = BALANCE_TIMEOUT * sys_hz(); + tmr_inittimer(&sched_timer); + sched_set_timer(&sched_timer, balance_timeout, balance_queues, 0); +} + +/*===========================================================================* + * balance_queues * + *===========================================================================*/ + +/* This function is called every balance_timeout ticks (set in init_scheduling) + * to rebalance the queues. The current scheduler bumps processes down one + * priority whenever they run out of quantum. This function finds all processes + * that were bumped down and pulls each back up by one queue. This default policy will soon be changed. + */ +PRIVATE void balance_queues(struct timer *tp) +{ + struct schedproc *rmp; + int proc_nr; + int rv; + + for (proc_nr=0, rmp=schedproc; proc_nr < NR_PROCS; proc_nr++, rmp++) { + if (rmp->flags & IN_USE) { + if (rmp->priority > rmp->max_priority) { + rmp->priority -= 1; /* increase priority (lower queue number) */ + schedule_process(rmp); + } + } + } + + sched_set_timer(&sched_timer, balance_timeout, balance_queues, 0); +} diff --git a/servers/sched/timers.c b/servers/sched/timers.c new file mode 100644 index 000000000..39efb9b0a --- /dev/null +++ b/servers/sched/timers.c @@ -0,0 +1,62 @@ +/* SCHED watchdog timer management, based on servers/pm/timers.c. 
+ * + * The entry points into this file are: + * sched_set_timer: reset an existing or set a new watchdog timer + * sched_expire_timers: check for expired timers and run watchdog functions + * + */ + +#include "sched.h" + +#include +#include +#include + +PRIVATE timer_t *sched_timers = NULL; +PRIVATE int sched_expiring = 0; + +/*===========================================================================* + * sched_set_timer * + *===========================================================================*/ +PUBLIC void sched_set_timer(timer_t *tp, int ticks, tmr_func_t watchdog, int arg) +{ + int r; + clock_t now, prev_time = 0, next_time; + + if ((r = getuptime(&now)) != OK) + panic("SCHED couldn't get uptime"); + + /* Set timer argument and add timer to the list, expiring at now+ticks. */ + tmr_arg(tp)->ta_int = arg; + prev_time = tmrs_settimer(&sched_timers,tp,now+ticks,watchdog,&next_time); + + /* Reschedule our synchronous alarm if necessary (skipped while timers are being expired). */ + if (sched_expiring == 0 && (! prev_time || prev_time > next_time)) { + if (sys_setalarm(next_time, 1) != OK) + panic("SCHED set timer couldn't set alarm"); + } + + return; +} + +/*===========================================================================* + * sched_expire_timers * + *===========================================================================*/ +PUBLIC void sched_expire_timers(clock_t now) +{ + clock_t next_time; + + /* Check for expired timers. Use the sched_expiring flag to indicate that + * watchdog functions are being called, so that sys_setalarm() isn't called + * more often than necessary when sched_set_timer is + * called from these watchdog functions. */ + sched_expiring = 1; + tmrs_exptimers(&sched_timers, now, &next_time); + sched_expiring = 0; + + /* Reschedule an alarm if necessary. 
*/ + if (next_time > 0) { + if (sys_setalarm(next_time, 1) != OK) + panic("SCHED expire timer couldn't set alarm"); + } +} diff --git a/servers/sched/utility.c b/servers/sched/utility.c new file mode 100644 index 000000000..58615a9e9 --- /dev/null +++ b/servers/sched/utility.c @@ -0,0 +1,84 @@ +/* This file contains some utility routines for SCHED. + * + * The entry points are: + * no_sys: called for invalid system call numbers + * sched_isokendpt: check the validity of an endpoint + * sched_isemtyendpt: check for validity and availability of endpoint slot + * is_from_pm: check whether a message originated from PM + * nice_to_priority: convert nice level to priority queue + */ + +#include "sched.h" +#include +#include /* for PRIO_MAX & PRIO_MIN */ +#include "kernel/proc.h" /* for queue constants */ +#include "schedproc.h" + +/*===========================================================================* + * no_sys * + *===========================================================================*/ +PUBLIC int no_sys(int who_e, int call_nr) +{ +/* A system call number not implemented by SCHED has been requested. 
*/ + printf("SCHED: in no_sys, call nr %d from %d\n", call_nr, who_e); + return(ENOSYS); +} + + +/*===========================================================================* + * sched_isokendpt * + *===========================================================================*/ +PUBLIC int sched_isokendpt(int endpoint, int *proc) +{ + *proc = _ENDPOINT_P(endpoint); + if (*proc < 0) + return (EBADSRCDST); /* Negative slot: don't schedule tasks */ + if(*proc >= NR_PROCS) + return (EINVAL); + if(endpoint != schedproc[*proc].endpoint) + return (EDEADSRCDST); + if(!(schedproc[*proc].flags & IN_USE)) + return (EDEADSRCDST); + return (OK); +} + +/*===========================================================================* + * sched_isemtyendpt * + *===========================================================================*/ +PUBLIC int sched_isemtyendpt(int endpoint, int *proc) +{ + *proc = _ENDPOINT_P(endpoint); + if (*proc < 0) + return (EBADSRCDST); /* Negative slot: don't schedule tasks */ + if(*proc >= NR_PROCS) + return (EINVAL); + if(schedproc[*proc].flags & IN_USE) + return (EDEADSRCDST); + return (OK); +} + +/*===========================================================================* + * is_from_pm * + *===========================================================================*/ +PUBLIC int is_from_pm(message *m_ptr) +{ + if (m_ptr->m_source == PM_PROC_NR) { + return 1; + } + return 0; +} + +/*===========================================================================* + * nice_to_priority * + *===========================================================================*/ +PUBLIC int nice_to_priority(int nice, unsigned* new_q) +{ + if (nice < PRIO_MIN || nice > PRIO_MAX) return(EINVAL); + + *new_q = MAX_USER_Q + (nice-PRIO_MIN) * (MIN_USER_Q-MAX_USER_Q+1) / + (PRIO_MAX-PRIO_MIN+1); + if (*new_q < MAX_USER_Q) *new_q = MAX_USER_Q; /* clamp; shouldn't happen */ + if (*new_q > MIN_USER_Q) *new_q = MIN_USER_Q; /* clamp; shouldn't happen */ + + return (OK); +} diff --git a/tools/Makefile b/tools/Makefile 
index 20ab2755e..2f770e00c 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -10,6 +10,7 @@ PROGRAMS= ../kernel/kernel \ ../servers/ds/ds \ ../servers/rs/rs \ ../servers/pm/pm \ + ../servers/sched/sched \ ../servers/vfs/vfs \ ../drivers/memory/memory_driver/memory \ ../drivers/log/log \